summary refs log tree commit diff
path: root/db
diff options
context:
space:
mode:
author Zhang Qiang <qiang.z.zhang@intel.com> 2012-05-29 11:25:24 +0800
committer Zhang Qiang <qiang.z.zhang@intel.com> 2012-05-29 11:25:24 +0800
commit e776056ea09ba0b6d9505ced6913c9190a12d632 (patch)
tree 092838f2a86042abc586aa5576e36ae6cb47e256 /db
parent 2e082c838d2ca750f5daac6dcdabecc22dfd4e46 (diff)
download db4-e776056ea09ba0b6d9505ced6913c9190a12d632.tar.gz
db4-e776056ea09ba0b6d9505ced6913c9190a12d632.tar.bz2
db4-e776056ea09ba0b6d9505ced6913c9190a12d632.zip
updated with Tizen:Base source codes
Diffstat (limited to 'db')
-rw-r--r-- db/Makefile.inc 5
-rw-r--r-- db/crdel.src 72
-rw-r--r-- db/crdel_auto.c 945
-rw-r--r-- db/crdel_autop.c 227
-rw-r--r-- db/crdel_rec.c 298
-rw-r--r-- db/db.c 1544
-rw-r--r-- db/db.src 328
-rw-r--r-- db/db_am.c 1015
-rw-r--r-- db/db_auto.c 3267
-rw-r--r-- db/db_autop.c 802
-rw-r--r-- db/db_cam.c 3460
-rw-r--r-- db/db_cds.c 177
-rw-r--r-- db/db_conv.c 733
-rw-r--r-- db/db_dispatch.c 953
-rw-r--r-- db/db_dup.c 203
-rw-r--r-- db/db_iface.c 2817
-rw-r--r-- db/db_join.c 940
-rw-r--r-- db/db_meta.c 1299
-rw-r--r-- db/db_method.c 1052
-rw-r--r-- db/db_open.c 628
-rw-r--r-- db/db_overflow.c 706
-rw-r--r-- db/db_ovfl_vrfy.c 409
-rw-r--r-- db/db_pr.c 1659
-rw-r--r-- db/db_rec.c 1859
-rw-r--r-- db/db_reclaim.c 246
-rw-r--r-- db/db_remove.c 492
-rw-r--r-- db/db_rename.c 372
-rw-r--r-- db/db_ret.c 156
-rw-r--r-- db/db_setid.c 213
-rw-r--r-- db/db_setlsn.c 137
-rw-r--r-- db/db_sort_multiple.c 287
-rw-r--r-- db/db_stati.c 494
-rw-r--r-- db/db_truncate.c 225
-rw-r--r-- db/db_upg.c 510
-rw-r--r-- db/db_upg_opd.c 343
-rw-r--r-- db/db_vrfy.c 2894
-rw-r--r-- db/db_vrfy_stub.c 117
-rw-r--r-- db/db_vrfyutil.c 916
-rw-r--r-- db/partition.c 2048
-rw-r--r-- db/tags 205
40 files changed, 262 insertions, 34791 deletions
diff --git a/db/Makefile.inc b/db/Makefile.inc
new file mode 100644
index 0000000..59478ba
--- /dev/null
+++ b/db/Makefile.inc
@@ -0,0 +1,5 @@
+# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
+
+.PATH: ${.CURDIR}/db/db
+
+SRCS+= db.c
diff --git a/db/crdel.src b/db/crdel.src
deleted file mode 100644
index cd0b02f..0000000
--- a/db/crdel.src
+++ /dev/null
@@ -1,72 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-DBPRIVATE
-PREFIX __crdel
-
-INCLUDE #include "db_int.h"
-INCLUDE #include "dbinc/crypto.h"
-INCLUDE #include "dbinc/db_page.h"
-INCLUDE #include "dbinc/db_dispatch.h"
-INCLUDE #include "dbinc/db_am.h"
-INCLUDE #include "dbinc/log.h"
-INCLUDE #include "dbinc/txn.h"
-INCLUDE
-
-/*
- * Metasub: log the creation of a subdatabase meta data page.
- *
- * fileid: identifies the file being acted upon.
- * pgno: page number on which to write this meta-data page
- * page: the actual meta-data page
- * lsn: lsn of the page.
- */
-BEGIN metasub 42 142
-DB fileid int32_t ld
-ARG pgno db_pgno_t lu
-PGDBT page DBT s
-POINTER lsn DB_LSN * lu
-END
-
-/*
- * Inmem_create: Log the creation of an in-memory database.
- *
- * name: Name of the database
- * fid: File id of the database
- */
-BEGIN inmem_create 44 138
-ARG fileid int32_t ld
-DBT name DBT s
-DBT fid DBT s
-ARG pgsize u_int32_t lu
-END
-
-/*
- * Inmem_rename: Log the renaming of an in-memory only database.
- *
- * oldname: database's starting name
- * newname: database's ending name
- * fid: fileid
- */
-BEGIN inmem_rename 44 139
-DBT oldname DBT s
-DBT newname DBT s
-DBT fid DBT s
-END
-
-/*
- * Inmem_remove: Log the removal of an in-memory only database.
- *
- * name: database's ending name
- * fid: fileid
- */
-BEGIN inmem_remove 44 140
-DBT name DBT s
-DBT fid DBT s
-END
-
diff --git a/db/crdel_auto.c b/db/crdel_auto.c
deleted file mode 100644
index 801a0a5..0000000
--- a/db/crdel_auto.c
+++ /dev/null
@@ -1,945 +0,0 @@
-/* Do not edit: automatically built by gen_rec.awk. */
-
-#include "db_config.h"
-#include "db_int.h"
-#include "dbinc/crypto.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_dispatch.h"
-#include "dbinc/db_am.h"
-#include "dbinc/log.h"
-#include "dbinc/txn.h"
-
-/*
- * PUBLIC: int __crdel_metasub_read __P((ENV *, DB **, void *,
- * PUBLIC: void *, __crdel_metasub_args **));
- */
-int
-__crdel_metasub_read(env, dbpp, td, recbuf, argpp)
- ENV *env;
- DB **dbpp;
- void *td;
- void *recbuf;
- __crdel_metasub_args **argpp;
-{
- __crdel_metasub_args *argp;
- u_int32_t uinttmp;
- u_int8_t *bp;
- int ret;
-
- if ((ret = __os_malloc(env,
- sizeof(__crdel_metasub_args) + sizeof(DB_TXN), &argp)) != 0)
- return (ret);
- bp = recbuf;
- argp->txnp = (DB_TXN *)&argp[1];
- memset(argp->txnp, 0, sizeof(DB_TXN));
-
- argp->txnp->td = td;
- LOGCOPY_32(env, &argp->type, bp);
- bp += sizeof(argp->type);
-
- LOGCOPY_32(env, &argp->txnp->txnid, bp);
- bp += sizeof(argp->txnp->txnid);
-
- LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->fileid = (int32_t)uinttmp;
- bp += sizeof(uinttmp);
- if (dbpp != NULL) {
- *dbpp = NULL;
- ret = __dbreg_id_to_db(
- env, argp->txnp, dbpp, argp->fileid, 1);
- }
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->pgno = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- memset(&argp->page, 0, sizeof(argp->page));
- LOGCOPY_32(env,&argp->page.size, bp);
- bp += sizeof(u_int32_t);
- argp->page.data = bp;
- bp += argp->page.size;
- if (LOG_SWAPPED(env) && dbpp != NULL && *dbpp != NULL) {
- int t_ret;
- if ((t_ret = __db_pageswap(*dbpp, (PAGE *)argp->page.data,
- (size_t)argp->page.size, NULL, 1)) != 0)
- return (t_ret);
- }
-
- LOGCOPY_TOLSN(env, &argp->lsn, bp);
- bp += sizeof(DB_LSN);
-
- *argpp = argp;
- return (ret);
-}
-
-/*
- * PUBLIC: int __crdel_metasub_log __P((DB *, DB_TXN *, DB_LSN *,
- * PUBLIC: u_int32_t, db_pgno_t, const DBT *, DB_LSN *));
- */
-int
-__crdel_metasub_log(dbp, txnp, ret_lsnp, flags, pgno, page, lsn)
- DB *dbp;
- DB_TXN *txnp;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- db_pgno_t pgno;
- const DBT *page;
- DB_LSN * lsn;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn, *rlsnp;
- DB_TXNLOGREC *lr;
- ENV *env;
- u_int32_t zero, uinttmp, rectype, txn_num;
- u_int npad;
- u_int8_t *bp;
- int is_durable, ret;
-
- COMPQUIET(lr, NULL);
-
- env = dbp->env;
- rlsnp = ret_lsnp;
- rectype = DB___crdel_metasub;
- npad = 0;
- ret = 0;
-
- if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
- F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
- if (txnp == NULL)
- return (0);
- is_durable = 0;
- } else
- is_durable = 1;
-
- if (txnp == NULL) {
- txn_num = 0;
- lsnp = &null_lsn;
- null_lsn.file = null_lsn.offset = 0;
- } else {
- if (TAILQ_FIRST(&txnp->kids) != NULL &&
- (ret = __txn_activekids(env, rectype, txnp)) != 0)
- return (ret);
- /*
- * We need to assign begin_lsn while holding region mutex.
- * That assignment is done inside the DbEnv->log_put call,
- * so pass in the appropriate memory location to be filled
- * in by the log_put code.
- */
- DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
- txn_num = txnp->txnid;
- }
-
- DB_ASSERT(env, dbp->log_filename != NULL);
- if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
- (ret = __dbreg_lazy_id(dbp)) != 0)
- return (ret);
-
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t) + (page == NULL ? 0 : page->size)
- + sizeof(*lsn);
- if (CRYPTO_ON(env)) {
- npad = env->crypto_handle->adj_size(logrec.size);
- logrec.size += npad;
- }
-
- if (is_durable || txnp == NULL) {
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0)
- return (ret);
- } else {
- if ((ret = __os_malloc(env,
- logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
- return (ret);
-#ifdef DIAGNOSTIC
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0) {
- __os_free(env, lr);
- return (ret);
- }
-#else
- logrec.data = lr->data;
-#endif
- }
- if (npad > 0)
- memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
- bp = logrec.data;
-
- LOGCOPY_32(env, bp, &rectype);
- bp += sizeof(rectype);
-
- LOGCOPY_32(env, bp, &txn_num);
- bp += sizeof(txn_num);
-
- LOGCOPY_FROMLSN(env, bp, lsnp);
- bp += sizeof(DB_LSN);
-
- uinttmp = (u_int32_t)dbp->log_filename->id;
- LOGCOPY_32(env, bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- uinttmp = (u_int32_t)pgno;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- if (page == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &page->size);
- bp += sizeof(page->size);
- memcpy(bp, page->data, page->size);
- if (LOG_SWAPPED(env))
- if ((ret = __db_pageswap(dbp,
- (PAGE *)bp, (size_t)page->size, (DBT *)NULL, 0)) != 0)
- return (ret);
- bp += page->size;
- }
-
- if (lsn != NULL) {
- if (txnp != NULL) {
- LOG *lp = env->lg_handle->reginfo.primary;
- if (LOG_COMPARE(lsn, &lp->lsn) >= 0 && (ret =
- __log_check_page_lsn(env, dbp, lsn)) != 0)
- return (ret);
- }
- LOGCOPY_FROMLSN(env, bp, lsn);
- } else
- memset(bp, 0, sizeof(*lsn));
- bp += sizeof(*lsn);
-
- DB_ASSERT(env,
- (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
- if (is_durable || txnp == NULL) {
- if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
- *lsnp = *rlsnp;
- if (rlsnp != ret_lsnp)
- *ret_lsnp = *rlsnp;
- }
- } else {
- ret = 0;
-#ifdef DIAGNOSTIC
- /*
- * Set the debug bit if we are going to log non-durable
- * transactions so they will be ignored by recovery.
- */
- memcpy(lr->data, logrec.data, logrec.size);
- rectype |= DB_debug_FLAG;
- LOGCOPY_32(env, logrec.data, &rectype);
-
- if (!IS_REP_CLIENT(env))
- ret = __log_put(env,
- rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
- STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
- F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
- LSN_NOT_LOGGED(*ret_lsnp);
- }
-
-#ifdef LOG_DIAGNOSTIC
- if (ret != 0)
- (void)__crdel_metasub_print(env,
- (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
- __os_free(env, logrec.data);
-#else
- if (is_durable || txnp == NULL)
- __os_free(env, logrec.data);
-#endif
- return (ret);
-}
-
-/*
- * PUBLIC: int __crdel_inmem_create_read __P((ENV *, void *,
- * PUBLIC: __crdel_inmem_create_args **));
- */
-int
-__crdel_inmem_create_read(env, recbuf, argpp)
- ENV *env;
- void *recbuf;
- __crdel_inmem_create_args **argpp;
-{
- __crdel_inmem_create_args *argp;
- u_int32_t uinttmp;
- u_int8_t *bp;
- int ret;
-
- if ((ret = __os_malloc(env,
- sizeof(__crdel_inmem_create_args) + sizeof(DB_TXN), &argp)) != 0)
- return (ret);
- bp = recbuf;
- argp->txnp = (DB_TXN *)&argp[1];
- memset(argp->txnp, 0, sizeof(DB_TXN));
-
- LOGCOPY_32(env, &argp->type, bp);
- bp += sizeof(argp->type);
-
- LOGCOPY_32(env, &argp->txnp->txnid, bp);
- bp += sizeof(argp->txnp->txnid);
-
- LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->fileid = (int32_t)uinttmp;
- bp += sizeof(uinttmp);
-
- memset(&argp->name, 0, sizeof(argp->name));
- LOGCOPY_32(env,&argp->name.size, bp);
- bp += sizeof(u_int32_t);
- argp->name.data = bp;
- bp += argp->name.size;
-
- memset(&argp->fid, 0, sizeof(argp->fid));
- LOGCOPY_32(env,&argp->fid.size, bp);
- bp += sizeof(u_int32_t);
- argp->fid.data = bp;
- bp += argp->fid.size;
-
- LOGCOPY_32(env, &argp->pgsize, bp);
- bp += sizeof(argp->pgsize);
-
- *argpp = argp;
- return (ret);
-}
-
-/*
- * PUBLIC: int __crdel_inmem_create_log __P((ENV *, DB_TXN *,
- * PUBLIC: DB_LSN *, u_int32_t, int32_t, const DBT *, const DBT *,
- * PUBLIC: u_int32_t));
- */
-int
-__crdel_inmem_create_log(env, txnp, ret_lsnp, flags,
- fileid, name, fid, pgsize)
- ENV *env;
- DB_TXN *txnp;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- int32_t fileid;
- const DBT *name;
- const DBT *fid;
- u_int32_t pgsize;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn, *rlsnp;
- DB_TXNLOGREC *lr;
- u_int32_t zero, uinttmp, rectype, txn_num;
- u_int npad;
- u_int8_t *bp;
- int is_durable, ret;
-
- COMPQUIET(lr, NULL);
-
- rlsnp = ret_lsnp;
- rectype = DB___crdel_inmem_create;
- npad = 0;
- ret = 0;
-
- if (LF_ISSET(DB_LOG_NOT_DURABLE)) {
- if (txnp == NULL)
- return (0);
- is_durable = 0;
- } else
- is_durable = 1;
-
- if (txnp == NULL) {
- txn_num = 0;
- lsnp = &null_lsn;
- null_lsn.file = null_lsn.offset = 0;
- } else {
- if (TAILQ_FIRST(&txnp->kids) != NULL &&
- (ret = __txn_activekids(env, rectype, txnp)) != 0)
- return (ret);
- /*
- * We need to assign begin_lsn while holding region mutex.
- * That assignment is done inside the DbEnv->log_put call,
- * so pass in the appropriate memory location to be filled
- * in by the log_put code.
- */
- DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
- txn_num = txnp->txnid;
- }
-
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t) + (name == NULL ? 0 : name->size)
- + sizeof(u_int32_t) + (fid == NULL ? 0 : fid->size)
- + sizeof(u_int32_t);
- if (CRYPTO_ON(env)) {
- npad = env->crypto_handle->adj_size(logrec.size);
- logrec.size += npad;
- }
-
- if (is_durable || txnp == NULL) {
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0)
- return (ret);
- } else {
- if ((ret = __os_malloc(env,
- logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
- return (ret);
-#ifdef DIAGNOSTIC
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0) {
- __os_free(env, lr);
- return (ret);
- }
-#else
- logrec.data = lr->data;
-#endif
- }
- if (npad > 0)
- memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
- bp = logrec.data;
-
- LOGCOPY_32(env, bp, &rectype);
- bp += sizeof(rectype);
-
- LOGCOPY_32(env, bp, &txn_num);
- bp += sizeof(txn_num);
-
- LOGCOPY_FROMLSN(env, bp, lsnp);
- bp += sizeof(DB_LSN);
-
- uinttmp = (u_int32_t)fileid;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- if (name == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &name->size);
- bp += sizeof(name->size);
- memcpy(bp, name->data, name->size);
- bp += name->size;
- }
-
- if (fid == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &fid->size);
- bp += sizeof(fid->size);
- memcpy(bp, fid->data, fid->size);
- bp += fid->size;
- }
-
- LOGCOPY_32(env, bp, &pgsize);
- bp += sizeof(pgsize);
-
- DB_ASSERT(env,
- (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
- if (is_durable || txnp == NULL) {
- if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
- *lsnp = *rlsnp;
- if (rlsnp != ret_lsnp)
- *ret_lsnp = *rlsnp;
- }
- } else {
- ret = 0;
-#ifdef DIAGNOSTIC
- /*
- * Set the debug bit if we are going to log non-durable
- * transactions so they will be ignored by recovery.
- */
- memcpy(lr->data, logrec.data, logrec.size);
- rectype |= DB_debug_FLAG;
- LOGCOPY_32(env, logrec.data, &rectype);
-
- if (!IS_REP_CLIENT(env))
- ret = __log_put(env,
- rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
- STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
- F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
- LSN_NOT_LOGGED(*ret_lsnp);
- }
-
-#ifdef LOG_DIAGNOSTIC
- if (ret != 0)
- (void)__crdel_inmem_create_print(env,
- (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
- __os_free(env, logrec.data);
-#else
- if (is_durable || txnp == NULL)
- __os_free(env, logrec.data);
-#endif
- return (ret);
-}
-
-/*
- * PUBLIC: int __crdel_inmem_rename_read __P((ENV *, void *,
- * PUBLIC: __crdel_inmem_rename_args **));
- */
-int
-__crdel_inmem_rename_read(env, recbuf, argpp)
- ENV *env;
- void *recbuf;
- __crdel_inmem_rename_args **argpp;
-{
- __crdel_inmem_rename_args *argp;
- u_int8_t *bp;
- int ret;
-
- if ((ret = __os_malloc(env,
- sizeof(__crdel_inmem_rename_args) + sizeof(DB_TXN), &argp)) != 0)
- return (ret);
- bp = recbuf;
- argp->txnp = (DB_TXN *)&argp[1];
- memset(argp->txnp, 0, sizeof(DB_TXN));
-
- LOGCOPY_32(env, &argp->type, bp);
- bp += sizeof(argp->type);
-
- LOGCOPY_32(env, &argp->txnp->txnid, bp);
- bp += sizeof(argp->txnp->txnid);
-
- LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
- bp += sizeof(DB_LSN);
-
- memset(&argp->oldname, 0, sizeof(argp->oldname));
- LOGCOPY_32(env,&argp->oldname.size, bp);
- bp += sizeof(u_int32_t);
- argp->oldname.data = bp;
- bp += argp->oldname.size;
-
- memset(&argp->newname, 0, sizeof(argp->newname));
- LOGCOPY_32(env,&argp->newname.size, bp);
- bp += sizeof(u_int32_t);
- argp->newname.data = bp;
- bp += argp->newname.size;
-
- memset(&argp->fid, 0, sizeof(argp->fid));
- LOGCOPY_32(env,&argp->fid.size, bp);
- bp += sizeof(u_int32_t);
- argp->fid.data = bp;
- bp += argp->fid.size;
-
- *argpp = argp;
- return (ret);
-}
-
-/*
- * PUBLIC: int __crdel_inmem_rename_log __P((ENV *, DB_TXN *,
- * PUBLIC: DB_LSN *, u_int32_t, const DBT *, const DBT *, const DBT *));
- */
-int
-__crdel_inmem_rename_log(env, txnp, ret_lsnp, flags,
- oldname, newname, fid)
- ENV *env;
- DB_TXN *txnp;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- const DBT *oldname;
- const DBT *newname;
- const DBT *fid;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn, *rlsnp;
- DB_TXNLOGREC *lr;
- u_int32_t zero, rectype, txn_num;
- u_int npad;
- u_int8_t *bp;
- int is_durable, ret;
-
- COMPQUIET(lr, NULL);
-
- rlsnp = ret_lsnp;
- rectype = DB___crdel_inmem_rename;
- npad = 0;
- ret = 0;
-
- if (LF_ISSET(DB_LOG_NOT_DURABLE)) {
- if (txnp == NULL)
- return (0);
- is_durable = 0;
- } else
- is_durable = 1;
-
- if (txnp == NULL) {
- txn_num = 0;
- lsnp = &null_lsn;
- null_lsn.file = null_lsn.offset = 0;
- } else {
- if (TAILQ_FIRST(&txnp->kids) != NULL &&
- (ret = __txn_activekids(env, rectype, txnp)) != 0)
- return (ret);
- /*
- * We need to assign begin_lsn while holding region mutex.
- * That assignment is done inside the DbEnv->log_put call,
- * so pass in the appropriate memory location to be filled
- * in by the log_put code.
- */
- DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
- txn_num = txnp->txnid;
- }
-
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(u_int32_t) + (oldname == NULL ? 0 : oldname->size)
- + sizeof(u_int32_t) + (newname == NULL ? 0 : newname->size)
- + sizeof(u_int32_t) + (fid == NULL ? 0 : fid->size);
- if (CRYPTO_ON(env)) {
- npad = env->crypto_handle->adj_size(logrec.size);
- logrec.size += npad;
- }
-
- if (is_durable || txnp == NULL) {
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0)
- return (ret);
- } else {
- if ((ret = __os_malloc(env,
- logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
- return (ret);
-#ifdef DIAGNOSTIC
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0) {
- __os_free(env, lr);
- return (ret);
- }
-#else
- logrec.data = lr->data;
-#endif
- }
- if (npad > 0)
- memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
- bp = logrec.data;
-
- LOGCOPY_32(env, bp, &rectype);
- bp += sizeof(rectype);
-
- LOGCOPY_32(env, bp, &txn_num);
- bp += sizeof(txn_num);
-
- LOGCOPY_FROMLSN(env, bp, lsnp);
- bp += sizeof(DB_LSN);
-
- if (oldname == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &oldname->size);
- bp += sizeof(oldname->size);
- memcpy(bp, oldname->data, oldname->size);
- bp += oldname->size;
- }
-
- if (newname == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &newname->size);
- bp += sizeof(newname->size);
- memcpy(bp, newname->data, newname->size);
- bp += newname->size;
- }
-
- if (fid == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &fid->size);
- bp += sizeof(fid->size);
- memcpy(bp, fid->data, fid->size);
- bp += fid->size;
- }
-
- DB_ASSERT(env,
- (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
- if (is_durable || txnp == NULL) {
- if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
- *lsnp = *rlsnp;
- if (rlsnp != ret_lsnp)
- *ret_lsnp = *rlsnp;
- }
- } else {
- ret = 0;
-#ifdef DIAGNOSTIC
- /*
- * Set the debug bit if we are going to log non-durable
- * transactions so they will be ignored by recovery.
- */
- memcpy(lr->data, logrec.data, logrec.size);
- rectype |= DB_debug_FLAG;
- LOGCOPY_32(env, logrec.data, &rectype);
-
- if (!IS_REP_CLIENT(env))
- ret = __log_put(env,
- rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
- STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
- F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
- LSN_NOT_LOGGED(*ret_lsnp);
- }
-
-#ifdef LOG_DIAGNOSTIC
- if (ret != 0)
- (void)__crdel_inmem_rename_print(env,
- (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
- __os_free(env, logrec.data);
-#else
- if (is_durable || txnp == NULL)
- __os_free(env, logrec.data);
-#endif
- return (ret);
-}
-
-/*
- * PUBLIC: int __crdel_inmem_remove_read __P((ENV *, void *,
- * PUBLIC: __crdel_inmem_remove_args **));
- */
-int
-__crdel_inmem_remove_read(env, recbuf, argpp)
- ENV *env;
- void *recbuf;
- __crdel_inmem_remove_args **argpp;
-{
- __crdel_inmem_remove_args *argp;
- u_int8_t *bp;
- int ret;
-
- if ((ret = __os_malloc(env,
- sizeof(__crdel_inmem_remove_args) + sizeof(DB_TXN), &argp)) != 0)
- return (ret);
- bp = recbuf;
- argp->txnp = (DB_TXN *)&argp[1];
- memset(argp->txnp, 0, sizeof(DB_TXN));
-
- LOGCOPY_32(env, &argp->type, bp);
- bp += sizeof(argp->type);
-
- LOGCOPY_32(env, &argp->txnp->txnid, bp);
- bp += sizeof(argp->txnp->txnid);
-
- LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
- bp += sizeof(DB_LSN);
-
- memset(&argp->name, 0, sizeof(argp->name));
- LOGCOPY_32(env,&argp->name.size, bp);
- bp += sizeof(u_int32_t);
- argp->name.data = bp;
- bp += argp->name.size;
-
- memset(&argp->fid, 0, sizeof(argp->fid));
- LOGCOPY_32(env,&argp->fid.size, bp);
- bp += sizeof(u_int32_t);
- argp->fid.data = bp;
- bp += argp->fid.size;
-
- *argpp = argp;
- return (ret);
-}
-
-/*
- * PUBLIC: int __crdel_inmem_remove_log __P((ENV *, DB_TXN *,
- * PUBLIC: DB_LSN *, u_int32_t, const DBT *, const DBT *));
- */
-int
-__crdel_inmem_remove_log(env, txnp, ret_lsnp, flags,
- name, fid)
- ENV *env;
- DB_TXN *txnp;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- const DBT *name;
- const DBT *fid;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn, *rlsnp;
- DB_TXNLOGREC *lr;
- u_int32_t zero, rectype, txn_num;
- u_int npad;
- u_int8_t *bp;
- int is_durable, ret;
-
- COMPQUIET(lr, NULL);
-
- rlsnp = ret_lsnp;
- rectype = DB___crdel_inmem_remove;
- npad = 0;
- ret = 0;
-
- if (LF_ISSET(DB_LOG_NOT_DURABLE)) {
- if (txnp == NULL)
- return (0);
- is_durable = 0;
- } else
- is_durable = 1;
-
- if (txnp == NULL) {
- txn_num = 0;
- lsnp = &null_lsn;
- null_lsn.file = null_lsn.offset = 0;
- } else {
- if (TAILQ_FIRST(&txnp->kids) != NULL &&
- (ret = __txn_activekids(env, rectype, txnp)) != 0)
- return (ret);
- /*
- * We need to assign begin_lsn while holding region mutex.
- * That assignment is done inside the DbEnv->log_put call,
- * so pass in the appropriate memory location to be filled
- * in by the log_put code.
- */
- DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
- txn_num = txnp->txnid;
- }
-
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(u_int32_t) + (name == NULL ? 0 : name->size)
- + sizeof(u_int32_t) + (fid == NULL ? 0 : fid->size);
- if (CRYPTO_ON(env)) {
- npad = env->crypto_handle->adj_size(logrec.size);
- logrec.size += npad;
- }
-
- if (is_durable || txnp == NULL) {
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0)
- return (ret);
- } else {
- if ((ret = __os_malloc(env,
- logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
- return (ret);
-#ifdef DIAGNOSTIC
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0) {
- __os_free(env, lr);
- return (ret);
- }
-#else
- logrec.data = lr->data;
-#endif
- }
- if (npad > 0)
- memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
- bp = logrec.data;
-
- LOGCOPY_32(env, bp, &rectype);
- bp += sizeof(rectype);
-
- LOGCOPY_32(env, bp, &txn_num);
- bp += sizeof(txn_num);
-
- LOGCOPY_FROMLSN(env, bp, lsnp);
- bp += sizeof(DB_LSN);
-
- if (name == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &name->size);
- bp += sizeof(name->size);
- memcpy(bp, name->data, name->size);
- bp += name->size;
- }
-
- if (fid == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &fid->size);
- bp += sizeof(fid->size);
- memcpy(bp, fid->data, fid->size);
- bp += fid->size;
- }
-
- DB_ASSERT(env,
- (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
- if (is_durable || txnp == NULL) {
- if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
- *lsnp = *rlsnp;
- if (rlsnp != ret_lsnp)
- *ret_lsnp = *rlsnp;
- }
- } else {
- ret = 0;
-#ifdef DIAGNOSTIC
- /*
- * Set the debug bit if we are going to log non-durable
- * transactions so they will be ignored by recovery.
- */
- memcpy(lr->data, logrec.data, logrec.size);
- rectype |= DB_debug_FLAG;
- LOGCOPY_32(env, logrec.data, &rectype);
-
- if (!IS_REP_CLIENT(env))
- ret = __log_put(env,
- rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
- STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
- F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
- LSN_NOT_LOGGED(*ret_lsnp);
- }
-
-#ifdef LOG_DIAGNOSTIC
- if (ret != 0)
- (void)__crdel_inmem_remove_print(env,
- (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
- __os_free(env, logrec.data);
-#else
- if (is_durable || txnp == NULL)
- __os_free(env, logrec.data);
-#endif
- return (ret);
-}
-
-/*
- * PUBLIC: int __crdel_init_recover __P((ENV *, DB_DISTAB *));
- */
-int
-__crdel_init_recover(env, dtabp)
- ENV *env;
- DB_DISTAB *dtabp;
-{
- int ret;
-
- if ((ret = __db_add_recovery_int(env, dtabp,
- __crdel_metasub_recover, DB___crdel_metasub)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __crdel_inmem_create_recover, DB___crdel_inmem_create)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __crdel_inmem_rename_recover, DB___crdel_inmem_rename)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __crdel_inmem_remove_recover, DB___crdel_inmem_remove)) != 0)
- return (ret);
- return (0);
-}
diff --git a/db/crdel_autop.c b/db/crdel_autop.c
deleted file mode 100644
index 6bf4bb6..0000000
--- a/db/crdel_autop.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/* Do not edit: automatically built by gen_rec.awk. */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/crypto.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_dispatch.h"
-#include "dbinc/db_am.h"
-#include "dbinc/log.h"
-#include "dbinc/txn.h"
-
-/*
- * PUBLIC: int __crdel_metasub_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__crdel_metasub_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __crdel_metasub_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __crdel_metasub_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__crdel_metasub%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\tpage: ");
- for (i = 0; i < argp->page.size; i++) {
- ch = ((u_int8_t *)argp->page.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tlsn: [%lu][%lu]\n",
- (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __crdel_inmem_create_print __P((ENV *, DBT *,
- * PUBLIC: DB_LSN *, db_recops, void *));
- */
-int
-__crdel_inmem_create_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __crdel_inmem_create_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret = __crdel_inmem_create_read(env, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__crdel_inmem_create%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tname: ");
- for (i = 0; i < argp->name.size; i++) {
- ch = ((u_int8_t *)argp->name.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tfid: ");
- for (i = 0; i < argp->fid.size; i++) {
- ch = ((u_int8_t *)argp->fid.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tpgsize: %lu\n", (u_long)argp->pgsize);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __crdel_inmem_rename_print __P((ENV *, DBT *,
- * PUBLIC: DB_LSN *, db_recops, void *));
- */
-int
-__crdel_inmem_rename_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __crdel_inmem_rename_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret = __crdel_inmem_rename_read(env, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__crdel_inmem_rename%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\toldname: ");
- for (i = 0; i < argp->oldname.size; i++) {
- ch = ((u_int8_t *)argp->oldname.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tnewname: ");
- for (i = 0; i < argp->newname.size; i++) {
- ch = ((u_int8_t *)argp->newname.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tfid: ");
- for (i = 0; i < argp->fid.size; i++) {
- ch = ((u_int8_t *)argp->fid.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __crdel_inmem_remove_print __P((ENV *, DBT *,
- * PUBLIC: DB_LSN *, db_recops, void *));
- */
-int
-__crdel_inmem_remove_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __crdel_inmem_remove_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret = __crdel_inmem_remove_read(env, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__crdel_inmem_remove%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tname: ");
- for (i = 0; i < argp->name.size; i++) {
- ch = ((u_int8_t *)argp->name.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tfid: ");
- for (i = 0; i < argp->fid.size; i++) {
- ch = ((u_int8_t *)argp->fid.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __crdel_init_print __P((ENV *, DB_DISTAB *));
- */
-int
-__crdel_init_print(env, dtabp)
- ENV *env;
- DB_DISTAB *dtabp;
-{
- int ret;
-
- if ((ret = __db_add_recovery_int(env, dtabp,
- __crdel_metasub_print, DB___crdel_metasub)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __crdel_inmem_create_print, DB___crdel_inmem_create)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __crdel_inmem_rename_print, DB___crdel_inmem_rename)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __crdel_inmem_remove_print, DB___crdel_inmem_remove)) != 0)
- return (ret);
- return (0);
-}
diff --git a/db/crdel_rec.c b/db/crdel_rec.c
deleted file mode 100644
index 285b965..0000000
--- a/db/crdel_rec.c
+++ /dev/null
@@ -1,298 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/fop.h"
-#include "dbinc/hash.h"
-#include "dbinc/log.h"
-#include "dbinc/mp.h"
-#include "dbinc/txn.h"
-
-/*
- * __crdel_metasub_recover --
- * Recovery function for metasub.
- *
- * PUBLIC: int __crdel_metasub_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__crdel_metasub_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __crdel_metasub_args *argp;
- DB_THREAD_INFO *ip;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- int cmp_p, ret, t_ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- pagep = NULL;
- REC_PRINT(__crdel_metasub_print);
- REC_INTRO(__crdel_metasub_read, ip, 0);
-
- /*
- * If we are undoing this operation, but the DB that we got back
- * was never really opened, then this open was an in-memory open
- * that did not finish. We can let the file creation take care
- * of any necessary undo/cleanup.
- */
- if (DB_UNDO(op) && !F_ISSET(file_dbp, DB_AM_OPEN_CALLED))
- goto done;
-
- if ((ret = __memp_fget(mpf, &argp->pgno,
- ip, NULL, 0, &pagep)) != 0) {
- /* If this is an in-memory file, this might be OK. */
- if (F_ISSET(file_dbp, DB_AM_INMEM) &&
- (ret = __memp_fget(mpf, &argp->pgno, ip, NULL,
- DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &pagep)) == 0) {
- LSN_NOT_LOGGED(LSN(pagep));
- } else {
- *lsnp = argp->prev_lsn;
- ret = 0;
- goto out;
- }
- }
-
- cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
-
- if (cmp_p == 0 && DB_REDO(op)) {
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- memcpy(pagep, argp->page.data, argp->page.size);
- LSN(pagep) = *lsnp;
-
- /*
- * If this was an in-memory database and we are re-creating
- * and this is the meta-data page, then we need to set up a
- * bunch of fields in the dbo as well.
- */
- if (F_ISSET(file_dbp, DB_AM_INMEM) &&
- argp->pgno == PGNO_BASE_MD &&
- (ret = __db_meta_setup(file_dbp->env, file_dbp,
- file_dbp->dname, (DBMETA *)pagep, 0, DB_CHK_META)) != 0)
- goto out;
- } else if (DB_UNDO(op)) {
- /*
- * We want to undo this page creation. The page creation
- * happened in two parts. First, we called __db_pg_alloc which
- * was logged separately. Then we wrote the meta-data onto
- * the page. So long as we restore the LSN, then the recovery
- * for __db_pg_alloc will do everything else.
- *
- * Don't bother checking the lsn on the page. If we are
- * rolling back the next thing is that this page will get
- * freed. Opening the subdb will have reinitialized the
- * page, but not the lsn.
- */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- LSN(pagep) = argp->lsn;
- }
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: if (pagep != NULL && (t_ret = __memp_fput(mpf,
- ip, pagep, file_dbp->priority)) != 0 &&
- ret == 0)
- ret = t_ret;
-
- REC_CLOSE;
-}
-
-/*
- * __crdel_inmem_create_recover --
- * Recovery function for inmem_create.
- *
- * PUBLIC: int __crdel_inmem_create_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__crdel_inmem_create_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __crdel_inmem_create_args *argp;
- DB *dbp;
- int do_close, ret, t_ret;
-
- COMPQUIET(info, NULL);
-
- dbp = NULL;
- do_close = 0;
- REC_PRINT(__crdel_inmem_create_print);
- REC_NOOP_INTRO(__crdel_inmem_create_read);
-
- /* First, see if the DB handle already exists. */
- if (argp->fileid == DB_LOGFILEID_INVALID) {
- if (DB_REDO(op))
- ret = ENOENT;
- else
- ret = 0;
- } else
- ret = __dbreg_id_to_db(env, argp->txnp, &dbp, argp->fileid, 0);
-
- if (DB_REDO(op)) {
- /*
- * If the dbreg failed, that means that we're creating a
- * tmp file.
- */
- if (ret != 0) {
- if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
- goto out;
-
- F_SET(dbp, DB_AM_RECOVER | DB_AM_INMEM);
- memcpy(dbp->fileid, argp->fid.data, DB_FILE_ID_LEN);
- if (((ret = __os_strdup(env,
- argp->name.data, &dbp->dname)) != 0))
- goto out;
-
- /*
- * This DBP is never going to be entered into the
- * dbentry table, so if we leave it open here,
- * then we're going to lose it.
- */
- do_close = 1;
- }
-
- /* Now, set the fileid. */
- memcpy(dbp->fileid, argp->fid.data, argp->fid.size);
- if ((ret = __memp_set_fileid(dbp->mpf, dbp->fileid)) != 0)
- goto out;
- dbp->preserve_fid = 1;
- MAKE_INMEM(dbp);
- if ((ret = __env_setup(dbp,
- NULL, NULL, argp->name.data, TXN_INVALID, 0)) != 0)
- goto out;
- ret = __env_mpool(dbp, argp->name.data, 0);
-
- if (ret == ENOENT) {
- dbp->pgsize = argp->pgsize;
- if ((ret = __env_mpool(dbp,
- argp->name.data, DB_CREATE)) != 0)
- goto out;
- } else if (ret != 0)
- goto out;
- }
-
- if (DB_UNDO(op)) {
- if (ret == 0)
- ret = __memp_nameop(env, argp->fid.data, NULL,
- (const char *)argp->name.data, NULL, 1);
-
- if (ret == ENOENT || ret == DB_DELETED)
- ret = 0;
- else
- goto out;
- }
-
- *lsnp = argp->prev_lsn;
-
-out: if (dbp != NULL) {
- t_ret = 0;
-
- if (do_close || ret != 0)
- t_ret = __db_close(dbp, NULL, DB_NOSYNC);
- if (t_ret != 0 && ret == 0)
- ret = t_ret;
- }
- REC_NOOP_CLOSE;
-}
-
-/*
- * __crdel_inmem_rename_recover --
- * Recovery function for inmem_rename.
- *
- * PUBLIC: int __crdel_inmem_rename_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__crdel_inmem_rename_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __crdel_inmem_rename_args *argp;
- u_int8_t *fileid;
- int ret;
-
- COMPQUIET(info, NULL);
-
- REC_PRINT(__crdel_inmem_rename_print);
- REC_NOOP_INTRO(__crdel_inmem_rename_read);
- fileid = argp->fid.data;
-
- /* Void out errors because the files may or may not still exist. */
- if (DB_REDO(op))
- (void)__memp_nameop(env, fileid,
- (const char *)argp->newname.data,
- (const char *)argp->oldname.data,
- (const char *)argp->newname.data, 1);
-
- if (DB_UNDO(op))
- (void)__memp_nameop(env, fileid,
- (const char *)argp->oldname.data,
- (const char *)argp->newname.data,
- (const char *)argp->oldname.data, 1);
-
- *lsnp = argp->prev_lsn;
- ret = 0;
-
- REC_NOOP_CLOSE;
-}
-
-/*
- * __crdel_inmem_remove_recover --
- * Recovery function for inmem_remove.
- *
- * PUBLIC: int __crdel_inmem_remove_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__crdel_inmem_remove_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __crdel_inmem_remove_args *argp;
- int ret;
-
- COMPQUIET(info, NULL);
-
- REC_PRINT(__crdel_inmem_remove_print);
- REC_NOOP_INTRO(__crdel_inmem_remove_read);
-
- /*
- * Since removes are delayed; there is no undo for a remove; only redo.
- * The remove may fail, which is OK.
- */
- if (DB_REDO(op)) {
- (void)__memp_nameop(env,
- argp->fid.data, NULL, argp->name.data, NULL, 1);
- }
-
- *lsnp = argp->prev_lsn;
- ret = 0;
-
- REC_NOOP_CLOSE;
-}
diff --git a/db/db.c b/db/db.c
index 9caa1aa..a18f056 100644
--- a/db/db.c
+++ b/db/db.c
@@ -1,14 +1,5 @@
/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995
+ * Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -19,7 +10,11 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
@@ -34,1506 +29,71 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_swap.h"
-#include "dbinc/btree.h"
-#include "dbinc/fop.h"
-#include "dbinc/hash.h"
-#include "dbinc/lock.h"
-#include "dbinc/log.h"
-#include "dbinc/mp.h"
-#include "dbinc/partition.h"
-#include "dbinc/qam.h"
-#include "dbinc/txn.h"
-
-static int __db_disassociate __P((DB *));
-static int __db_disassociate_foreign __P ((DB *));
-
-#ifdef CONFIG_TEST
-static int __db_makecopy __P((ENV *, const char *, const char *));
-static int __qam_testdocopy __P((DB *, const char *));
-#endif
-
-/*
- * DB.C --
- * This file contains the utility functions for the DBP layer.
- */
-
-/*
- * __db_master_open --
- * Open up a handle on a master database.
- *
- * PUBLIC: int __db_master_open __P((DB *, DB_THREAD_INFO *,
- * PUBLIC: DB_TXN *, const char *, u_int32_t, int, DB **));
- */
-int
-__db_master_open(subdbp, ip, txn, name, flags, mode, dbpp)
- DB *subdbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- const char *name;
- u_int32_t flags;
- int mode;
- DB **dbpp;
-{
- DB *dbp;
- int ret;
-
- *dbpp = NULL;
-
- /* Open up a handle on the main database. */
- if ((ret = __db_create_internal(&dbp, subdbp->env, 0)) != 0)
- return (ret);
-
- /*
- * It's always a btree.
- * Run in the transaction we've created.
- * Set the pagesize in case we're creating a new database.
- * Flag that we're creating a database with subdatabases.
- */
- dbp->pgsize = subdbp->pgsize;
- F_SET(dbp, DB_AM_SUBDB);
- F_SET(dbp, F_ISSET(subdbp,
- DB_AM_RECOVER | DB_AM_SWAP |
- DB_AM_ENCRYPT | DB_AM_CHKSUM | DB_AM_NOT_DURABLE));
-
- /*
- * If there was a subdb specified, then we only want to apply
- * DB_EXCL to the subdb, not the actual file. We only got here
- * because there was a subdb specified.
- */
- LF_CLR(DB_EXCL);
- LF_SET(DB_RDWRMASTER);
- if ((ret = __db_open(dbp, ip,
- txn, name, NULL, DB_BTREE, flags, mode, PGNO_BASE_MD)) != 0)
- goto err;
-
- /*
- * The items in dbp are initialized from the master file's meta page.
- * Other items such as checksum and encryption are checked when we
- * read the meta-page, so we do not check those here. However, if
- * the meta-page caused checksumming to be turned on and it wasn't
- * already, set it here.
- */
- if (F_ISSET(dbp, DB_AM_CHKSUM))
- F_SET(subdbp, DB_AM_CHKSUM);
-
- /*
- * The user may have specified a page size for an existing file,
- * which we want to ignore.
- */
- subdbp->pgsize = dbp->pgsize;
- *dbpp = dbp;
-
- if (0) {
-err: if (!F_ISSET(dbp, DB_AM_DISCARD))
- (void)__db_close(dbp, txn, 0);
- }
-
- return (ret);
-}
-
-/*
- * __db_master_update --
- * Add/Open/Remove a subdatabase from a master database.
- *
- * PUBLIC: int __db_master_update __P((DB *, DB *, DB_THREAD_INFO *, DB_TXN *,
- * PUBLIC: const char *, DBTYPE, mu_action, const char *, u_int32_t));
- */
-int
-__db_master_update(mdbp, sdbp, ip, txn, subdb, type, action, newname, flags)
- DB *mdbp, *sdbp;
- DB_TXN *txn;
- DB_THREAD_INFO *ip;
- const char *subdb;
- DBTYPE type;
- mu_action action;
- const char *newname;
- u_int32_t flags;
-{
- DBC *dbc, *ndbc;
- DBT key, data, ndata;
- ENV *env;
- PAGE *p, *r;
- db_pgno_t t_pgno;
- int modify, ret, t_ret;
-
- env = mdbp->env;
- dbc = ndbc = NULL;
- p = NULL;
-
- /*
- * Open up a cursor. If this is CDB and we're creating the database,
- * make it an update cursor.
- *
- * Might we modify the master database? If so, we'll need to lock.
- */
- modify = (action != MU_OPEN || LF_ISSET(DB_CREATE)) ? 1 : 0;
-
- if ((ret = __db_cursor(mdbp, ip, txn, &dbc,
- (CDB_LOCKING(env) && modify) ? DB_WRITECURSOR : 0)) != 0)
- return (ret);
-
- /*
- * Point the cursor at the record.
- *
- * If we're removing or potentially creating an entry, lock the page
- * with DB_RMW.
- *
- * We do multiple cursor operations with the cursor in some cases and
- * subsequently access the data DBT information. Set DB_DBT_MALLOC so
- * we don't risk modification of the data between our uses of it.
- *
- * !!!
- * We don't include the name's nul termination in the database.
- */
- DB_INIT_DBT(key, subdb, strlen(subdb));
- memset(&data, 0, sizeof(data));
- F_SET(&data, DB_DBT_MALLOC);
-
- ret = __dbc_get(dbc, &key, &data,
- DB_SET | ((STD_LOCKING(dbc) && modify) ? DB_RMW : 0));
-
- /*
- * What we do next--whether or not we found a record for the
- * specified subdatabase--depends on what the specified action is.
- * Handle ret appropriately as the first statement of each case.
- */
- switch (action) {
- case MU_REMOVE:
- /*
- * We should have found something if we're removing it. Note
- * that in the common case where the DB we're asking to remove
- * doesn't exist, we won't get this far; __db_subdb_remove
- * will already have returned an error from __db_open.
- */
- if (ret != 0)
- goto err;
-
- /*
- * Delete the subdatabase entry first; if this fails,
- * we don't want to touch the actual subdb pages.
- */
- if ((ret = __dbc_del(dbc, 0)) != 0)
- goto err;
-
- /*
- * We're handling actual data, not on-page meta-data,
- * so it hasn't been converted to/from opposite
- * endian architectures. Do it explicitly, now.
- */
- memcpy(&sdbp->meta_pgno, data.data, sizeof(db_pgno_t));
- DB_NTOHL_SWAP(env, &sdbp->meta_pgno);
- if ((ret = __memp_fget(mdbp->mpf, &sdbp->meta_pgno,
- ip, dbc->txn, DB_MPOOL_DIRTY, &p)) != 0)
- goto err;
-
- /* Free the root on the master db if it was created. */
- if (TYPE(p) == P_BTREEMETA &&
- ((BTMETA *)p)->root != PGNO_INVALID) {
- if ((ret = __memp_fget(mdbp->mpf,
- &((BTMETA *)p)->root, ip, dbc->txn,
- DB_MPOOL_DIRTY, &r)) != 0)
- goto err;
-
- /* Free and put the page. */
- if ((ret = __db_free(dbc, r)) != 0) {
- r = NULL;
- goto err;
- }
- }
- /* Free and put the page. */
- if ((ret = __db_free(dbc, p)) != 0) {
- p = NULL;
- goto err;
- }
- p = NULL;
- break;
- case MU_RENAME:
- /* We should have found something if we're renaming it. */
- if (ret != 0)
- goto err;
-
- /*
- * Before we rename, we need to make sure we're not
- * overwriting another subdatabase, or else this operation
- * won't be undoable. Open a second cursor and check
- * for the existence of newname; it shouldn't appear under
- * us since we hold the metadata lock.
- */
- if ((ret = __db_cursor(mdbp, ip, txn, &ndbc,
- CDB_LOCKING(env) ? DB_WRITECURSOR : 0)) != 0)
- goto err;
- DB_SET_DBT(key, newname, strlen(newname));
-
- /*
- * We don't actually care what the meta page of the potentially-
- * overwritten DB is; we just care about existence.
- */
- memset(&ndata, 0, sizeof(ndata));
- F_SET(&ndata, DB_DBT_USERMEM | DB_DBT_PARTIAL);
-
- if ((ret = __dbc_get(ndbc, &key, &ndata, DB_SET)) == 0) {
- /* A subdb called newname exists. Bail. */
- ret = EEXIST;
- __db_errx(env, "rename: database %s exists", newname);
- goto err;
- } else if (ret != DB_NOTFOUND)
- goto err;
-
- /*
- * Now do the put first; we don't want to lose our only
- * reference to the subdb. Use the second cursor so the
- * first one continues to point to the old record.
- */
- if ((ret = __dbc_put(ndbc, &key, &data, DB_KEYFIRST)) != 0)
- goto err;
- if ((ret = __dbc_del(dbc, 0)) != 0) {
- /*
- * If the delete fails, try to delete the record
- * we just put, in case we're not txn-protected.
- */
- (void)__dbc_del(ndbc, 0);
- goto err;
- }
-
- break;
- case MU_OPEN:
- /*
- * Get the subdatabase information. If it already exists,
- * copy out the page number and we're done.
- */
- switch (ret) {
- case 0:
- if (LF_ISSET(DB_CREATE) && LF_ISSET(DB_EXCL)) {
- ret = EEXIST;
- goto err;
- }
- memcpy(&sdbp->meta_pgno, data.data, sizeof(db_pgno_t));
- DB_NTOHL_SWAP(env, &sdbp->meta_pgno);
- goto done;
- case DB_NOTFOUND:
- if (LF_ISSET(DB_CREATE))
- break;
- /*
- * No db_err, it is reasonable to remove a
- * nonexistent db.
- */
- ret = ENOENT;
- goto err;
- default:
- goto err;
- }
-
- /* Create a subdatabase. */
- if ((ret = __db_new(dbc,
- type == DB_HASH ? P_HASHMETA : P_BTREEMETA, NULL, &p)) != 0)
- goto err;
- sdbp->meta_pgno = PGNO(p);
-
- /*
- * XXX
- * We're handling actual data, not on-page meta-data, so it
- * hasn't been converted to/from opposite endian architectures.
- * Do it explicitly, now.
- */
- t_pgno = PGNO(p);
- DB_HTONL_SWAP(env, &t_pgno);
- memset(&ndata, 0, sizeof(ndata));
- ndata.data = &t_pgno;
- ndata.size = sizeof(db_pgno_t);
- if ((ret = __dbc_put(dbc, &key, &ndata, 0)) != 0)
- goto err;
- F_SET(sdbp, DB_AM_CREATED);
- break;
- }
-
-err:
-done: /*
- * If we allocated a page: if we're successful, mark the page dirty
- * and return it to the cache, otherwise, discard/free it.
- */
- if (p != NULL && (t_ret = __memp_fput(mdbp->mpf,
- dbc->thread_info, p, dbc->priority)) != 0 && ret == 0)
- ret = t_ret;
-
- /* Discard the cursor(s) and data. */
- if (data.data != NULL)
- __os_ufree(env, data.data);
- if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
- if (ndbc != NULL && (t_ret = __dbc_close(ndbc)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __env_setup --
- * Set up the underlying environment during a db_open.
- *
- * PUBLIC: int __env_setup __P((DB *,
- * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t, u_int32_t));
*/
-int
-__env_setup(dbp, txn, fname, dname, id, flags)
- DB *dbp;
- DB_TXN *txn;
- const char *fname, *dname;
- u_int32_t id, flags;
-{
- DB *ldbp;
- DB_ENV *dbenv;
- ENV *env;
- u_int32_t maxid;
- int ret;
-
- env = dbp->env;
- dbenv = env->dbenv;
- /* If we don't yet have an environment, it's time to create it. */
- if (!F_ISSET(env, ENV_OPEN_CALLED)) {
- /* Make sure we have at least DB_MINCACHE pages in our cache. */
- if (dbenv->mp_gbytes == 0 &&
- dbenv->mp_bytes < dbp->pgsize * DB_MINPAGECACHE &&
- (ret = __memp_set_cachesize(
- dbenv, 0, dbp->pgsize * DB_MINPAGECACHE, 0)) != 0)
- return (ret);
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)db.c 8.4 (Berkeley) 2/21/94";
+#endif /* LIBC_SCCS and not lint */
- if ((ret = __env_open(dbenv, NULL, DB_CREATE |
- DB_INIT_MPOOL | DB_PRIVATE | LF_ISSET(DB_THREAD), 0)) != 0)
- return (ret);
- }
+#include <sys/types.h>
- /* Join the underlying cache. */
- if ((!F_ISSET(dbp, DB_AM_INMEM) || dname == NULL) &&
- (ret = __env_mpool(dbp, fname, flags)) != 0)
- return (ret);
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
- /* We may need a per-thread mutex. */
- if (LF_ISSET(DB_THREAD) && (ret = __mutex_alloc(
- env, MTX_DB_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbp->mutex)) != 0)
- return (ret);
-
- /*
- * Set up a bookkeeping entry for this database in the log region,
- * if such a region exists. Note that even if we're in recovery
- * or a replication client, where we won't log registries, we'll
- * still need an FNAME struct, so LOGGING_ON is the correct macro.
- */
- if (LOGGING_ON(env) && dbp->log_filename == NULL
-#if !defined(DEBUG_ROP) && !defined(DEBUG_WOP) && !defined(DIAGNOSTIC)
- && (txn != NULL || F_ISSET(dbp, DB_AM_RECOVER))
-#endif
-#if !defined(DEBUG_ROP)
- && !F_ISSET(dbp, DB_AM_RDONLY)
-#endif
- ) {
- if ((ret = __dbreg_setup(dbp,
- F_ISSET(dbp, DB_AM_INMEM) ? dname : fname,
- F_ISSET(dbp, DB_AM_INMEM) ? NULL : dname, id)) != 0)
- return (ret);
-
- /*
- * If we're actively logging and our caller isn't a
- * recovery function that already did so, then assign
- * this dbp a log fileid.
- */
- if (DBENV_LOGGING(env) && !F_ISSET(dbp, DB_AM_RECOVER) &&
- (ret = __dbreg_new_id(dbp, txn)) != 0)
- return (ret);
- }
-
- /*
- * Insert ourselves into the ENV's dblist. We allocate a
- * unique ID to each {fileid, meta page number} pair, and to
- * each temporary file (since they all have a zero fileid).
- * This ID gives us something to use to tell which DB handles
- * go with which databases in all the cursor adjustment
- * routines, where we don't want to do a lot of ugly and
- * expensive memcmps.
- */
- MUTEX_LOCK(env, env->mtx_dblist);
- maxid = 0;
- TAILQ_FOREACH(ldbp, &env->dblist, dblistlinks) {
- /*
- * There are three cases: on-disk database (first clause),
- * named in-memory database (second clause), temporary database
- * (never matches; no clause).
- */
- if (!F_ISSET(dbp, DB_AM_INMEM)) {
- if (memcmp(ldbp->fileid, dbp->fileid, DB_FILE_ID_LEN)
- == 0 && ldbp->meta_pgno == dbp->meta_pgno)
- break;
- } else if (dname != NULL) {
- if (F_ISSET(ldbp, DB_AM_INMEM) &&
- ldbp->dname != NULL &&
- strcmp(ldbp->dname, dname) == 0)
- break;
- }
- if (ldbp->adj_fileid > maxid)
- maxid = ldbp->adj_fileid;
- }
-
- /*
- * If ldbp is NULL, we didn't find a match. Assign the dbp an
- * adj_fileid one higher than the largest we found, and
- * insert it at the head of the master dbp list.
- *
- * If ldbp is not NULL, it is a match for our dbp. Give dbp
- * the same ID that ldbp has, and add it after ldbp so they're
- * together in the list.
- */
- if (ldbp == NULL) {
- dbp->adj_fileid = maxid + 1;
- TAILQ_INSERT_HEAD(&env->dblist, dbp, dblistlinks);
- } else {
- dbp->adj_fileid = ldbp->adj_fileid;
- TAILQ_INSERT_AFTER(&env->dblist, ldbp, dbp, dblistlinks);
- }
- MUTEX_UNLOCK(env, env->mtx_dblist);
-
- return (0);
-}
+#include <db.h>
-/*
- * __env_mpool --
- * Set up the underlying environment cache during a db_open.
- *
- * PUBLIC: int __env_mpool __P((DB *, const char *, u_int32_t));
- */
-int
-__env_mpool(dbp, fname, flags)
- DB *dbp;
+DB *
+dbopen(fname, flags, mode, type, openinfo)
const char *fname;
- u_int32_t flags;
-{
- DBT pgcookie;
- DB_MPOOLFILE *mpf;
- DB_PGINFO pginfo;
- ENV *env;
- int fidset, ftype, ret;
- int32_t lsn_off;
- u_int8_t nullfid[DB_FILE_ID_LEN];
- u_int32_t clear_len;
-
- env = dbp->env;
-
- /* The LSN is the first entry on a DB page, byte offset 0. */
- lsn_off = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LSN_OFF_NOTSET : 0;
-
- /* It's possible that this database is already open. */
- if (F_ISSET(dbp, DB_AM_OPEN_CALLED))
- return (0);
-
- /*
- * If we need to pre- or post-process a file's pages on I/O, set the
- * file type. If it's a hash file, always call the pgin and pgout
- * routines. This means that hash files can never be mapped into
- * process memory. If it's a btree file and requires swapping, we
- * need to page the file in and out. This has to be right -- we can't
- * mmap files that are being paged in and out.
- */
- switch (dbp->type) {
- case DB_BTREE:
- case DB_RECNO:
- ftype = F_ISSET(dbp, DB_AM_SWAP | DB_AM_ENCRYPT | DB_AM_CHKSUM)
- ? DB_FTYPE_SET : DB_FTYPE_NOTSET;
- clear_len = CRYPTO_ON(env) ?
- (dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET) :
- DB_PAGE_DB_LEN;
- break;
- case DB_HASH:
- ftype = DB_FTYPE_SET;
- clear_len = CRYPTO_ON(env) ?
- (dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET) :
- DB_PAGE_DB_LEN;
- break;
- case DB_QUEUE:
- ftype = F_ISSET(dbp,
- DB_AM_SWAP | DB_AM_ENCRYPT | DB_AM_CHKSUM) ?
- DB_FTYPE_SET : DB_FTYPE_NOTSET;
-
- /*
- * If we came in here without a pagesize set, then we need
- * to mark the in-memory handle as having clear_len not
- * set, because we don't really know the clear length or
- * the page size yet (since the file doesn't yet exist).
- */
- clear_len = dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET;
- break;
- case DB_UNKNOWN:
- /*
- * If we're running in the verifier, our database might
- * be corrupt and we might not know its type--but we may
- * still want to be able to verify and salvage.
- *
- * If we can't identify the type, it's not going to be safe
- * to call __db_pgin--we pretty much have to give up all
- * hope of salvaging cross-endianness. Proceed anyway;
- * at worst, the database will just appear more corrupt
- * than it actually is, but at best, we may be able
- * to salvage some data even with no metadata page.
- */
- if (F_ISSET(dbp, DB_AM_VERIFYING)) {
- ftype = DB_FTYPE_NOTSET;
- clear_len = DB_PAGE_DB_LEN;
- break;
- }
-
- /*
- * This might be an in-memory file and we won't know its
- * file type until after we open it and read the meta-data
- * page.
- */
- if (F_ISSET(dbp, DB_AM_INMEM)) {
- clear_len = DB_CLEARLEN_NOTSET;
- ftype = DB_FTYPE_NOTSET;
- lsn_off = DB_LSN_OFF_NOTSET;
- break;
- }
- /* FALLTHROUGH */
- default:
- return (__db_unknown_type(env, "DB->open", dbp->type));
- }
-
- mpf = dbp->mpf;
-
- memset(nullfid, 0, DB_FILE_ID_LEN);
- fidset = memcmp(nullfid, dbp->fileid, DB_FILE_ID_LEN);
- if (fidset)
- (void)__memp_set_fileid(mpf, dbp->fileid);
-
- (void)__memp_set_clear_len(mpf, clear_len);
- (void)__memp_set_ftype(mpf, ftype);
- (void)__memp_set_lsn_offset(mpf, lsn_off);
-
- pginfo.db_pagesize = dbp->pgsize;
- pginfo.flags =
- F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP));
- pginfo.type = dbp->type;
- pgcookie.data = &pginfo;
- pgcookie.size = sizeof(DB_PGINFO);
- (void)__memp_set_pgcookie(mpf, &pgcookie);
-
-#ifndef DIAG_MVCC
- if (F_ISSET(env->dbenv, DB_ENV_MULTIVERSION))
-#endif
- if (F_ISSET(dbp, DB_AM_TXN) &&
- dbp->type != DB_QUEUE && dbp->type != DB_UNKNOWN)
- LF_SET(DB_MULTIVERSION);
-
- if ((ret = __memp_fopen(mpf, NULL, fname, &dbp->dirname,
- LF_ISSET(DB_CREATE | DB_DURABLE_UNKNOWN | DB_MULTIVERSION |
- DB_NOMMAP | DB_ODDFILESIZE | DB_RDONLY | DB_TRUNCATE) |
- (F_ISSET(env->dbenv, DB_ENV_DIRECT_DB) ? DB_DIRECT : 0) |
- (F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_TXN_NOT_DURABLE : 0),
- 0, dbp->pgsize)) != 0) {
- /*
- * The open didn't work; we need to reset the mpf,
- * retaining the in-memory semantics (if any).
- */
- (void)__memp_fclose(dbp->mpf, 0);
- (void)__memp_fcreate(env, &dbp->mpf);
- if (F_ISSET(dbp, DB_AM_INMEM))
- MAKE_INMEM(dbp);
- return (ret);
- }
-
- /*
- * Set the open flag. We use it to mean that the dbp has gone
- * through mpf setup, including dbreg_register. Also, below,
- * the underlying access method open functions may want to do
- * things like acquire cursors, so the open flag has to be set
- * before calling them.
- */
- F_SET(dbp, DB_AM_OPEN_CALLED);
- if (!fidset && fname != NULL) {
- (void)__memp_get_fileid(dbp->mpf, dbp->fileid);
- dbp->preserve_fid = 1;
- }
-
- return (0);
-}
-
-/*
- * __db_close --
- * DB->close method.
- *
- * PUBLIC: int __db_close __P((DB *, DB_TXN *, u_int32_t));
- */
-int
-__db_close(dbp, txn, flags)
- DB *dbp;
- DB_TXN *txn;
- u_int32_t flags;
-{
- ENV *env;
- int db_ref, deferred_close, ret, t_ret;
-
- env = dbp->env;
- deferred_close = ret = 0;
-
- /*
- * Validate arguments, but as a DB handle destructor, we can't fail.
- *
- * Check for consistent transaction usage -- ignore errors. Only
- * internal callers specify transactions, so it's a serious problem
- * if we get error messages.
- */
- if (txn != NULL)
- (void)__db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0);
-
- /* Refresh the structure and close any underlying resources. */
- ret = __db_refresh(dbp, txn, flags, &deferred_close, 0);
-
- /*
- * If we've deferred the close because the logging of the close failed,
- * return our failure right away without destroying the handle.
- */
- if (deferred_close)
- return (ret);
-
- /* !!!
- * This code has an apparent race between the moment we read and
- * decrement env->db_ref and the moment we check whether it's 0.
- * However, if the environment is DBLOCAL, the user shouldn't have a
- * reference to the env handle anyway; the only way we can get
- * multiple dbps sharing a local env is if we open them internally
- * during something like a subdatabase open. If any such thing is
- * going on while the user is closing the original dbp with a local
- * env, someone's already badly screwed up, so there's no reason
- * to bother engineering around this possibility.
- */
- MUTEX_LOCK(env, env->mtx_dblist);
- db_ref = --env->db_ref;
- MUTEX_UNLOCK(env, env->mtx_dblist);
- if (F_ISSET(env, ENV_DBLOCAL) && db_ref == 0 &&
- (t_ret = __env_close(env->dbenv, 0)) != 0 && ret == 0)
- ret = t_ret;
-
- /* Free the database handle. */
- memset(dbp, CLEAR_BYTE, sizeof(*dbp));
- __os_free(env, dbp);
-
- return (ret);
-}
-
-/*
- * __db_refresh --
- * Refresh the DB structure, releasing any allocated resources.
- * This does most of the work of closing files now because refresh
- * is what is used during abort processing (since we can't destroy
- * the actual handle) and during abort processing, we may have a
- * fully opened handle.
- *
- * PUBLIC: int __db_refresh __P((DB *, DB_TXN *, u_int32_t, int *, int));
- */
-int
-__db_refresh(dbp, txn, flags, deferred_closep, reuse)
- DB *dbp;
- DB_TXN *txn;
- u_int32_t flags;
- int *deferred_closep, reuse;
+ int flags, mode;
+ DBTYPE type;
+ const void *openinfo;
{
- DB *sdbp;
- DBC *dbc;
- DB_FOREIGN_INFO *f_info, *tmp;
- DB_LOCKER *locker;
- DB_LOCKREQ lreq;
- ENV *env;
- REGENV *renv;
- REGINFO *infop;
- u_int32_t save_flags;
- int resync, ret, t_ret;
-
- ret = 0;
- env = dbp->env;
- infop = env->reginfo;
- if (infop != NULL)
- renv = infop->primary;
- else
- renv = NULL;
-
- /*
- * If this dbp is not completely open, avoid trapping by trying to
- * sync without an mpool file.
- */
- if (dbp->mpf == NULL)
- LF_SET(DB_NOSYNC);
-
- /* If never opened, or not currently open, it's easy. */
- if (!F_ISSET(dbp, DB_AM_OPEN_CALLED))
- goto never_opened;
-
- /*
- * If we have any secondary indices, disassociate them from us.
- * We don't bother with the mutex here; it only protects some
- * of the ops that will make us core-dump mid-close anyway, and
- * if you're trying to do something with a secondary *while* you're
- * closing the primary, you deserve what you get. The disassociation
- * is mostly done just so we can close primaries and secondaries in
- * any order--but within one thread of control.
- */
- LIST_FOREACH(sdbp, &dbp->s_secondaries, s_links) {
- LIST_REMOVE(sdbp, s_links);
- if ((t_ret = __db_disassociate(sdbp)) != 0 && ret == 0)
- ret = t_ret;
- }
-
- /*
- * Disassociate ourself from any databases using us as a foreign key
- * database by clearing the referring db's pointer. Reclaim memory.
- */
- f_info = LIST_FIRST(&dbp->f_primaries);
- while (f_info != NULL) {
- tmp = LIST_NEXT(f_info, f_links);
- LIST_REMOVE(f_info, f_links);
- f_info->dbp->s_foreign = NULL;
- __os_free(env, f_info);
- f_info = tmp;
- }
-
- if (dbp->s_foreign != NULL &&
- (t_ret = __db_disassociate_foreign(dbp)) != 0 && ret == 0)
- ret = t_ret;
-
- /*
- * Sync the underlying access method. Do before closing the cursors
- * because DB->sync allocates cursors in order to write Recno backing
- * source text files.
- *
- * Sync is slow on some systems, notably Solaris filesystems where the
- * entire buffer cache is searched. If we're in recovery, don't flush
- * the file, it's not necessary.
- */
- if (!LF_ISSET(DB_NOSYNC) &&
- !F_ISSET(dbp, DB_AM_DISCARD | DB_AM_RECOVER) &&
- (t_ret = __db_sync(dbp)) != 0 && ret == 0)
- ret = t_ret;
-
- /*
- * Go through the active cursors and call the cursor recycle routine,
- * which resolves pending operations and moves the cursors onto the
- * free list. Then, walk the free list and call the cursor destroy
- * routine. Note that any failure on a close is considered "really
- * bad" and we just break out of the loop and force forward.
- */
- resync = TAILQ_FIRST(&dbp->active_queue) == NULL ? 0 : 1;
- while ((dbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
- if ((t_ret = __dbc_close(dbc)) != 0) {
- if (ret == 0)
- ret = t_ret;
- break;
- }
-
- while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL)
- if ((t_ret = __dbc_destroy(dbc)) != 0) {
- if (ret == 0)
- ret = t_ret;
- break;
- }
-
- /*
- * Close any outstanding join cursors. Join cursors destroy themselves
- * on close and have no separate destroy routine. We don't have to set
- * the resync flag here, because join cursors aren't write cursors.
- */
- while ((dbc = TAILQ_FIRST(&dbp->join_queue)) != NULL)
- if ((t_ret = __db_join_close(dbc)) != 0) {
- if (ret == 0)
- ret = t_ret;
- break;
- }
-
- /*
- * Sync the memory pool, even though we've already called DB->sync,
- * because closing cursors can dirty pages by deleting items they
- * referenced.
- *
- * Sync is slow on some systems, notably Solaris filesystems where the
- * entire buffer cache is searched. If we're in recovery, don't flush
- * the file, it's not necessary.
- */
- if (resync && !LF_ISSET(DB_NOSYNC) &&
- !F_ISSET(dbp, DB_AM_DISCARD | DB_AM_RECOVER) &&
- (t_ret = __memp_fsync(dbp->mpf)) != 0 && ret == 0)
- ret = t_ret;
-
-never_opened:
- /*
- * At this point, we haven't done anything to render the DB handle
- * unusable, at least by a transaction abort. Take the opportunity
- * now to log the file close if we have initialized the logging
- * information. If this log fails and we're in a transaction,
- * we have to bail out of the attempted close; we'll need a dbp in
- * order to successfully abort the transaction, and we can't conjure
- * a new one up because we haven't gotten out the dbreg_register
- * record that represents the close. In this case, we put off
- * actually closing the dbp until we've performed the abort.
- */
- if (!reuse && LOGGING_ON(dbp->env) && dbp->log_filename != NULL) {
- /*
- * Discard the log file id, if any. We want to log the close
- * if and only if this is not a recovery dbp or a client dbp,
- * or a dead dbp handle.
- */
- DB_ASSERT(env, renv != NULL);
- if (F_ISSET(dbp, DB_AM_RECOVER) || IS_REP_CLIENT(env) ||
- dbp->timestamp != renv->rep_timestamp) {
- if ((t_ret = __dbreg_revoke_id(dbp,
- 0, DB_LOGFILEID_INVALID)) == 0 && ret == 0)
- ret = t_ret;
- if ((t_ret = __dbreg_teardown(dbp)) != 0 && ret == 0)
- ret = t_ret;
- } else {
- if ((t_ret = __dbreg_close_id(dbp,
- txn, DBREG_CLOSE)) != 0 && txn != NULL) {
- /*
- * We're in a txn and the attempt to log the
- * close failed; let the txn subsystem know
- * that we need to destroy this dbp once we're
- * done with the abort, then bail from the
- * close.
- *
- * Note that if the attempt to put off the
- * close -also- fails--which it won't unless
- * we're out of heap memory--we're really
- * screwed. Panic.
- */
- if ((ret =
- __txn_closeevent(env, txn, dbp)) != 0)
- return (__env_panic(env, ret));
- if (deferred_closep != NULL)
- *deferred_closep = 1;
- return (t_ret);
- }
- /*
- * If dbreg_close_id failed and we were not in a
- * transaction, then we need to finish this close
- * because the caller can't do anything with the
- * handle after we return an error. We rely on
- * dbreg_close_id to mark the entry in some manner
- * so that we do not do a clean shutdown of this
- * environment. If shutdown isn't clean, then the
- * application *must* run recovery and that will
- * generate the RCLOSE record.
- */
- }
-
- }
-
- /* Close any handle we've been holding since the open. */
- if (dbp->saved_open_fhp != NULL &&
- (t_ret = __os_closehandle(env, dbp->saved_open_fhp)) != 0 &&
- ret == 0)
- ret = t_ret;
-
- /*
- * Remove this DB handle from the ENV's dblist, if it's been added.
- *
- * Close our reference to the underlying cache while locked, we don't
- * want to race with a thread searching for our underlying cache link
- * while opening a DB handle.
- *
- * The DB handle may not yet have been added to the ENV list, don't
- * blindly call the underlying TAILQ_REMOVE macro. Explicitly reset
- * the field values to NULL so that we can't call TAILQ_REMOVE twice.
- */
- MUTEX_LOCK(env, env->mtx_dblist);
- if (!reuse &&
- (dbp->dblistlinks.tqe_next != NULL ||
- dbp->dblistlinks.tqe_prev != NULL)) {
- TAILQ_REMOVE(&env->dblist, dbp, dblistlinks);
- dbp->dblistlinks.tqe_next = NULL;
- dbp->dblistlinks.tqe_prev = NULL;
- }
-
- /* Close the memory pool file handle. */
- if (dbp->mpf != NULL) {
- if ((t_ret = __memp_fclose(dbp->mpf,
- F_ISSET(dbp, DB_AM_DISCARD) ? DB_MPOOL_DISCARD : 0)) != 0 &&
- ret == 0)
- ret = t_ret;
- dbp->mpf = NULL;
- if (reuse &&
- (t_ret = __memp_fcreate(env, &dbp->mpf)) != 0 &&
- ret == 0)
- ret = t_ret;
- }
-
- MUTEX_UNLOCK(env, env->mtx_dblist);
-
- /*
- * Call the access specific close function.
- *
- * We do this here rather than in __db_close as we need to do this when
- * aborting an open so that file descriptors are closed and abort of
- * renames can succeed on platforms that lock open files (such as
- * Windows). In particular, we need to ensure that all the extents
- * associated with a queue are closed so that queue renames can be
- * aborted.
- *
- * It is also important that we do this before releasing the handle
- * lock, because dbremove and dbrename assume that once they have the
- * handle lock, it is safe to modify the underlying file(s).
- *
- * !!!
- * Because of where these functions are called in the DB handle close
- * process, these routines can't do anything that would dirty pages or
- * otherwise affect closing down the database. Specifically, we can't
- * abort and recover any of the information they control.
- */
-#ifdef HAVE_PARTITION
- if (dbp->p_internal != NULL &&
- (t_ret = __partition_close(dbp, txn, flags)) != 0 && ret == 0)
- ret = t_ret;
-#endif
- if ((t_ret = __bam_db_close(dbp)) != 0 && ret == 0)
- ret = t_ret;
- if ((t_ret = __ham_db_close(dbp)) != 0 && ret == 0)
- ret = t_ret;
- if ((t_ret = __qam_db_close(dbp, dbp->flags)) != 0 && ret == 0)
- ret = t_ret;
-
- /*
- * !!!
- * At this point, the access-method specific information has been
- * freed. From now on, we can use the dbp, but not touch any
- * access-method specific data.
- */
-
- if (!reuse && dbp->locker != NULL) {
- /* We may have pending trade operations on this dbp. */
- if (txn == NULL)
- txn = dbp->cur_txn;
- if (IS_REAL_TXN(txn))
- __txn_remlock(env,
- txn, &dbp->handle_lock, dbp->locker);
-
- /* We may be holding the handle lock; release it. */
- lreq.op = DB_LOCK_PUT_ALL;
- lreq.obj = NULL;
- if ((t_ret = __lock_vec(env,
- dbp->locker, 0, &lreq, 1, NULL)) != 0 && ret == 0)
- ret = t_ret;
-
- if ((t_ret =
- __lock_id_free(env, dbp->locker)) != 0 && ret == 0)
- ret = t_ret;
- dbp->locker = NULL;
- LOCK_INIT(dbp->handle_lock);
- }
-
- /*
- * If this is a temporary file (un-named in-memory file), then
- * discard the locker ID allocated as the fileid.
- */
- if (LOCKING_ON(env) &&
- F_ISSET(dbp, DB_AM_INMEM) && !dbp->preserve_fid &&
- *(u_int32_t *)dbp->fileid != DB_LOCK_INVALIDID) {
- if ((t_ret = __lock_getlocker(env->lk_handle,
- *(u_int32_t *)dbp->fileid, 0, &locker)) == 0)
- t_ret = __lock_id_free(env, locker);
- if (ret == 0)
- ret = t_ret;
- }
-
- if (reuse) {
- /*
- * If we are reusing this dbp, then we're done now. Re-init
- * the handle, preserving important flags, and then return.
- * This code is borrowed from __db_init, which does more
- * than we can do here.
- */
- save_flags = F_ISSET(dbp, DB_AM_INMEM | DB_AM_TXN);
-
- if ((ret = __bam_db_create(dbp)) != 0)
- return (ret);
- if ((ret = __ham_db_create(dbp)) != 0)
- return (ret);
- if ((ret = __qam_db_create(dbp)) != 0)
- return (ret);
-
- /* Restore flags */
- dbp->flags = dbp->orig_flags | save_flags;
-
- if (FLD_ISSET(save_flags, DB_AM_INMEM)) {
- /*
- * If this is inmem, then it may have a fileid
- * even if it was never opened, and we need to
- * clear out that fileid.
- */
- memset(dbp->fileid, 0, sizeof(dbp->fileid));
- MAKE_INMEM(dbp);
+#define DB_FLAGS (DB_LOCK | DB_SHMEM | DB_TXN)
+#define USE_OPEN_FLAGS \
+ (O_CREAT | O_EXCL | O_EXLOCK | O_NONBLOCK | O_RDONLY | \
+ O_RDWR | O_SHLOCK | O_TRUNC)
+
+ if ((flags & ~(USE_OPEN_FLAGS | DB_FLAGS)) == 0)
+ switch (type) {
+ case DB_BTREE:
+ return (__bt_open(fname, flags & USE_OPEN_FLAGS,
+ mode, openinfo, flags & DB_FLAGS));
+ case DB_HASH:
+ return (__hash_open(fname, flags & USE_OPEN_FLAGS,
+ mode, openinfo, flags & DB_FLAGS));
+ case DB_RECNO:
+ return (__rec_open(fname, flags & USE_OPEN_FLAGS,
+ mode, openinfo, flags & DB_FLAGS));
}
- return (ret);
- }
-
- dbp->type = DB_UNKNOWN;
-
- /*
- * The thread mutex may have been invalidated in __dbreg_close_id if the
- * fname refcount did not go to 0. If not, discard the thread mutex.
- */
- if ((t_ret = __mutex_free(env, &dbp->mutex)) != 0 && ret == 0)
- ret = t_ret;
-
- /* Discard any memory allocated for the file and database names. */
- if (dbp->fname != NULL) {
- __os_free(dbp->env, dbp->fname);
- dbp->fname = NULL;
- }
- if (dbp->dname != NULL) {
- __os_free(dbp->env, dbp->dname);
- dbp->dname = NULL;
- }
-
- /* Discard any memory used to store returned data. */
- if (dbp->my_rskey.data != NULL)
- __os_free(dbp->env, dbp->my_rskey.data);
- if (dbp->my_rkey.data != NULL)
- __os_free(dbp->env, dbp->my_rkey.data);
- if (dbp->my_rdata.data != NULL)
- __os_free(dbp->env, dbp->my_rdata.data);
-
- /* For safety's sake; we may refresh twice. */
- memset(&dbp->my_rskey, 0, sizeof(DBT));
- memset(&dbp->my_rkey, 0, sizeof(DBT));
- memset(&dbp->my_rdata, 0, sizeof(DBT));
-
- /* Clear out fields that normally get set during open. */
- memset(dbp->fileid, 0, sizeof(dbp->fileid));
- dbp->adj_fileid = 0;
- dbp->meta_pgno = 0;
- dbp->cur_locker = NULL;
- dbp->cur_txn = NULL;
- dbp->associate_locker = NULL;
- dbp->cl_id = 0;
- dbp->open_flags = 0;
-
- /*
- * If we are being refreshed with a txn specified, then we need
- * to make sure that we clear out the lock handle field, because
- * releasing all the locks for this transaction will release this
- * lock and we don't want close to stumble upon this handle and
- * try to close it.
- */
- if (txn != NULL)
- LOCK_INIT(dbp->handle_lock);
-
- /* Reset flags to whatever the user configured. */
- dbp->flags = dbp->orig_flags;
-
- return (ret);
+ errno = EINVAL;
+ return (NULL);
}
-/*
- * __db_disassociate --
- * Destroy the association between a given secondary and its primary.
- */
static int
-__db_disassociate(sdbp)
- DB *sdbp;
-{
- DBC *dbc;
- int ret, t_ret;
-
- ret = 0;
-
- sdbp->s_callback = NULL;
- sdbp->s_primary = NULL;
- sdbp->get = sdbp->stored_get;
- sdbp->close = sdbp->stored_close;
-
- /*
- * Complain, but proceed, if we have any active cursors. (We're in
- * the middle of a close, so there's really no turning back.)
- */
- if (sdbp->s_refcnt != 1 ||
- TAILQ_FIRST(&sdbp->active_queue) != NULL ||
- TAILQ_FIRST(&sdbp->join_queue) != NULL) {
- __db_errx(sdbp->env,
- "Closing a primary DB while a secondary DB has active cursors is unsafe");
- ret = EINVAL;
- }
- sdbp->s_refcnt = 0;
-
- while ((dbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL)
- if ((t_ret = __dbc_destroy(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- F_CLR(sdbp, DB_AM_SECONDARY);
- return (ret);
-}
-
-/*
- * __db_disassociate_foreign --
- * Destroy the association between a given secondary and its foreign.
- */
-static int
-__db_disassociate_foreign(sdbp)
- DB *sdbp;
-{
- DB *fdbp;
- DB_FOREIGN_INFO *f_info, *tmp;
- int ret;
-
- if (sdbp->s_foreign == NULL)
- return (0);
- if ((ret = __os_malloc(sdbp->env, sizeof(DB_FOREIGN_INFO), &tmp)) != 0)
- return (ret);
-
- fdbp = sdbp->s_foreign;
- ret = 0;
- f_info = LIST_FIRST(&fdbp->f_primaries);
- while (f_info != NULL) {
- tmp = LIST_NEXT(f_info, f_links);
- if (f_info ->dbp == sdbp) {
- LIST_REMOVE(f_info, f_links);
- __os_free(sdbp->env, f_info);
- }
- f_info = tmp;
- }
-
- return (ret);
-}
-
-/*
- * __db_log_page
- * Log a meta-data or root page during a subdatabase create operation.
- *
- * PUBLIC: int __db_log_page __P((DB *, DB_TXN *, DB_LSN *, db_pgno_t, PAGE *));
- */
-int
-__db_log_page(dbp, txn, lsn, pgno, page)
- DB *dbp;
- DB_TXN *txn;
- DB_LSN *lsn;
- db_pgno_t pgno;
- PAGE *page;
+__dberr()
{
- DBT page_dbt;
- DB_LSN new_lsn;
- int ret;
-
- if (!LOGGING_ON(dbp->env) || txn == NULL)
- return (0);
-
- memset(&page_dbt, 0, sizeof(page_dbt));
- page_dbt.size = dbp->pgsize;
- page_dbt.data = page;
-
- ret = __crdel_metasub_log(dbp, txn, &new_lsn, 0, pgno, &page_dbt, lsn);
-
- if (ret == 0)
- page->lsn = new_lsn;
- return (ret);
+ return (RET_ERROR);
}
/*
- * __db_backup_name
- * Create the backup file name for a given file.
+ * __DBPANIC -- Stop.
*
- * PUBLIC: int __db_backup_name __P((ENV *,
- * PUBLIC: const char *, DB_TXN *, char **));
+ * Parameters:
+ * dbp: pointer to the DB structure.
*/
-#undef BACKUP_PREFIX
-#define BACKUP_PREFIX "__db."
-
-#undef MAX_INT_TO_HEX
-#define MAX_INT_TO_HEX 8
-
-int
-__db_backup_name(env, name, txn, backup)
- ENV *env;
- const char *name;
- DB_TXN *txn;
- char **backup;
-{
- u_int32_t id;
- size_t len;
- int ret;
- char *p, *retp;
-
- *backup = NULL;
-
- /*
- * Part of the name may be a full path, so we need to make sure that
- * we allocate enough space for it, even in the case where we don't
- * use the entire filename for the backup name.
- */
- len = strlen(name) + strlen(BACKUP_PREFIX) + 2 * MAX_INT_TO_HEX + 1;
- if ((ret = __os_malloc(env, len, &retp)) != 0)
- return (ret);
-
- /*
- * Create the name. Backup file names are in one of 2 forms: in a
- * transactional env "__db.TXNID.ID", where ID is a random number,
- * and in any other env "__db.FILENAME".
- *
- * In addition, the name passed may contain an env-relative path.
- * In that case, put the "__db." in the right place (in the last
- * component of the pathname).
- *
- * There are four cases here:
- * 1. simple path w/out transaction
- * 2. simple path + transaction
- * 3. multi-component path w/out transaction
- * 4. multi-component path + transaction
- */
- p = __db_rpath(name);
- if (IS_REAL_TXN(txn)) {
- __os_unique_id(env, &id);
- if (p == NULL) /* Case 2. */
- snprintf(retp, len, "%s%x.%x",
- BACKUP_PREFIX, txn->txnid, id);
- else /* Case 4. */
- snprintf(retp, len, "%.*s%x.%x",
- (int)(p - name) + 1, name, txn->txnid, id);
- } else {
- if (p == NULL) /* Case 1. */
- snprintf(retp, len, "%s%s", BACKUP_PREFIX, name);
- else /* Case 3. */
- snprintf(retp, len, "%.*s%s%s",
- (int)(p - name) + 1, name, BACKUP_PREFIX, p + 1);
- }
-
- *backup = retp;
- return (0);
-}
-
-#ifdef CONFIG_TEST
-/*
- * __db_testcopy
- * Create a copy of all backup files and our "main" DB.
- *
- * PUBLIC: #ifdef CONFIG_TEST
- * PUBLIC: int __db_testcopy __P((ENV *, DB *, const char *));
- * PUBLIC: #endif
- */
-int
-__db_testcopy(env, dbp, name)
- ENV *env;
- DB *dbp;
- const char *name;
-{
- DB_MPOOL *dbmp;
- DB_MPOOLFILE *mpf;
-
- DB_ASSERT(env, dbp != NULL || name != NULL);
-
- if (name == NULL) {
- dbmp = env->mp_handle;
- mpf = dbp->mpf;
- name = R_ADDR(dbmp->reginfo, mpf->mfp->path_off);
- }
-
- if (dbp != NULL && dbp->type == DB_QUEUE)
- return (__qam_testdocopy(dbp, name));
- else
-#ifdef HAVE_PARTITION
- if (dbp != NULL && DB_IS_PARTITIONED(dbp))
- return (__part_testdocopy(dbp, name));
- else
-#endif
- return (__db_testdocopy(env, name));
-}
-
-static int
-__qam_testdocopy(dbp, name)
+void
+__dbpanic(dbp)
DB *dbp;
- const char *name;
{
- DB_THREAD_INFO *ip;
- QUEUE_FILELIST *filelist, *fp;
- int ret;
- char buf[DB_MAXPATHLEN], *dir;
-
- filelist = NULL;
- if ((ret = __db_testdocopy(dbp->env, name)) != 0)
- return (ret);
-
- /* Call ENV_GET_THREAD_INFO to get a valid DB_THREAD_INFO */
- ENV_GET_THREAD_INFO(dbp->env, ip);
- if (dbp->mpf != NULL &&
- (ret = __qam_gen_filelist(dbp, ip, &filelist)) != 0)
- goto done;
-
- if (filelist == NULL)
- return (0);
- dir = ((QUEUE *)dbp->q_internal)->dir;
- for (fp = filelist; fp->mpf != NULL; fp++) {
- snprintf(buf, sizeof(buf),
- QUEUE_EXTENT, dir, PATH_SEPARATOR[0], name, fp->id);
- if ((ret = __db_testdocopy(dbp->env, buf)) != 0)
- return (ret);
- }
-
-done: __os_free(dbp->env, filelist);
- return (0);
-}
-
-/*
- * __db_testdocopy
- * Create a copy of all backup files and our "main" DB.
- * PUBLIC: int __db_testdocopy __P((ENV *, const char *));
- */
-int
-__db_testdocopy(env, name)
- ENV *env;
- const char *name;
-{
- size_t len;
- int dircnt, i, ret;
- char *copy, **namesp, *p, *real_name;
-
- dircnt = 0;
- copy = NULL;
- namesp = NULL;
-
- /* Create the real backing file name. */
- if ((ret = __db_appname(env,
- DB_APP_DATA, name, NULL, &real_name)) != 0)
- return (ret);
-
- /*
- * !!!
- * There are tests that attempt to copy non-existent files. I'd guess
- * it's a testing bug, but I don't have time to figure it out. Block
- * the case here.
- */
- if (__os_exists(env, real_name, NULL) != 0) {
- __os_free(env, real_name);
- return (0);
- }
-
- /*
- * Copy the file itself.
- *
- * Allocate space for the file name, including adding an ".afterop" and
- * trailing nul byte.
- */
- len = strlen(real_name) + sizeof(".afterop");
- if ((ret = __os_malloc(env, len, &copy)) != 0)
- goto err;
- snprintf(copy, len, "%s.afterop", real_name);
- if ((ret = __db_makecopy(env, real_name, copy)) != 0)
- goto err;
-
- /*
- * Get the directory path to call __os_dirlist().
- */
- if ((p = __db_rpath(real_name)) != NULL)
- *p = '\0';
- if ((ret = __os_dirlist(env, real_name, 0, &namesp, &dircnt)) != 0)
- goto err;
-
- /*
- * Walk the directory looking for backup files. Backup file names in
- * transactional environments are of the form:
- *
- * BACKUP_PREFIX.TXNID.ID
- */
- for (i = 0; i < dircnt; i++) {
- /* Check for a related backup file name. */
- if (strncmp(
- namesp[i], BACKUP_PREFIX, sizeof(BACKUP_PREFIX) - 1) != 0)
- continue;
- p = namesp[i] + sizeof(BACKUP_PREFIX);
- p += strspn(p, "0123456789ABCDEFabcdef");
- if (*p != '.')
- continue;
- ++p;
- p += strspn(p, "0123456789ABCDEFabcdef");
- if (*p != '\0')
- continue;
-
- /*
- * Copy the backup file.
- *
- * Allocate space for the file name, including adding a
- * ".afterop" and trailing nul byte.
- */
- if (real_name != NULL) {
- __os_free(env, real_name);
- real_name = NULL;
- }
- if ((ret = __db_appname(env,
- DB_APP_DATA, namesp[i], NULL, &real_name)) != 0)
- goto err;
- if (copy != NULL) {
- __os_free(env, copy);
- copy = NULL;
- }
- len = strlen(real_name) + sizeof(".afterop");
- if ((ret = __os_malloc(env, len, &copy)) != 0)
- goto err;
- snprintf(copy, len, "%s.afterop", real_name);
- if ((ret = __db_makecopy(env, real_name, copy)) != 0)
- goto err;
- }
-
-err: if (namesp != NULL)
- __os_dirfree(env, namesp, dircnt);
- if (copy != NULL)
- __os_free(env, copy);
- if (real_name != NULL)
- __os_free(env, real_name);
- return (ret);
-}
-
-static int
-__db_makecopy(env, src, dest)
- ENV *env;
- const char *src, *dest;
-{
- DB_FH *rfhp, *wfhp;
- size_t rcnt, wcnt;
- int ret;
- char *buf;
-
- rfhp = wfhp = NULL;
-
- if ((ret = __os_malloc(env, 64 * 1024, &buf)) != 0)
- goto err;
-
- if ((ret = __os_open(env, src, 0,
- DB_OSO_RDONLY, DB_MODE_600, &rfhp)) != 0)
- goto err;
- if ((ret = __os_open(env, dest, 0,
- DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &wfhp)) != 0)
- goto err;
-
- for (;;) {
- if ((ret =
- __os_read(env, rfhp, buf, sizeof(buf), &rcnt)) != 0)
- goto err;
- if (rcnt == 0)
- break;
- if ((ret =
- __os_write(env, wfhp, buf, sizeof(buf), &wcnt)) != 0)
- goto err;
- }
-
- if (0) {
-err: __db_err(env, ret, "__db_makecopy: %s -> %s", src, dest);
- }
-
- if (buf != NULL)
- __os_free(env, buf);
- if (rfhp != NULL)
- (void)__os_closehandle(env, rfhp);
- if (wfhp != NULL)
- (void)__os_closehandle(env, wfhp);
- return (ret);
+ /* The only thing that can succeed is a close. */
+ dbp->del = (int (*)())__dberr;
+ dbp->fd = (int (*)())__dberr;
+ dbp->get = (int (*)())__dberr;
+ dbp->put = (int (*)())__dberr;
+ dbp->seq = (int (*)())__dberr;
+ dbp->sync = (int (*)())__dberr;
}
-#endif
diff --git a/db/db.src b/db/db.src
deleted file mode 100644
index 2136b79..0000000
--- a/db/db.src
+++ /dev/null
@@ -1,328 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-DBPRIVATE
-PREFIX __db
-
-INCLUDE #include "db_int.h"
-INCLUDE #include "dbinc/crypto.h"
-INCLUDE #include "dbinc/db_page.h"
-INCLUDE #include "dbinc/db_dispatch.h"
-INCLUDE #include "dbinc/db_am.h"
-INCLUDE #include "dbinc/log.h"
-INCLUDE #include "dbinc/txn.h"
-INCLUDE
-
-/*
- * addrem -- Add or remove an entry from a duplicate page.
- *
- * opcode: identifies if this is an add or delete.
- * fileid: file identifier of the file being modified.
- * pgno: duplicate page number.
- * indx: location at which to insert or delete.
- * nbytes: number of bytes added/removed to/from the page.
- * hdr: header for the data item.
- * dbt: data that is deleted or is to be added.
- * pagelsn: former lsn of the page.
- *
- * If the hdr was NULL then, the dbt is a regular B_KEYDATA.
- * If the dbt was NULL then the hdr is a complete item to be
- * pasted on the page.
- */
-BEGIN addrem 42 41
-ARG opcode u_int32_t lu
-DB fileid int32_t ld
-ARG pgno db_pgno_t lu
-ARG indx u_int32_t lu
-ARG nbytes u_int32_t lu
-DBT hdr DBT s
-DBT dbt DBT s
-POINTER pagelsn DB_LSN * lu
-END
-
-/*
- * big -- Handles addition and deletion of big key/data items.
- *
- * opcode: identifies get/put.
- * fileid: file identifier of the file being modified.
- * pgno: page onto which data is being added/removed.
- * prev_pgno: the page before the one we are logging.
- * next_pgno: the page after the one we are logging.
- * dbt: data being written onto the page.
- * pagelsn: former lsn of the orig_page.
- * prevlsn: former lsn of the prev_pgno.
- * nextlsn: former lsn of the next_pgno. This is not currently used, but
- * may be used later if we actually do overwrites of big key/
- * data items in place.
- */
-BEGIN big 42 43
-ARG opcode u_int32_t lu
-DB fileid int32_t ld
-ARG pgno db_pgno_t lu
-ARG prev_pgno db_pgno_t lu
-ARG next_pgno db_pgno_t lu
-DBT dbt DBT s
-POINTER pagelsn DB_LSN * lu
-POINTER prevlsn DB_LSN * lu
-POINTER nextlsn DB_LSN * lu
-END
-
-/*
- * ovref -- Handles increment/decrement of overflow page reference count.
- *
- * fileid: identifies the file being modified.
- * pgno: page number whose ref count is being incremented/decremented.
- * adjust: the adjustment being made.
- * lsn: the page's original lsn.
- */
-BEGIN ovref 42 44
-DB fileid int32_t ld
-ARG pgno db_pgno_t lu
-ARG adjust int32_t ld
-POINTER lsn DB_LSN * lu
-END
-
-/*
- * relink -- Handles relinking around a page.
- *
- * opcode: indicates if this is an addpage or delete page
- * pgno: the page being changed.
- * lsn the page's original lsn.
- * prev: the previous page.
- * lsn_prev: the previous page's original lsn.
- * next: the next page.
- * lsn_next: the previous page's original lsn.
- */
-BEGIN_COMPAT relink 42 45
-ARG opcode u_int32_t lu
-DB fileid int32_t ld
-ARG pgno db_pgno_t lu
-POINTER lsn DB_LSN * lu
-ARG prev db_pgno_t lu
-POINTER lsn_prev DB_LSN * lu
-ARG next db_pgno_t lu
-POINTER lsn_next DB_LSN * lu
-END
-
-/*
- * Debug -- log an operation upon entering an access method.
- * op: Operation (cursor, c_close, c_get, c_put, c_del,
- * get, put, delete).
- * fileid: identifies the file being acted upon.
- * key: key paramater
- * data: data parameter
- * flags: flags parameter
- */
-BEGIN debug 42 47
-DBT op DBT s
-ARG fileid int32_t ld
-DBT key DBT s
-DBT data DBT s
-ARG arg_flags u_int32_t lu
-END
-
-/*
- * noop -- do nothing, but get an LSN.
- */
-BEGIN noop 42 48
-DB fileid int32_t ld
-ARG pgno db_pgno_t lu
-POINTER prevlsn DB_LSN * lu
-END
-
-/*
- * pg_alloc: used to record allocating a new page.
- *
- * meta_lsn: the original lsn of the page reference by meta_pgno.
- * meta_pgno the page pointing at the allocated page in the free list.
- * If the list is unsorted this is the metadata page.
- * page_lsn: the allocated page's original lsn.
- * pgno: the page allocated.
- * ptype: the type of the page allocated.
- * next: the next page on the free list.
- * last_pgno: the last page in the file after this op (4.3+).
- */
-BEGIN_COMPAT pg_alloc 42 49
-DB fileid int32_t ld
-POINTER meta_lsn DB_LSN * lu
-ARG meta_pgno db_pgno_t lu
-POINTER page_lsn DB_LSN * lu
-ARG pgno db_pgno_t lu
-ARG ptype u_int32_t lu
-ARG next db_pgno_t lu
-END
-
-BEGIN pg_alloc 43 49
-DB fileid int32_t ld
-POINTER meta_lsn DB_LSN * lu
-ARG meta_pgno db_pgno_t lu
-POINTER page_lsn DB_LSN * lu
-ARG pgno db_pgno_t lu
-ARG ptype u_int32_t lu
-ARG next db_pgno_t lu
-ARG last_pgno db_pgno_t lu
-END
-
-/*
- * pg_free: used to record freeing a page.
- * If we are maintaining a sorted free list (during compact) meta_pgno
- * will be non-zero and refer to the page that preceeds the one we are freeing
- * in the free list. Meta_lsn will then be the lsn of that page.
- *
- * pgno: the page being freed.
- * meta_lsn: the meta-data page's original lsn.
- * meta_pgno: the meta-data page number.
- * header: the header from the free'd page.
- * next: the previous next pointer on the metadata page.
- * last_pgno: the last page in the file before this op (4.3+).
- */
-BEGIN_COMPAT pg_free 42 50
-DB fileid int32_t ld
-ARG pgno db_pgno_t lu
-POINTER meta_lsn DB_LSN * lu
-ARG meta_pgno db_pgno_t lu
-PGDBT header DBT s
-ARG next db_pgno_t lu
-END
-
-BEGIN pg_free 43 50
-DB fileid int32_t ld
-ARG pgno db_pgno_t lu
-POINTER meta_lsn DB_LSN * lu
-ARG meta_pgno db_pgno_t lu
-PGDBT header DBT s
-ARG next db_pgno_t lu
-ARG last_pgno db_pgno_t lu
-END
-
-/*
- * cksum --
- * This log record is written when we're unable to checksum a page,
- * before returning DB_RUNRECOVERY. This log record causes normal
- * recovery to itself return DB_RUNRECOVERY, as only catastrophic
- * recovery can fix things.
- */
-BEGIN cksum 42 51
-END
-
-/*
- * pg_freedata: used to record freeing a page with data on it.
- *
- * pgno: the page being freed.
- * meta_lsn: the meta-data page's original lsn.
- * meta_pgno: the meta-data page number.
- * header: the header and index entries from the free'd page.
- * data: the data from the free'd page.
- * next: the previous next pointer on the metadata page.
- * last_pgno: the last page in the file before this op (4.3+).
- */
-BEGIN_COMPAT pg_freedata 42 52
-DB fileid int32_t ld
-ARG pgno db_pgno_t lu
-POINTER meta_lsn DB_LSN * lu
-ARG meta_pgno db_pgno_t lu
-PGDBT header DBT s
-ARG next db_pgno_t lu
-PGDDBT data DBT s
-END
-
-BEGIN pg_freedata 43 52
-DB fileid int32_t ld
-ARG pgno db_pgno_t lu
-POINTER meta_lsn DB_LSN * lu
-ARG meta_pgno db_pgno_t lu
-PGDBT header DBT s
-ARG next db_pgno_t lu
-ARG last_pgno db_pgno_t lu
-PGDDBT data DBT s
-END
-
-/*
- * pg_prepare: used to record an aborted page in a prepared transaction.
- *
- * pgno: the page being freed.
- */
-X BEGIN pg_prepare 42 53
-X DB fileid int32_t ld
-X ARG pgno db_pgno_t lu
-X END
-
-/*
- * pg_new: used to record a new page put on the free list.
- *
- * pgno: the page being freed.
- * meta_lsn: the meta-data page's original lsn.
- * meta_pgno: the meta-data page number.
- * header: the header from the free'd page.
- * next: the previous next pointer on the metadata page.
- */
-X BEGIN pg_new 42 54
-X DB fileid int32_t ld
-X ARG pgno db_pgno_t lu
-X POINTER meta_lsn DB_LSN * lu
-X ARG meta_pgno db_pgno_t lu
-X PGDBT header DBT s
-X ARG next db_pgno_t lu
-X END
-
-/*
- * pg_init: used to reinitialize a page during truncate.
- *
- * pgno: the page being initialized.
- * header: the header from the page.
- * data: data that used to be on the page.
- */
-BEGIN pg_init 43 60
-DB fileid int32_t ld
-ARG pgno db_pgno_t lu
-PGDBT header DBT s
-PGDDBT data DBT s
-END
-
-/*
- * pg_sort: sort the free list
- *
- * meta: meta page number
- * meta_lsn: lsn on meta page.
- * last_free: page number of new last free page.
- * last_lsn; lsn of last free page.
- * last_pgno: current last page number.
- * list: list of pages and lsns to sort.
- */
-BEGIN_COMPAT pg_sort 44 61
-DB fileid int32_t ld
-ARG meta db_pgno_t lu
-POINTER meta_lsn DB_LSN * lu
-ARG last_free db_pgno_t lu
-POINTER last_lsn DB_LSN * lu
-ARG last_pgno db_pgno_t lu
-DBT list DBT s
-END
-
-
-/*
- * pg_truc: truncate the free list
- *
- * meta: meta page number
- * meta_lsn: lsn on meta page.
- * last_free: page number of new last free page.
- * last_lsn; lsn of last free page.
- * last_pgno: current last page number.
- * list: list of pages and lsns on free list.
- */
-BEGIN pg_trunc 49 66
-DB fileid int32_t ld
-ARG meta db_pgno_t lu
-POINTER meta_lsn DB_LSN * lu
-ARG last_free db_pgno_t lu
-POINTER last_lsn DB_LSN * lu
-ARG next_free db_pgno_t lu
-ARG last_pgno db_pgno_t lu
-DBT list DBT s
-END
-
diff --git a/db/db_am.c b/db/db_am.c
deleted file mode 100644
index c453ea9..0000000
--- a/db/db_am.c
+++ /dev/null
@@ -1,1015 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1998-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/btree.h"
-#include "dbinc/hash.h"
-#include "dbinc/lock.h"
-#include "dbinc/log.h"
-#include "dbinc/mp.h"
-#include "dbinc/partition.h"
-#include "dbinc/qam.h"
-#include "dbinc/txn.h"
-
-static int __db_secondary_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
-static int __dbc_set_priority __P((DBC *, DB_CACHE_PRIORITY));
-static int __dbc_get_priority __P((DBC *, DB_CACHE_PRIORITY* ));
-
-/*
- * __db_cursor_int --
- *	Internal routine to create a cursor.
- *
- *	A cached cursor of the requested type is taken from dbp->free_queue
- *	when one is available; otherwise a new DBC is allocated and given its
- *	one-time locker and access-method initialization.  Either way the
- *	cursor is then refreshed for this use, linked onto dbp->active_queue
- *	and returned through dbcp.
- *
- *	dbp:	database handle the cursor is created on.
- *	ip:	thread info to record on the cursor; when NULL,
- *		txn->thread_info or ENV_GET_THREAD_INFO is used instead.
- *	txn:	enclosing transaction, or NULL.
- *	dbtype:	access method of the cursor.  With off-page duplicates this
- *		may differ from the type of other cursors on the same handle.
- *	root:	stored in the cursor's internal root field.
- *	flags:	DB_CURSOR_BULK, DB_CURSOR_TRANSIENT, DBC_OPD and
- *		DBC_DUPLICATE are examined here.
- *	locker:	when non-NULL, txn is NULL and locking is on, the locker to
- *		use in place of the cursor's default one.
- *	dbcp:	set to the new cursor on success only.
- *
- *	Returns 0 on success, otherwise a non-zero error code.
- *
- * PUBLIC: int __db_cursor_int __P((DB *, DB_THREAD_INFO *,
- * PUBLIC:     DB_TXN *, DBTYPE, db_pgno_t, int, DB_LOCKER *, DBC **));
- */
-int
-__db_cursor_int(dbp, ip, txn, dbtype, root, flags, locker, dbcp)
-	DB *dbp;
-	DB_THREAD_INFO *ip;
-	DB_TXN *txn;
-	DBTYPE dbtype;
-	db_pgno_t root;
-	int flags;
-	DB_LOCKER *locker;
-	DBC **dbcp;
-{
-	DBC *dbc;
-	DBC_INTERNAL *cp;
-	ENV *env;
-	db_threadid_t tid;
-	int allocated, ret;
-	pid_t pid;
-
-	env = dbp->env;
-	allocated = 0;
-
-	/*
-	 * Take a cursor from the free list if one is available.  Take only
-	 * the right type.  With off page dups we may have different kinds
-	 * of cursors on the queue for a single database.
-	 */
-	MUTEX_LOCK(env, dbp->mutex);
-
-#ifndef HAVE_NO_DB_REFCOUNT
-	/*
-	 * If this DBP is being logged then refcount the log filename
-	 * relative to this transaction.  We do this here because we have
-	 * the dbp->mutex which protects the refcount.  We want to avoid
-	 * calling the function if we are duplicating a cursor.  This includes
-	 * the case of creating an off page duplicate cursor.  If we know this
-	 * cursor will not be used in an update, we could avoid this,
-	 * but we don't have that information.
-	 */
-	if (txn != NULL && !LF_ISSET(DBC_OPD | DBC_DUPLICATE)
-	    && !F_ISSET(dbp, DB_AM_RECOVER) &&
-	    dbp->log_filename != NULL && !IS_REP_CLIENT(env) &&
-	    (ret = __txn_record_fname(env, txn, dbp->log_filename)) != 0) {
-		MUTEX_UNLOCK(env, dbp->mutex);
-		return (ret);
-	}
-
-#endif
-
-	/* The test below relies on dbc being NULL when nothing matched. */
-	TAILQ_FOREACH(dbc, &dbp->free_queue, links)
-		if (dbtype == dbc->dbtype) {
-			TAILQ_REMOVE(&dbp->free_queue, dbc, links);
-			/* Drop every flag except ownership of the locker. */
-			F_CLR(dbc, ~DBC_OWN_LID);
-			break;
-		}
-	MUTEX_UNLOCK(env, dbp->mutex);
-
-	if (dbc == NULL) {
-		if ((ret = __os_calloc(env, 1, sizeof(DBC), &dbc)) != 0)
-			return (ret);
-		allocated = 1;
-		dbc->flags = 0;
-
-		dbc->dbp = dbp;
-		dbc->dbenv = dbp->dbenv;
-		dbc->env = dbp->env;
-
-		/* Set up locking information. */
-		if (LOCKING_ON(env)) {
-			/*
-			 * If we are not threaded, we share a locker ID among
-			 * all cursors opened in the environment handle,
-			 * allocating one if this is the first cursor.
-			 *
-			 * This relies on the fact that non-threaded DB handles
-			 * always have non-threaded environment handles, since
-			 * we set DB_THREAD on DB handles created with threaded
-			 * environment handles.
-			 */
-			if (!DB_IS_THREADED(dbp)) {
-				if (env->env_lref == NULL && (ret =
-				    __lock_id(env, NULL, &env->env_lref)) != 0)
-					goto err;
-				dbc->lref = env->env_lref;
-			} else {
-				if ((ret =
-				    __lock_id(env, NULL, &dbc->lref)) != 0)
-					goto err;
-				F_SET(dbc, DBC_OWN_LID);
-			}
-
-			/*
-			 * In CDB, secondary indices should share a lock file
-			 * ID with the primary; otherwise we're susceptible
-			 * to deadlocks.  We also use __db_cursor_int rather
-			 * than __db_cursor to create secondary update cursors
-			 * in c_put and c_del; these won't acquire a new lock.
-			 *
-			 * !!!
-			 * Since this is in the one-time cursor allocation
-			 * code, we need to be sure to destroy, not just
-			 * close, all cursors in the secondary when we
-			 * associate.
-			 */
-			if (CDB_LOCKING(env) &&
-			    F_ISSET(dbp, DB_AM_SECONDARY))
-				memcpy(dbc->lock.fileid,
-				    dbp->s_primary->fileid, DB_FILE_ID_LEN);
-			else
-				memcpy(dbc->lock.fileid,
-				    dbp->fileid, DB_FILE_ID_LEN);
-
-			if (CDB_LOCKING(env)) {
-				if (F_ISSET(env->dbenv, DB_ENV_CDB_ALLDB)) {
-					/*
-					 * If we are doing a single lock per
-					 * environment, set up the global
-					 * lock object just like we do to
-					 * single thread creates.
-					 */
-					DB_ASSERT(env, sizeof(db_pgno_t) ==
-					    sizeof(u_int32_t));
-					dbc->lock_dbt.size = sizeof(u_int32_t);
-					dbc->lock_dbt.data = &dbc->lock.pgno;
-					dbc->lock.pgno = 0;
-				} else {
-					dbc->lock_dbt.size = DB_FILE_ID_LEN;
-					dbc->lock_dbt.data = dbc->lock.fileid;
-				}
-			} else {
-				dbc->lock.type = DB_PAGE_LOCK;
-				dbc->lock_dbt.size = sizeof(dbc->lock);
-				dbc->lock_dbt.data = &dbc->lock;
-			}
-		}
-		/* Init the DBC internal structure. */
-#ifdef HAVE_PARTITION
-		if (DB_IS_PARTITIONED(dbp)) {
-			if ((ret = __partc_init(dbc)) != 0)
-				goto err;
-		} else
-#endif
-		switch (dbtype) {
-		case DB_BTREE:
-		case DB_RECNO:
-			if ((ret = __bamc_init(dbc, dbtype)) != 0)
-				goto err;
-			break;
-		case DB_HASH:
-			if ((ret = __hamc_init(dbc)) != 0)
-				goto err;
-			break;
-		case DB_QUEUE:
-			if ((ret = __qamc_init(dbc)) != 0)
-				goto err;
-			break;
-		case DB_UNKNOWN:
-		default:
-			ret = __db_unknown_type(env, "DB->cursor", dbtype);
-			goto err;
-		}
-	}
-
-	/* Refresh the DBC structure. */
-	dbc->dbtype = dbtype;
-	RESET_RET_MEM(dbc);
-	dbc->set_priority = __dbc_set_priority;
-	dbc->get_priority = __dbc_get_priority;
-	dbc->priority = dbp->priority;
-
-	if ((dbc->txn = txn) != NULL)
-		dbc->locker = txn->locker;
-	else if (LOCKING_ON(env)) {
-		/*
-		 * There are certain cases in which we want to create a
-		 * new cursor with a particular locker ID that is known
-		 * to be the same as (and thus not conflict with) an
-		 * open cursor.
-		 *
-		 * The most obvious case is cursor duplication; when we
-		 * call DBC->dup or __dbc_idup, we want to use the original
-		 * cursor's locker ID.
-		 *
-		 * Another case is when updating secondary indices.  Standard
-		 * CDB locking would mean that we might block ourself:  we need
-		 * to open an update cursor in the secondary while an update
-		 * cursor in the primary is open, and when the secondary and
-		 * primary are subdatabases or we're using env-wide locking,
-		 * this is disastrous.
-		 *
-		 * In these cases, our caller will pass a non-NULL locker
-		 * into this function.  Use this locker instead of
-		 * the default as the locker for our new cursor.
-		 */
-		if (locker != NULL)
-			dbc->locker = locker;
-		else {
-			/*
-			 * If we are threaded then we need to set the
-			 * proper thread id into the locker.
-			 */
-			if (DB_IS_THREADED(dbp)) {
-				env->dbenv->thread_id(env->dbenv, &pid, &tid);
-				__lock_set_thread_id(dbc->lref, pid, tid);
-			}
-			dbc->locker = dbc->lref;
-		}
-	}
-
-	/*
-	 * These fields change when we are used as a secondary index, so
-	 * if the DB is a secondary, make sure they're set properly just
-	 * in case we opened some cursors before we were associated.
-	 *
-	 * __dbc_get is used by all access methods, so this should be safe.
-	 */
-	if (F_ISSET(dbp, DB_AM_SECONDARY))
-		dbc->get = dbc->c_get = __dbc_secondary_get_pp;
-
-	if (LF_ISSET(DB_CURSOR_BULK) && dbtype == DB_BTREE)
-		F_SET(dbc, DBC_BULK);
-	if (LF_ISSET(DB_CURSOR_TRANSIENT))
-		F_SET(dbc, DBC_TRANSIENT);
-	if (LF_ISSET(DBC_OPD))
-		F_SET(dbc, DBC_OPD);
-	if (F_ISSET(dbp, DB_AM_RECOVER))
-		F_SET(dbc, DBC_RECOVER);
-	if (F_ISSET(dbp, DB_AM_COMPENSATE))
-		F_SET(dbc, DBC_DONTLOCK);
-#ifdef HAVE_REPLICATION
-	/*
-	 * If we are replicating from a down rev version then we must
-	 * use old locking protocols.
-	 */
-	if (LOGGING_ON(env) &&
-	    ((LOG *)env->lg_handle->
-	    reginfo.primary)->persist.version < DB_LOGVERSION_LATCHING)
-		F_SET(dbc, DBC_DOWNREV);
-#endif
-
-	/* Refresh the DBC internal structure. */
-	cp = dbc->internal;
-	cp->opd = NULL;
-	cp->pdbc = NULL;
-
-	cp->indx = 0;
-	cp->page = NULL;
-	cp->pgno = PGNO_INVALID;
-	cp->root = root;
-	cp->stream_start_pgno = cp->stream_curr_pgno = PGNO_INVALID;
-	cp->stream_off = 0;
-
-	if (DB_IS_PARTITIONED(dbp)) {
-		DBC_PART_REFRESH(dbc);
-	} else switch (dbtype) {
-	case DB_BTREE:
-	case DB_RECNO:
-		if ((ret = __bamc_refresh(dbc)) != 0)
-			goto err;
-		break;
-	case DB_HASH:
-	case DB_QUEUE:
-		break;
-	case DB_UNKNOWN:
-	default:
-		/* Report the type we switched on, as the init path does. */
-		ret = __db_unknown_type(env, "DB->cursor", dbtype);
-		goto err;
-	}
-
-	/*
-	 * The transaction keeps track of how many cursors were opened within
-	 * it to catch application errors where the cursor isn't closed when
-	 * the transaction is resolved.
-	 */
-	if (txn != NULL)
-		++txn->cursors;
-	if (ip != NULL)
-		dbc->thread_info = ip;
-	else if (txn != NULL)
-		dbc->thread_info = txn->thread_info;
-	else
-		ENV_GET_THREAD_INFO(env, dbc->thread_info);
-
-	MUTEX_LOCK(env, dbp->mutex);
-	TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links);
-	F_SET(dbc, DBC_ACTIVE);
-	MUTEX_UNLOCK(env, dbp->mutex);
-
-	*dbcp = dbc;
-	return (0);
-
-	/*
-	 * Only a cursor allocated by this call is freed here.
-	 * NOTE(review): a cursor taken from the free queue is not put back
-	 * on this path, and a locker or access-method state set up above is
-	 * not released -- confirm whether that is intended.
-	 */
-err:	if (allocated)
-		__os_free(env, dbc);
-	return (ret);
-}
-
-/*
- * __db_put --
- *	Store a key/data pair.
- *
- *	Opens a write cursor on dbp, performs the put through it and closes
- *	the cursor again.  DB_APPEND on a non-primary database is routed to
- *	the queue/recno append functions; DB_MULTIPLE and DB_MULTIPLE_KEY
- *	walk the bulk buffers and put each entry in turn, counting the
- *	successful puts in key->doff.
- *
- *	Returns 0 on success, otherwise the first non-zero error seen
- *	(including an error from closing the cursor).
- *
- * PUBLIC: int __db_put __P((DB *,
- * PUBLIC:      DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, u_int32_t));
- */
-int
-__db_put(dbp, ip, txn, key, data, flags)
-	DB *dbp;
-	DB_THREAD_INFO *ip;
-	DB_TXN *txn;
-	DBT *key, *data;
-	u_int32_t flags;
-{
-	DBC *dbc;
-	DBT tdata, tkey;
-	ENV *env;
-	void *bulk_kptr, *bulk_ptr;
-	db_recno_t recno;
-	u_int32_t cursor_flags;
-	int ret, t_ret;
-
-	env = dbp->env;
-
-	/*
-	 * See the comment in __db_get() regarding DB_CURSOR_TRANSIENT.
-	 *
-	 * Note that the get in the DB_NOOVERWRITE case is safe to do with this
-	 * flag set;  if it errors in any way other than DB_NOTFOUND, we're
-	 * going to close the cursor without doing anything else, and if it
-	 * returns DB_NOTFOUND then it's safe to do a c_put(DB_KEYLAST) even if
-	 * an access method moved the cursor, since that's not
-	 * position-dependent.
-	 */
-	cursor_flags = DB_WRITELOCK;
-	if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY))
-		cursor_flags |= DB_CURSOR_BULK;
-	else
-		cursor_flags |= DB_CURSOR_TRANSIENT;
-	if ((ret = __db_cursor(dbp, ip, txn, &dbc, cursor_flags)) != 0)
-		return (ret);
-
-	DEBUG_LWRITE(dbc, txn, "DB->put", key, data, flags);
-
-	SET_RET_MEM(dbc, dbp);
-
-	if (flags == DB_APPEND && !DB_IS_PRIMARY(dbp)) {
-		/*
-		 * If there is an append callback, the value stored in
-		 * data->data may be replaced and then freed.  To avoid
-		 * passing a freed pointer back to the user, just operate
-		 * on a copy of the data DBT.
-		 */
-		tdata = *data;
-
-		/*
-		 * Append isn't a normal put operation;  call the appropriate
-		 * access method's append function.
-		 */
-		switch (dbp->type) {
-		case DB_QUEUE:
-			ret = __qam_append(dbc, key, &tdata);
-			break;
-		case DB_RECNO:
-			ret = __ram_append(dbc, key, &tdata);
-			break;
-		case DB_BTREE:
-		case DB_HASH:
-		case DB_UNKNOWN:
-		default:
-			/* The interface should prevent this. */
-			DB_ASSERT(env,
-			    dbp->type == DB_QUEUE || dbp->type == DB_RECNO);
-
-			ret = __db_ferr(env, "DB->put", 0);
-			goto err;
-		}
-
-		/*
-		 * The append callback, if one exists, may have allocated
-		 * a new tdata.data buffer.  If so, free it.  This is done
-		 * whether or not the append succeeded, so that a failure
-		 * after the callback ran does not leak the buffer.
-		 */
-		FREE_IF_NEEDED(env, &tdata);
-
-		/* No need for a cursor put; we're done. */
-#ifdef HAVE_COMPRESSION
-	} else if (DB_IS_COMPRESSED(dbp) && !F_ISSET(dbp, DB_AM_SECONDARY) &&
-	    !DB_IS_PRIMARY(dbp) && LIST_FIRST(&dbp->f_primaries) == NULL) {
-		ret = __dbc_put(dbc, key, data, flags);
-#endif
-	} else if (LF_ISSET(DB_MULTIPLE)) {
-		/* Keys and data items arrive in two parallel bulk buffers. */
-		ret = 0;
-		memset(&tkey, 0, sizeof(tkey));
-		if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) {
-			tkey.data = &recno;
-			tkey.size = sizeof(recno);
-		}
-		memset(&tdata, 0, sizeof(tdata));
-		DB_MULTIPLE_INIT(bulk_kptr, key);
-		DB_MULTIPLE_INIT(bulk_ptr, data);
-		/* key->doff counts the pairs stored successfully. */
-		key->doff = 0;
-		while (ret == 0) {
-			if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO)
-				DB_MULTIPLE_RECNO_NEXT(bulk_kptr, key,
-				    recno, tdata.data, tdata.size);
-			else
-				DB_MULTIPLE_NEXT(bulk_kptr, key,
-				    tkey.data, tkey.size);
-			DB_MULTIPLE_NEXT(bulk_ptr, data,
-			    tdata.data, tdata.size);
-			if (bulk_kptr == NULL || bulk_ptr == NULL)
-				break;
-			ret = __dbc_put(dbc, &tkey, &tdata,
-			    LF_ISSET(DB_OPFLAGS_MASK));
-			if (ret == 0)
-				++key->doff;
-		}
-	} else if (LF_ISSET(DB_MULTIPLE_KEY)) {
-		/* Key/data pairs are interleaved in the single key buffer. */
-		ret = 0;
-		memset(&tkey, 0, sizeof(tkey));
-		if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) {
-			tkey.data = &recno;
-			tkey.size = sizeof(recno);
-		}
-		memset(&tdata, 0, sizeof(tdata));
-		DB_MULTIPLE_INIT(bulk_ptr, key);
-		/*
-		 * Start the count of stored pairs from zero, as the
-		 * DB_MULTIPLE case does, rather than from whatever the
-		 * caller left in doff.
-		 */
-		key->doff = 0;
-		while (ret == 0) {
-			if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO)
-				DB_MULTIPLE_RECNO_NEXT(bulk_ptr, key, recno,
-				    tdata.data, tdata.size);
-			else
-				DB_MULTIPLE_KEY_NEXT(bulk_ptr, key, tkey.data,
-				    tkey.size, tdata.data, tdata.size);
-			if (bulk_ptr == NULL)
-				break;
-			ret = __dbc_put(dbc, &tkey, &tdata,
-			    LF_ISSET(DB_OPFLAGS_MASK));
-			if (ret == 0)
-				++key->doff;
-		}
-	} else
-		ret = __dbc_put(dbc, key, data, flags);
-
-err:	/* Close the cursor; an earlier error takes precedence. */
-	if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
-		ret = t_ret;
-
-	return (ret);
-}
-
-/*
- * __db_del --
- *	Delete the items referenced by a key.
- *
- *	Opens a write cursor on dbp, deletes the record(s) matching key and
- *	closes the cursor.  With DB_MULTIPLE or DB_MULTIPLE_KEY the keys (or
- *	key/data pairs) are taken one at a time from the bulk buffer in key,
- *	and the number of entries deleted is returned in key->doff.
- *
- *	Returns 0 on success, otherwise the first non-zero error seen
- *	(including an error from closing the cursor).
- *
- * PUBLIC: int __db_del __P((DB *,
- * PUBLIC:      DB_THREAD_INFO *, DB_TXN *, DBT *, u_int32_t));
- */
-int
-__db_del(dbp, ip, txn, key, flags)
-	DB *dbp;
-	DB_THREAD_INFO *ip;
-	DB_TXN *txn;
-	DBT *key;
-	u_int32_t flags;
-{
-	DBC *dbc;
-	DBT data, tkey;
-	void *bulk_ptr;
-	db_recno_t recno;
-	u_int32_t cursor_flags, f_init, f_next;
-	int ret, t_ret;
-
-	COMPQUIET(bulk_ptr, NULL);
-	/*
-	 * Allocate a cursor.  If that fails there is no cursor to close, so
-	 * return directly:  jumping to err would hand the still-uninitialized
-	 * dbc to __dbc_close.
-	 */
-	cursor_flags = DB_WRITELOCK;
-	if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY))
-		cursor_flags |= DB_CURSOR_BULK;
-	if ((ret = __db_cursor(dbp, ip, txn, &dbc, cursor_flags)) != 0)
-		return (ret);
-
-	DEBUG_LWRITE(dbc, txn, "DB->del", key, NULL, flags);
-
-#ifdef HAVE_COMPRESSION
-	if (DB_IS_COMPRESSED(dbp) && !F_ISSET(dbp, DB_AM_SECONDARY) &&
-	    !DB_IS_PRIMARY(dbp) && LIST_FIRST(&dbp->f_primaries) == NULL) {
-		F_SET(dbc, DBC_TRANSIENT);
-		ret = __dbc_bulk_del(dbc, key, flags);
-		goto err;
-	}
-#endif
-
-	/*
-	 * Walk a cursor through the key/data pairs, deleting as we go.  Set
-	 * the DB_DBT_USERMEM flag, as this might be a threaded application
-	 * and the flags checking will catch us.  We don't actually want the
-	 * keys or data, set DB_DBT_ISSET.  We rely on __dbc_get to clear
-	 * this.
-	 */
-	memset(&data, 0, sizeof(data));
-	F_SET(&data, DB_DBT_USERMEM);
-	tkey = *key;
-
-	f_init = LF_ISSET(DB_MULTIPLE_KEY) ? DB_GET_BOTH : DB_SET;
-	f_next = DB_NEXT_DUP;
-
-	/*
-	 * If locking (and we haven't already acquired CDB locks), set the
-	 * read-modify-write flag.
-	 */
-	if (STD_LOCKING(dbc)) {
-		f_init |= DB_RMW;
-		f_next |= DB_RMW;
-	}
-
-	if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
-		if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) {
-			memset(&tkey, 0, sizeof(tkey));
-			tkey.data = &recno;
-			tkey.size = sizeof(recno);
-		}
-		DB_MULTIPLE_INIT(bulk_ptr, key);
-		/* We return the number of keys deleted in doff. */
-		key->doff = 0;
-		/* Re-entered from "next" below for each bulk entry. */
-bulk_next:	if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO)
-			DB_MULTIPLE_RECNO_NEXT(bulk_ptr, key,
-			    recno, data.data, data.size);
-		else if (LF_ISSET(DB_MULTIPLE))
-			DB_MULTIPLE_NEXT(bulk_ptr, key, tkey.data, tkey.size);
-		else
-			DB_MULTIPLE_KEY_NEXT(bulk_ptr, key,
-			    tkey.data, tkey.size, data.data, data.size);
-		/* The bulk buffer is exhausted:  we're done. */
-		if (bulk_ptr == NULL)
-			goto err;
-	}
-
-	/* We're not interested in the data -- do not return it. */
-	F_SET(&tkey, DB_DBT_ISSET);
-	F_SET(&data, DB_DBT_ISSET);
-
-	/*
-	 * Optimize the simple cases.  For all AMs if we don't have secondaries
-	 * and are not a secondary and we aren't a foreign database and there
-	 * are no dups then we can avoid a bunch of overhead.  For queue we
-	 * don't need to fetch the record since we delete by direct calculation
-	 * from the record number.
-	 *
-	 * Hash permits an optimization in DB->del: since on-page duplicates are
-	 * stored in a single HKEYDATA structure, it's possible to delete an
-	 * entire set of them at once, and as the HKEYDATA has to be rebuilt
-	 * and re-put each time it changes, this is much faster than deleting
-	 * the duplicates one by one.  Thus, if not pointing at an off-page
-	 * duplicate set, and we're not using secondary indices (in which case
-	 * we'd have to examine the items one by one anyway), let hash do this
-	 * "quick delete".
-	 *
-	 * !!!
-	 * Note that this is the only application-executed delete call in
-	 * Berkeley DB that does not go through the __dbc_del function.
-	 * If anything other than the delete itself (like a secondary index
-	 * update) has to happen there in a particular situation, the
-	 * conditions here should be modified not to use these optimizations.
-	 * The ordinary AM-independent alternative will work just fine;
-	 * it'll just be slower.
-	 */
-	if (!F_ISSET(dbp, DB_AM_SECONDARY) && !DB_IS_PRIMARY(dbp) &&
-	    LIST_FIRST(&dbp->f_primaries) == NULL) {
-#ifdef HAVE_QUEUE
-		if (dbp->type == DB_QUEUE) {
-			ret = __qam_delete(dbc, &tkey, flags);
-			goto next;
-		}
-#endif
-
-		/* Fetch the first record. */
-		if ((ret = __dbc_get(dbc, &tkey, &data, f_init)) != 0)
-			goto err;
-
-#ifdef HAVE_HASH
-		/*
-		 * Hash "quick delete" removes all on-page duplicates.  We
-		 * can't do that if deleting specific key/data pairs.
-		 */
-		if (dbp->type == DB_HASH && !LF_ISSET(DB_MULTIPLE_KEY)) {
-			DBC *sdbc;
-			sdbc = dbc;
-#ifdef HAVE_PARTITION
-			if (F_ISSET(dbc, DBC_PARTITIONED))
-				sdbc =
-				    ((PART_CURSOR*)dbc->internal)->sub_cursor;
-#endif
-			if (sdbc->internal->opd == NULL) {
-				ret = __ham_quick_delete(sdbc);
-				goto next;
-			}
-		}
-#endif
-
-		if (!F_ISSET(dbp, DB_AM_DUP)) {
-			ret = dbc->am_del(dbc, 0);
-			goto next;
-		}
-	} else if ((ret = __dbc_get(dbc, &tkey, &data, f_init)) != 0)
-		goto err;
-
-	/* Walk through the set of key/data pairs, deleting as we go. */
-	for (;;) {
-		if ((ret = __dbc_del(dbc, flags)) != 0)
-			break;
-		/*
-		 * With DB_MULTIPLE_KEY, the application has specified the
-		 * exact records they want deleted.  We don't need to walk
-		 * through a set of duplicates.
-		 */
-		if (LF_ISSET(DB_MULTIPLE_KEY))
-			break;
-
-		F_SET(&tkey, DB_DBT_ISSET);
-		F_SET(&data, DB_DBT_ISSET);
-		if ((ret = __dbc_get(dbc, &tkey, &data, f_next)) != 0) {
-			/* Running out of duplicates is the normal exit. */
-			if (ret == DB_NOTFOUND)
-				ret = 0;
-			break;
-		}
-	}
-
-	/* In bulk mode, count this entry and go fetch the next one. */
-next:	if (ret == 0 && LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
-		++key->doff;
-		goto bulk_next;
-	}
-err:	/* Discard the cursor; an earlier error takes precedence. */
-	if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
-		ret = t_ret;
-
-	return (ret);
-}
-
-/*
- * __db_sync --
- *	Flush the database cache.
- *
- *	Read-only handles return 0 immediately.  A Recno handle first
- *	writes its backing source text file.  In-memory databases stop
- *	there; otherwise the partition, queue or memory pool sync is run.
- *
- *	Returns 0 on success, otherwise a non-zero error code.
- *
- * PUBLIC: int __db_sync __P((DB *));
- */
-int
-__db_sync(dbp)
-	DB *dbp;
-{
-	int ret, t_ret;
-
-	/* If the database was read-only, we're done. */
-	if (F_ISSET(dbp, DB_AM_RDONLY))
-		return (0);
-
-	/* If it's a Recno tree, write the backing source text file. */
-	ret = dbp->type == DB_RECNO ? __ram_writeback(dbp) : 0;
-
-	/* If the database was never backed by a database file, we're done. */
-	if (F_ISSET(dbp, DB_AM_INMEM))
-		return (ret);
-
-	/*
-	 * The partition and queue syncs return their own result, replacing
-	 * whatever the writeback above produced.
-	 */
-#ifdef HAVE_PARTITION
-	if (DB_IS_PARTITIONED(dbp))
-		return (__partition_sync(dbp));
-#endif
-	if (dbp->type == DB_QUEUE)
-		return (__qam_sync(dbp));
-
-	/*
-	 * Flush any dirty pages from the cache to the backing file, keeping
-	 * an earlier writeback error if there was one.
-	 */
-	if ((t_ret = __memp_fsync(dbp->mpf)) != 0 && ret == 0)
-		ret = t_ret;
-
-	return (ret);
-}
-
-/*
- * __db_associate --
- * Associate another database as a secondary index to this one.
- *
- * dbp is the primary and sdbp the database being made its secondary;
- * callback is stored as the secondary's key-extraction function.
- * If DB_CREATE is set and the secondary is found to be empty, the
- * primary is walked and each key the callback produces is put into the
- * secondary. DB_IMMUTABLE_KEY is recorded in the secondary's
- * association flags.
- *
- * Returns 0 on success, otherwise the first non-zero error seen
- * (including an error from closing either cursor).
- *
- * PUBLIC: int __db_associate __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB *,
- * PUBLIC: int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t));
- */
-int
-__db_associate(dbp, ip, txn, sdbp, callback, flags)
- DB *dbp, *sdbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- int (*callback) __P((DB *, const DBT *, const DBT *, DBT *));
- u_int32_t flags;
-{
- DBC *pdbc, *sdbc;
- DBT key, data, skey, *tskeyp;
- ENV *env;
- int build, ret, t_ret;
- u_int32_t nskey;
-
- env = dbp->env;
- pdbc = sdbc = NULL;
- ret = 0;
-
- memset(&skey, 0, sizeof(DBT));
- nskey = 0;
- tskeyp = NULL;
-
- /*
- * Check to see if the secondary is empty -- and thus if we should
- * build it -- before we link it in and risk making it show up in other
- * threads. Do this first so that the databases remain unassociated on
- * error.
- */
- build = 0;
- if (LF_ISSET(DB_CREATE)) {
- if ((ret = __db_cursor(sdbp, ip, txn, &sdbc, 0)) != 0)
- goto err;
-
- /*
- * We don't care about key or data; we're just doing
- * an existence check.
- */
- memset(&key, 0, sizeof(DBT));
- memset(&data, 0, sizeof(DBT));
- F_SET(&key, DB_DBT_PARTIAL | DB_DBT_USERMEM);
- F_SET(&data, DB_DBT_PARTIAL | DB_DBT_USERMEM);
- if ((ret = __dbc_get(sdbc, &key, &data,
- (STD_LOCKING(sdbc) ? DB_RMW : 0) |
- DB_FIRST)) == DB_NOTFOUND) {
- build = 1;
- ret = 0;
- }
-
- if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0)
- ret = t_ret;
-
- /* Reset for later error check. */
- sdbc = NULL;
-
- if (ret != 0)
- goto err;
- }
-
- /*
- * Set up the database handle as a secondary. The handle's original
- * get and close methods are saved in stored_get and stored_close
- * before being replaced.
- */
- sdbp->s_callback = callback;
- sdbp->s_primary = dbp;
-
- sdbp->stored_get = sdbp->get;
- sdbp->get = __db_secondary_get;
-
- sdbp->stored_close = sdbp->close;
- sdbp->close = __db_secondary_close_pp;
-
- F_SET(sdbp, DB_AM_SECONDARY);
-
- if (LF_ISSET(DB_IMMUTABLE_KEY))
- FLD_SET(sdbp->s_assoc_flags, DB_ASSOC_IMMUTABLE_KEY);
-
- /*
- * Add the secondary to the list on the primary. Do it here
- * so that we see any updates that occur while we're walking
- * the primary.
- */
- MUTEX_LOCK(env, dbp->mutex);
-
- /* See __db_s_next for an explanation of secondary refcounting. */
- DB_ASSERT(env, sdbp->s_refcnt == 0);
- sdbp->s_refcnt = 1;
- LIST_INSERT_HEAD(&dbp->s_secondaries, sdbp, s_links);
- MUTEX_UNLOCK(env, dbp->mutex);
-
- if (build) {
- /*
- * We loop through the primary, putting each item we
- * find into the new secondary.
- *
- * If we're using CDB, opening these two cursors puts us
- * in a bit of a locking tangle: CDB locks are done on the
- * primary, so that we stay deadlock-free, but that means
- * that updating the secondary while we have a read cursor
- * open on the primary will self-block. To get around this,
- * we force the primary cursor to use the same locker ID
- * as the secondary, so they won't conflict. This should
- * be harmless even if we're not using CDB.
- */
- if ((ret = __db_cursor(sdbp, ip, txn, &sdbc,
- CDB_LOCKING(sdbp->env) ? DB_WRITECURSOR : 0)) != 0)
- goto err;
- if ((ret = __db_cursor_int(dbp, ip,
- txn, dbp->type, PGNO_INVALID, 0, sdbc->locker, &pdbc)) != 0)
- goto err;
-
- /* Lock out other threads, now that we have a locker. */
- dbp->associate_locker = sdbc->locker;
-
- memset(&key, 0, sizeof(DBT));
- memset(&data, 0, sizeof(DBT));
- while ((ret = __dbc_get(pdbc, &key, &data, DB_NEXT)) == 0) {
- if ((ret = callback(sdbp, &key, &data, &skey)) != 0) {
- if (ret == DB_DONOTINDEX)
- continue;
- goto err;
- }
- if (F_ISSET(&skey, DB_DBT_MULTIPLE)) {
-#ifdef DIAGNOSTIC
- __db_check_skeyset(sdbp, &skey);
-#endif
- nskey = skey.size;
- tskeyp = (DBT *)skey.data;
- } else {
- nskey = 1;
- tskeyp = &skey;
- }
- SWAP_IF_NEEDED(sdbp, &key);
- for (; nskey > 0; nskey--, tskeyp++) {
- if ((ret = __dbc_put(sdbc,
- tskeyp, &key, DB_UPDATE_SECONDARY)) != 0)
- goto err;
- FREE_IF_NEEDED(env, tskeyp);
- }
- SWAP_IF_NEEDED(sdbp, &key);
- FREE_IF_NEEDED(env, &skey);
- }
- if (ret == DB_NOTFOUND)
- ret = 0;
- }
-
-err: if (sdbc != NULL && (t_ret = __dbc_close(sdbc)) != 0 && ret == 0)
- ret = t_ret;
-
- if (pdbc != NULL && (t_ret = __dbc_close(pdbc)) != 0 && ret == 0)
- ret = t_ret;
-
- dbp->associate_locker = NULL;
-
- for (; nskey > 0; nskey--, tskeyp++)
- FREE_IF_NEEDED(env, tskeyp);
- FREE_IF_NEEDED(env, &skey);
-
- return (ret);
-}
-
-/*
- * __db_secondary_get --
- *	DB->get() method installed on a handle once it has been made a
- *	secondary index.  It forwards to DB->pget() with a NULL primary-key
- *	DBT and returns that call's result.
- */
-static int
-__db_secondary_get(sdbp, txn, skey, data, flags)
-	DB *sdbp;
-	DB_TXN *txn;
-	DBT *skey, *data;
-	u_int32_t flags;
-{
-	int ret;
-
-	/* This method is only ever installed on secondaries. */
-	DB_ASSERT(sdbp->env, F_ISSET(sdbp, DB_AM_SECONDARY));
-
-	ret = __db_pget_pp(sdbp, txn, skey, NULL, data, flags);
-	return (ret);
-}
-
-/*
- * __db_secondary_close --
- *	DB->close() wrapper used on secondaries.  It drops one reference
- *	on the secondary under the primary's mutex and performs the real
- *	close only when that was the last reference, so a secondary is
- *	never closed underneath a primary that is updating it.
- *
- *	Returns the result of __db_close when the close is performed,
- *	otherwise 0.
- *
- * PUBLIC: int __db_secondary_close __P((DB *, u_int32_t));
- */
-int
-__db_secondary_close(sdbp, flags)
-	DB *sdbp;
-	u_int32_t flags;
-{
-	DB *primary;
-	ENV *env;
-	int last_ref;
-
-	primary = sdbp->s_primary;
-	env = primary->env;
-
-	MUTEX_LOCK(env, primary->mutex);
-	/*
-	 * If the refcount was at 1 when we were called, no thread is
-	 * currently updating this secondary through the primary and it can
-	 * be closed for real.
-	 *
-	 * Otherwise we do nothing; the database will actually be closed
-	 * when the refcount is decremented, which can happen in either
-	 * __db_s_next or __db_s_done.
-	 */
-	DB_ASSERT(env, sdbp->s_refcnt != 0);
-	last_ref = (--sdbp->s_refcnt == 0);
-	if (last_ref)
-		LIST_REMOVE(sdbp, s_links);
-	MUTEX_UNLOCK(env, primary->mutex);
-
-	if (!last_ref)
-		return (0);
-
-	/*
-	 * The close itself is done after the mutex has been released.
-	 * sdbp->close is this function, so call the real one explicitly.
-	 */
-	return (__db_close(sdbp, NULL, flags));
-}
-
-/*
- * __db_associate_foreign --
- *	Associate this database (fdbp) as a foreign constraint to another
- *	database (pdbp).  That is, fdbp's keys appear as foreign key values in
- *	pdbp.
- *
- *	callback and flags are recorded in a new DB_FOREIGN_INFO that is
- *	linked onto fdbp->f_primaries.
- *
- *	Returns 0 on success, EINVAL if pdbp already has a foreign database
- *	(in which case nothing is modified), or the allocation error.
- *
- * PUBLIC: int __db_associate_foreign __P((DB *, DB *,
- * PUBLIC:     int (*)(DB *, const DBT *, DBT *, const DBT *, int *),
- * PUBLIC:     u_int32_t));
- */
-int
-__db_associate_foreign(fdbp, pdbp, callback, flags)
-	DB *fdbp, *pdbp;
-	int (*callback)(DB *, const DBT *, DBT *, const DBT *, int *);
-	u_int32_t flags;
-{
-	DB_FOREIGN_INFO *f_info;
-	ENV *env;
-	int ret;
-
-	env = fdbp->env;
-
-	/*
-	 * We don't allow the foreign db to be changed, because we currently
-	 * have no way of removing pdbp from the old foreign db's list of
-	 * primaries.  Check this before touching fdbp, so that a rejected
-	 * call does not leave an entry behind on fdbp->f_primaries.
-	 */
-	if (pdbp->s_foreign != NULL)
-		return (EINVAL);
-
-	if ((ret = __os_malloc(env, sizeof(DB_FOREIGN_INFO), &f_info)) != 0)
-		return (ret);
-	memset(f_info, 0, sizeof(DB_FOREIGN_INFO));
-
-	f_info->dbp = pdbp;
-	f_info->callback = callback;
-
-	/*
-	 * It might be wise to filter this, but for now the flags only
-	 * set the delete action type.
-	 */
-	FLD_SET(f_info->flags, flags);
-
-	/*
-	 * Add f_info to the foreign database's list of primaries.  That is to
-	 * say, fdbp->f_primaries lists all databases for which fdbp is a
-	 * foreign constraint.
-	 */
-	MUTEX_LOCK(env, fdbp->mutex);
-	LIST_INSERT_HEAD(&fdbp->f_primaries, f_info, f_links);
-	MUTEX_UNLOCK(env, fdbp->mutex);
-
-	/*
-	 * Associate fdbp as pdbp's foreign db, for referential integrity
-	 * checks.
-	 */
-	pdbp->s_foreign = fdbp;
-
-	return (0);
-}
-
-/*
- * __dbc_set_priority --
- *	Cursor set_priority method:  record the given cache priority on
- *	the cursor.  Always returns 0.
- */
-static int
-__dbc_set_priority(dbc, priority)
-	DBC *dbc;
-	DB_CACHE_PRIORITY priority;
-{
-	dbc->priority = priority;
-
-	return (0);
-}
-
-/*
- * __dbc_get_priority --
- *	Cursor get_priority method:  copy the cursor's current cache
- *	priority into *priority.  Always returns 0.
- */
-static int
-__dbc_get_priority(dbc, priority)
-	DBC *dbc;
-	DB_CACHE_PRIORITY *priority;
-{
-	*priority = dbc->priority;
-
-	return (0);
-}
diff --git a/db/db_auto.c b/db/db_auto.c
deleted file mode 100644
index 2ce4199..0000000
--- a/db/db_auto.c
+++ /dev/null
@@ -1,3267 +0,0 @@
-/* Do not edit: automatically built by gen_rec.awk. */
-
-#include "db_config.h"
-#include "db_int.h"
-#include "dbinc/crypto.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_dispatch.h"
-#include "dbinc/db_am.h"
-#include "dbinc/log.h"
-#include "dbinc/txn.h"
-
-/*
- * PUBLIC: int __db_addrem_read __P((ENV *, DB **, void *, void *,
- * PUBLIC: __db_addrem_args **));
- */
-int
-__db_addrem_read(env, dbpp, td, recbuf, argpp)
- ENV *env;
- DB **dbpp;
- void *td;
- void *recbuf;
- __db_addrem_args **argpp;
-{
- __db_addrem_args *argp;
- u_int32_t uinttmp;
- u_int8_t *bp;
- int ret;
-
- if ((ret = __os_malloc(env,
- sizeof(__db_addrem_args) + sizeof(DB_TXN), &argp)) != 0)
- return (ret);
- bp = recbuf;
- argp->txnp = (DB_TXN *)&argp[1];
- memset(argp->txnp, 0, sizeof(DB_TXN));
-
- argp->txnp->td = td;
- LOGCOPY_32(env, &argp->type, bp);
- bp += sizeof(argp->type);
-
- LOGCOPY_32(env, &argp->txnp->txnid, bp);
- bp += sizeof(argp->txnp->txnid);
-
- LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &argp->opcode, bp);
- bp += sizeof(argp->opcode);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->fileid = (int32_t)uinttmp;
- bp += sizeof(uinttmp);
- if (dbpp != NULL) {
- *dbpp = NULL;
- ret = __dbreg_id_to_db(
- env, argp->txnp, dbpp, argp->fileid, 1);
- }
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->pgno = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_32(env, &argp->indx, bp);
- bp += sizeof(argp->indx);
-
- LOGCOPY_32(env, &argp->nbytes, bp);
- bp += sizeof(argp->nbytes);
-
- memset(&argp->hdr, 0, sizeof(argp->hdr));
- LOGCOPY_32(env,&argp->hdr.size, bp);
- bp += sizeof(u_int32_t);
- argp->hdr.data = bp;
- bp += argp->hdr.size;
-
- memset(&argp->dbt, 0, sizeof(argp->dbt));
- LOGCOPY_32(env,&argp->dbt.size, bp);
- bp += sizeof(u_int32_t);
- argp->dbt.data = bp;
- bp += argp->dbt.size;
-
- LOGCOPY_TOLSN(env, &argp->pagelsn, bp);
- bp += sizeof(DB_LSN);
-
- *argpp = argp;
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_addrem_log __P((DB *, DB_TXN *, DB_LSN *,
- * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, u_int32_t, u_int32_t,
- * PUBLIC: const DBT *, const DBT *, DB_LSN *));
- */
-int
-__db_addrem_log(dbp, txnp, ret_lsnp, flags,
- opcode, pgno, indx, nbytes, hdr,
- dbt, pagelsn)
- DB *dbp;
- DB_TXN *txnp;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- u_int32_t opcode;
- db_pgno_t pgno;
- u_int32_t indx;
- u_int32_t nbytes;
- const DBT *hdr;
- const DBT *dbt;
- DB_LSN * pagelsn;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn, *rlsnp;
- DB_TXNLOGREC *lr;
- ENV *env;
- u_int32_t zero, uinttmp, rectype, txn_num;
- u_int npad;
- u_int8_t *bp;
- int is_durable, ret;
-
- COMPQUIET(lr, NULL);
-
- env = dbp->env;
- rlsnp = ret_lsnp;
- rectype = DB___db_addrem;
- npad = 0;
- ret = 0;
-
- if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
- F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
- if (txnp == NULL)
- return (0);
- is_durable = 0;
- } else
- is_durable = 1;
-
- if (txnp == NULL) {
- txn_num = 0;
- lsnp = &null_lsn;
- null_lsn.file = null_lsn.offset = 0;
- } else {
- if (TAILQ_FIRST(&txnp->kids) != NULL &&
- (ret = __txn_activekids(env, rectype, txnp)) != 0)
- return (ret);
- /*
- * We need to assign begin_lsn while holding region mutex.
- * That assignment is done inside the DbEnv->log_put call,
- * so pass in the appropriate memory location to be filled
- * in by the log_put code.
- */
- DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
- txn_num = txnp->txnid;
- }
-
- DB_ASSERT(env, dbp->log_filename != NULL);
- if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
- (ret = __dbreg_lazy_id(dbp)) != 0)
- return (ret);
-
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t) + (hdr == NULL ? 0 : hdr->size)
- + sizeof(u_int32_t) + (dbt == NULL ? 0 : dbt->size)
- + sizeof(*pagelsn);
- if (CRYPTO_ON(env)) {
- npad = env->crypto_handle->adj_size(logrec.size);
- logrec.size += npad;
- }
-
- if (is_durable || txnp == NULL) {
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0)
- return (ret);
- } else {
- if ((ret = __os_malloc(env,
- logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
- return (ret);
-#ifdef DIAGNOSTIC
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0) {
- __os_free(env, lr);
- return (ret);
- }
-#else
- logrec.data = lr->data;
-#endif
- }
- if (npad > 0)
- memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
- bp = logrec.data;
-
- LOGCOPY_32(env, bp, &rectype);
- bp += sizeof(rectype);
-
- LOGCOPY_32(env, bp, &txn_num);
- bp += sizeof(txn_num);
-
- LOGCOPY_FROMLSN(env, bp, lsnp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, bp, &opcode);
- bp += sizeof(opcode);
-
- uinttmp = (u_int32_t)dbp->log_filename->id;
- LOGCOPY_32(env, bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- uinttmp = (u_int32_t)pgno;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- LOGCOPY_32(env, bp, &indx);
- bp += sizeof(indx);
-
- LOGCOPY_32(env, bp, &nbytes);
- bp += sizeof(nbytes);
-
- if (hdr == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &hdr->size);
- bp += sizeof(hdr->size);
- memcpy(bp, hdr->data, hdr->size);
- bp += hdr->size;
- }
-
- if (dbt == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &dbt->size);
- bp += sizeof(dbt->size);
- memcpy(bp, dbt->data, dbt->size);
- bp += dbt->size;
- }
-
- if (pagelsn != NULL) {
- if (txnp != NULL) {
- LOG *lp = env->lg_handle->reginfo.primary;
- if (LOG_COMPARE(pagelsn, &lp->lsn) >= 0 && (ret =
- __log_check_page_lsn(env, dbp, pagelsn)) != 0)
- return (ret);
- }
- LOGCOPY_FROMLSN(env, bp, pagelsn);
- } else
- memset(bp, 0, sizeof(*pagelsn));
- bp += sizeof(*pagelsn);
-
- DB_ASSERT(env,
- (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
- if (is_durable || txnp == NULL) {
- if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
- *lsnp = *rlsnp;
- if (rlsnp != ret_lsnp)
- *ret_lsnp = *rlsnp;
- }
- } else {
- ret = 0;
-#ifdef DIAGNOSTIC
- /*
- * Set the debug bit if we are going to log non-durable
- * transactions so they will be ignored by recovery.
- */
- memcpy(lr->data, logrec.data, logrec.size);
- rectype |= DB_debug_FLAG;
- LOGCOPY_32(env, logrec.data, &rectype);
-
- if (!IS_REP_CLIENT(env))
- ret = __log_put(env,
- rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
- STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
- F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
- LSN_NOT_LOGGED(*ret_lsnp);
- }
-
-#ifdef LOG_DIAGNOSTIC
- if (ret != 0)
- (void)__db_addrem_print(env,
- (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
- __os_free(env, logrec.data);
-#else
- if (is_durable || txnp == NULL)
- __os_free(env, logrec.data);
-#endif
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_big_read __P((ENV *, DB **, void *, void *,
- * PUBLIC: __db_big_args **));
- */
-int
-__db_big_read(env, dbpp, td, recbuf, argpp)
- ENV *env;
- DB **dbpp;
- void *td;
- void *recbuf;
- __db_big_args **argpp;
-{
- __db_big_args *argp;
- u_int32_t uinttmp;
- u_int8_t *bp;
- int ret;
-
- if ((ret = __os_malloc(env,
- sizeof(__db_big_args) + sizeof(DB_TXN), &argp)) != 0)
- return (ret);
- bp = recbuf;
- argp->txnp = (DB_TXN *)&argp[1];
- memset(argp->txnp, 0, sizeof(DB_TXN));
-
- argp->txnp->td = td;
- LOGCOPY_32(env, &argp->type, bp);
- bp += sizeof(argp->type);
-
- LOGCOPY_32(env, &argp->txnp->txnid, bp);
- bp += sizeof(argp->txnp->txnid);
-
- LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &argp->opcode, bp);
- bp += sizeof(argp->opcode);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->fileid = (int32_t)uinttmp;
- bp += sizeof(uinttmp);
- if (dbpp != NULL) {
- *dbpp = NULL;
- ret = __dbreg_id_to_db(
- env, argp->txnp, dbpp, argp->fileid, 1);
- }
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->pgno = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->prev_pgno = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->next_pgno = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- memset(&argp->dbt, 0, sizeof(argp->dbt));
- LOGCOPY_32(env,&argp->dbt.size, bp);
- bp += sizeof(u_int32_t);
- argp->dbt.data = bp;
- bp += argp->dbt.size;
-
- LOGCOPY_TOLSN(env, &argp->pagelsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_TOLSN(env, &argp->prevlsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_TOLSN(env, &argp->nextlsn, bp);
- bp += sizeof(DB_LSN);
-
- *argpp = argp;
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_big_log __P((DB *, DB_TXN *, DB_LSN *,
- * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, db_pgno_t, db_pgno_t,
- * PUBLIC: const DBT *, DB_LSN *, DB_LSN *, DB_LSN *));
- */
-int
-__db_big_log(dbp, txnp, ret_lsnp, flags,
- opcode, pgno, prev_pgno, next_pgno, dbt,
- pagelsn, prevlsn, nextlsn)
- DB *dbp;
- DB_TXN *txnp;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- u_int32_t opcode;
- db_pgno_t pgno;
- db_pgno_t prev_pgno;
- db_pgno_t next_pgno;
- const DBT *dbt;
- DB_LSN * pagelsn;
- DB_LSN * prevlsn;
- DB_LSN * nextlsn;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn, *rlsnp;
- DB_TXNLOGREC *lr;
- ENV *env;
- u_int32_t zero, uinttmp, rectype, txn_num;
- u_int npad;
- u_int8_t *bp;
- int is_durable, ret;
-
- COMPQUIET(lr, NULL);
-
- env = dbp->env;
- rlsnp = ret_lsnp;
- rectype = DB___db_big;
- npad = 0;
- ret = 0;
-
- if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
- F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
- if (txnp == NULL)
- return (0);
- is_durable = 0;
- } else
- is_durable = 1;
-
- if (txnp == NULL) {
- txn_num = 0;
- lsnp = &null_lsn;
- null_lsn.file = null_lsn.offset = 0;
- } else {
- if (TAILQ_FIRST(&txnp->kids) != NULL &&
- (ret = __txn_activekids(env, rectype, txnp)) != 0)
- return (ret);
- /*
- * We need to assign begin_lsn while holding region mutex.
- * That assignment is done inside the DbEnv->log_put call,
- * so pass in the appropriate memory location to be filled
- * in by the log_put code.
- */
- DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
- txn_num = txnp->txnid;
- }
-
- DB_ASSERT(env, dbp->log_filename != NULL);
- if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
- (ret = __dbreg_lazy_id(dbp)) != 0)
- return (ret);
-
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t) + (dbt == NULL ? 0 : dbt->size)
- + sizeof(*pagelsn)
- + sizeof(*prevlsn)
- + sizeof(*nextlsn);
- if (CRYPTO_ON(env)) {
- npad = env->crypto_handle->adj_size(logrec.size);
- logrec.size += npad;
- }
-
- if (is_durable || txnp == NULL) {
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0)
- return (ret);
- } else {
- if ((ret = __os_malloc(env,
- logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
- return (ret);
-#ifdef DIAGNOSTIC
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0) {
- __os_free(env, lr);
- return (ret);
- }
-#else
- logrec.data = lr->data;
-#endif
- }
- if (npad > 0)
- memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
- bp = logrec.data;
-
- LOGCOPY_32(env, bp, &rectype);
- bp += sizeof(rectype);
-
- LOGCOPY_32(env, bp, &txn_num);
- bp += sizeof(txn_num);
-
- LOGCOPY_FROMLSN(env, bp, lsnp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, bp, &opcode);
- bp += sizeof(opcode);
-
- uinttmp = (u_int32_t)dbp->log_filename->id;
- LOGCOPY_32(env, bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- uinttmp = (u_int32_t)pgno;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- uinttmp = (u_int32_t)prev_pgno;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- uinttmp = (u_int32_t)next_pgno;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- if (dbt == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &dbt->size);
- bp += sizeof(dbt->size);
- memcpy(bp, dbt->data, dbt->size);
- bp += dbt->size;
- }
-
- if (pagelsn != NULL) {
- if (txnp != NULL) {
- LOG *lp = env->lg_handle->reginfo.primary;
- if (LOG_COMPARE(pagelsn, &lp->lsn) >= 0 && (ret =
- __log_check_page_lsn(env, dbp, pagelsn)) != 0)
- return (ret);
- }
- LOGCOPY_FROMLSN(env, bp, pagelsn);
- } else
- memset(bp, 0, sizeof(*pagelsn));
- bp += sizeof(*pagelsn);
-
- if (prevlsn != NULL) {
- if (txnp != NULL) {
- LOG *lp = env->lg_handle->reginfo.primary;
- if (LOG_COMPARE(prevlsn, &lp->lsn) >= 0 && (ret =
- __log_check_page_lsn(env, dbp, prevlsn)) != 0)
- return (ret);
- }
- LOGCOPY_FROMLSN(env, bp, prevlsn);
- } else
- memset(bp, 0, sizeof(*prevlsn));
- bp += sizeof(*prevlsn);
-
- if (nextlsn != NULL) {
- if (txnp != NULL) {
- LOG *lp = env->lg_handle->reginfo.primary;
- if (LOG_COMPARE(nextlsn, &lp->lsn) >= 0 && (ret =
- __log_check_page_lsn(env, dbp, nextlsn)) != 0)
- return (ret);
- }
- LOGCOPY_FROMLSN(env, bp, nextlsn);
- } else
- memset(bp, 0, sizeof(*nextlsn));
- bp += sizeof(*nextlsn);
-
- DB_ASSERT(env,
- (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
- if (is_durable || txnp == NULL) {
- if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
- *lsnp = *rlsnp;
- if (rlsnp != ret_lsnp)
- *ret_lsnp = *rlsnp;
- }
- } else {
- ret = 0;
-#ifdef DIAGNOSTIC
- /*
- * Set the debug bit if we are going to log non-durable
- * transactions so they will be ignored by recovery.
- */
- memcpy(lr->data, logrec.data, logrec.size);
- rectype |= DB_debug_FLAG;
- LOGCOPY_32(env, logrec.data, &rectype);
-
- if (!IS_REP_CLIENT(env))
- ret = __log_put(env,
- rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
- STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
- F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
- LSN_NOT_LOGGED(*ret_lsnp);
- }
-
-#ifdef LOG_DIAGNOSTIC
- if (ret != 0)
- (void)__db_big_print(env,
- (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
- __os_free(env, logrec.data);
-#else
- if (is_durable || txnp == NULL)
- __os_free(env, logrec.data);
-#endif
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_ovref_read __P((ENV *, DB **, void *, void *,
- * PUBLIC: __db_ovref_args **));
- */
-int
-__db_ovref_read(env, dbpp, td, recbuf, argpp)
- ENV *env;
- DB **dbpp;
- void *td;
- void *recbuf;
- __db_ovref_args **argpp;
-{
- __db_ovref_args *argp;
- u_int32_t uinttmp;
- u_int8_t *bp;
- int ret;
-
- if ((ret = __os_malloc(env,
- sizeof(__db_ovref_args) + sizeof(DB_TXN), &argp)) != 0)
- return (ret);
- bp = recbuf;
- argp->txnp = (DB_TXN *)&argp[1];
- memset(argp->txnp, 0, sizeof(DB_TXN));
-
- argp->txnp->td = td;
- LOGCOPY_32(env, &argp->type, bp);
- bp += sizeof(argp->type);
-
- LOGCOPY_32(env, &argp->txnp->txnid, bp);
- bp += sizeof(argp->txnp->txnid);
-
- LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->fileid = (int32_t)uinttmp;
- bp += sizeof(uinttmp);
- if (dbpp != NULL) {
- *dbpp = NULL;
- ret = __dbreg_id_to_db(
- env, argp->txnp, dbpp, argp->fileid, 1);
- }
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->pgno = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->adjust = (int32_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_TOLSN(env, &argp->lsn, bp);
- bp += sizeof(DB_LSN);
-
- *argpp = argp;
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_ovref_log __P((DB *, DB_TXN *, DB_LSN *,
- * PUBLIC: u_int32_t, db_pgno_t, int32_t, DB_LSN *));
- */
-int
-__db_ovref_log(dbp, txnp, ret_lsnp, flags, pgno, adjust, lsn)
- DB *dbp;
- DB_TXN *txnp;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- db_pgno_t pgno;
- int32_t adjust;
- DB_LSN * lsn;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn, *rlsnp;
- DB_TXNLOGREC *lr;
- ENV *env;
- u_int32_t uinttmp, rectype, txn_num;
- u_int npad;
- u_int8_t *bp;
- int is_durable, ret;
-
- COMPQUIET(lr, NULL);
-
- env = dbp->env;
- rlsnp = ret_lsnp;
- rectype = DB___db_ovref;
- npad = 0;
- ret = 0;
-
- if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
- F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
- if (txnp == NULL)
- return (0);
- is_durable = 0;
- } else
- is_durable = 1;
-
- if (txnp == NULL) {
- txn_num = 0;
- lsnp = &null_lsn;
- null_lsn.file = null_lsn.offset = 0;
- } else {
- if (TAILQ_FIRST(&txnp->kids) != NULL &&
- (ret = __txn_activekids(env, rectype, txnp)) != 0)
- return (ret);
- /*
- * We need to assign begin_lsn while holding region mutex.
- * That assignment is done inside the DbEnv->log_put call,
- * so pass in the appropriate memory location to be filled
- * in by the log_put code.
- */
- DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
- txn_num = txnp->txnid;
- }
-
- DB_ASSERT(env, dbp->log_filename != NULL);
- if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
- (ret = __dbreg_lazy_id(dbp)) != 0)
- return (ret);
-
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(*lsn);
- if (CRYPTO_ON(env)) {
- npad = env->crypto_handle->adj_size(logrec.size);
- logrec.size += npad;
- }
-
- if (is_durable || txnp == NULL) {
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0)
- return (ret);
- } else {
- if ((ret = __os_malloc(env,
- logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
- return (ret);
-#ifdef DIAGNOSTIC
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0) {
- __os_free(env, lr);
- return (ret);
- }
-#else
- logrec.data = lr->data;
-#endif
- }
- if (npad > 0)
- memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
- bp = logrec.data;
-
- LOGCOPY_32(env, bp, &rectype);
- bp += sizeof(rectype);
-
- LOGCOPY_32(env, bp, &txn_num);
- bp += sizeof(txn_num);
-
- LOGCOPY_FROMLSN(env, bp, lsnp);
- bp += sizeof(DB_LSN);
-
- uinttmp = (u_int32_t)dbp->log_filename->id;
- LOGCOPY_32(env, bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- uinttmp = (u_int32_t)pgno;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- uinttmp = (u_int32_t)adjust;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- if (lsn != NULL) {
- if (txnp != NULL) {
- LOG *lp = env->lg_handle->reginfo.primary;
- if (LOG_COMPARE(lsn, &lp->lsn) >= 0 && (ret =
- __log_check_page_lsn(env, dbp, lsn)) != 0)
- return (ret);
- }
- LOGCOPY_FROMLSN(env, bp, lsn);
- } else
- memset(bp, 0, sizeof(*lsn));
- bp += sizeof(*lsn);
-
- DB_ASSERT(env,
- (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
- if (is_durable || txnp == NULL) {
- if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
- *lsnp = *rlsnp;
- if (rlsnp != ret_lsnp)
- *ret_lsnp = *rlsnp;
- }
- } else {
- ret = 0;
-#ifdef DIAGNOSTIC
- /*
- * Set the debug bit if we are going to log non-durable
- * transactions so they will be ignored by recovery.
- */
- memcpy(lr->data, logrec.data, logrec.size);
- rectype |= DB_debug_FLAG;
- LOGCOPY_32(env, logrec.data, &rectype);
-
- if (!IS_REP_CLIENT(env))
- ret = __log_put(env,
- rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
- STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
- F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
- LSN_NOT_LOGGED(*ret_lsnp);
- }
-
-#ifdef LOG_DIAGNOSTIC
- if (ret != 0)
- (void)__db_ovref_print(env,
- (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
- __os_free(env, logrec.data);
-#else
- if (is_durable || txnp == NULL)
- __os_free(env, logrec.data);
-#endif
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_relink_42_read __P((ENV *, DB **, void *,
- * PUBLIC: void *, __db_relink_42_args **));
- */
-int
-__db_relink_42_read(env, dbpp, td, recbuf, argpp)
- ENV *env;
- DB **dbpp;
- void *td;
- void *recbuf;
- __db_relink_42_args **argpp;
-{
- __db_relink_42_args *argp;
- u_int32_t uinttmp;
- u_int8_t *bp;
- int ret;
-
- if ((ret = __os_malloc(env,
- sizeof(__db_relink_42_args) + sizeof(DB_TXN), &argp)) != 0)
- return (ret);
- bp = recbuf;
- argp->txnp = (DB_TXN *)&argp[1];
- memset(argp->txnp, 0, sizeof(DB_TXN));
-
- argp->txnp->td = td;
- LOGCOPY_32(env, &argp->type, bp);
- bp += sizeof(argp->type);
-
- LOGCOPY_32(env, &argp->txnp->txnid, bp);
- bp += sizeof(argp->txnp->txnid);
-
- LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &argp->opcode, bp);
- bp += sizeof(argp->opcode);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->fileid = (int32_t)uinttmp;
- bp += sizeof(uinttmp);
- if (dbpp != NULL) {
- *dbpp = NULL;
- ret = __dbreg_id_to_db(
- env, argp->txnp, dbpp, argp->fileid, 1);
- }
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->pgno = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_TOLSN(env, &argp->lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->prev = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_TOLSN(env, &argp->lsn_prev, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->next = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_TOLSN(env, &argp->lsn_next, bp);
- bp += sizeof(DB_LSN);
-
- *argpp = argp;
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_debug_read __P((ENV *, void *, __db_debug_args **));
- */
-int
-__db_debug_read(env, recbuf, argpp)
- ENV *env;
- void *recbuf;
- __db_debug_args **argpp;
-{
- __db_debug_args *argp;
- u_int32_t uinttmp;
- u_int8_t *bp;
- int ret;
-
- if ((ret = __os_malloc(env,
- sizeof(__db_debug_args) + sizeof(DB_TXN), &argp)) != 0)
- return (ret);
- bp = recbuf;
- argp->txnp = (DB_TXN *)&argp[1];
- memset(argp->txnp, 0, sizeof(DB_TXN));
-
- LOGCOPY_32(env, &argp->type, bp);
- bp += sizeof(argp->type);
-
- LOGCOPY_32(env, &argp->txnp->txnid, bp);
- bp += sizeof(argp->txnp->txnid);
-
- LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
- bp += sizeof(DB_LSN);
-
- memset(&argp->op, 0, sizeof(argp->op));
- LOGCOPY_32(env,&argp->op.size, bp);
- bp += sizeof(u_int32_t);
- argp->op.data = bp;
- bp += argp->op.size;
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->fileid = (int32_t)uinttmp;
- bp += sizeof(uinttmp);
-
- memset(&argp->key, 0, sizeof(argp->key));
- LOGCOPY_32(env,&argp->key.size, bp);
- bp += sizeof(u_int32_t);
- argp->key.data = bp;
- bp += argp->key.size;
-
- memset(&argp->data, 0, sizeof(argp->data));
- LOGCOPY_32(env,&argp->data.size, bp);
- bp += sizeof(u_int32_t);
- argp->data.data = bp;
- bp += argp->data.size;
-
- LOGCOPY_32(env, &argp->arg_flags, bp);
- bp += sizeof(argp->arg_flags);
-
- *argpp = argp;
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_debug_log __P((ENV *, DB_TXN *, DB_LSN *,
- * PUBLIC: u_int32_t, const DBT *, int32_t, const DBT *, const DBT *,
- * PUBLIC: u_int32_t));
- */
-int
-__db_debug_log(env, txnp, ret_lsnp, flags,
- op, fileid, key, data, arg_flags)
- ENV *env;
- DB_TXN *txnp;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- const DBT *op;
- int32_t fileid;
- const DBT *key;
- const DBT *data;
- u_int32_t arg_flags;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn, *rlsnp;
- DB_TXNLOGREC *lr;
- u_int32_t zero, uinttmp, rectype, txn_num;
- u_int npad;
- u_int8_t *bp;
- int is_durable, ret;
-
- COMPQUIET(lr, NULL);
-
- rlsnp = ret_lsnp;
- rectype = DB___db_debug;
- npad = 0;
- ret = 0;
-
- if (LF_ISSET(DB_LOG_NOT_DURABLE)) {
- if (txnp == NULL)
- return (0);
- is_durable = 0;
- } else
- is_durable = 1;
-
- if (txnp == NULL) {
- txn_num = 0;
- lsnp = &null_lsn;
- null_lsn.file = null_lsn.offset = 0;
- } else {
- /*
- * We need to assign begin_lsn while holding region mutex.
- * That assignment is done inside the DbEnv->log_put call,
- * so pass in the appropriate memory location to be filled
- * in by the log_put code.
- */
- DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
- txn_num = txnp->txnid;
- }
-
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(u_int32_t) + (op == NULL ? 0 : op->size)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t) + (key == NULL ? 0 : key->size)
- + sizeof(u_int32_t) + (data == NULL ? 0 : data->size)
- + sizeof(u_int32_t);
- if (CRYPTO_ON(env)) {
- npad = env->crypto_handle->adj_size(logrec.size);
- logrec.size += npad;
- }
-
- if (is_durable || txnp == NULL) {
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0)
- return (ret);
- } else {
- if ((ret = __os_malloc(env,
- logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
- return (ret);
-#ifdef DIAGNOSTIC
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0) {
- __os_free(env, lr);
- return (ret);
- }
-#else
- logrec.data = lr->data;
-#endif
- }
- if (npad > 0)
- memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
- bp = logrec.data;
-
- LOGCOPY_32(env, bp, &rectype);
- bp += sizeof(rectype);
-
- LOGCOPY_32(env, bp, &txn_num);
- bp += sizeof(txn_num);
-
- LOGCOPY_FROMLSN(env, bp, lsnp);
- bp += sizeof(DB_LSN);
-
- if (op == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &op->size);
- bp += sizeof(op->size);
- memcpy(bp, op->data, op->size);
- bp += op->size;
- }
-
- uinttmp = (u_int32_t)fileid;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- if (key == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &key->size);
- bp += sizeof(key->size);
- memcpy(bp, key->data, key->size);
- bp += key->size;
- }
-
- if (data == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &data->size);
- bp += sizeof(data->size);
- memcpy(bp, data->data, data->size);
- bp += data->size;
- }
-
- LOGCOPY_32(env, bp, &arg_flags);
- bp += sizeof(arg_flags);
-
- DB_ASSERT(env,
- (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
- if (is_durable || txnp == NULL) {
- if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
- *lsnp = *rlsnp;
- if (rlsnp != ret_lsnp)
- *ret_lsnp = *rlsnp;
- }
- } else {
- ret = 0;
-#ifdef DIAGNOSTIC
- /*
- * Set the debug bit if we are going to log non-durable
- * transactions so they will be ignored by recovery.
- */
- memcpy(lr->data, logrec.data, logrec.size);
- rectype |= DB_debug_FLAG;
- LOGCOPY_32(env, logrec.data, &rectype);
-
- if (!IS_REP_CLIENT(env))
- ret = __log_put(env,
- rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
- STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
- F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
- LSN_NOT_LOGGED(*ret_lsnp);
- }
-
-#ifdef LOG_DIAGNOSTIC
- if (ret != 0)
- (void)__db_debug_print(env,
- (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
- __os_free(env, logrec.data);
-#else
- if (is_durable || txnp == NULL)
- __os_free(env, logrec.data);
-#endif
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_noop_read __P((ENV *, DB **, void *, void *,
- * PUBLIC: __db_noop_args **));
- */
-int
-__db_noop_read(env, dbpp, td, recbuf, argpp)
- ENV *env;
- DB **dbpp;
- void *td;
- void *recbuf;
- __db_noop_args **argpp;
-{
- __db_noop_args *argp;
- u_int32_t uinttmp;
- u_int8_t *bp;
- int ret;
-
- if ((ret = __os_malloc(env,
- sizeof(__db_noop_args) + sizeof(DB_TXN), &argp)) != 0)
- return (ret);
- bp = recbuf;
- argp->txnp = (DB_TXN *)&argp[1];
- memset(argp->txnp, 0, sizeof(DB_TXN));
-
- argp->txnp->td = td;
- LOGCOPY_32(env, &argp->type, bp);
- bp += sizeof(argp->type);
-
- LOGCOPY_32(env, &argp->txnp->txnid, bp);
- bp += sizeof(argp->txnp->txnid);
-
- LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->fileid = (int32_t)uinttmp;
- bp += sizeof(uinttmp);
- if (dbpp != NULL) {
- *dbpp = NULL;
- ret = __dbreg_id_to_db(
- env, argp->txnp, dbpp, argp->fileid, 1);
- }
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->pgno = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_TOLSN(env, &argp->prevlsn, bp);
- bp += sizeof(DB_LSN);
-
- *argpp = argp;
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_noop_log __P((DB *, DB_TXN *, DB_LSN *,
- * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *));
- */
-int
-__db_noop_log(dbp, txnp, ret_lsnp, flags, pgno, prevlsn)
- DB *dbp;
- DB_TXN *txnp;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- db_pgno_t pgno;
- DB_LSN * prevlsn;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn, *rlsnp;
- DB_TXNLOGREC *lr;
- ENV *env;
- u_int32_t uinttmp, rectype, txn_num;
- u_int npad;
- u_int8_t *bp;
- int is_durable, ret;
-
- COMPQUIET(lr, NULL);
-
- env = dbp->env;
- rlsnp = ret_lsnp;
- rectype = DB___db_noop;
- npad = 0;
- ret = 0;
-
- if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
- F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
- if (txnp == NULL)
- return (0);
- is_durable = 0;
- } else
- is_durable = 1;
-
- if (txnp == NULL) {
- txn_num = 0;
- lsnp = &null_lsn;
- null_lsn.file = null_lsn.offset = 0;
- } else {
- if (TAILQ_FIRST(&txnp->kids) != NULL &&
- (ret = __txn_activekids(env, rectype, txnp)) != 0)
- return (ret);
- /*
- * We need to assign begin_lsn while holding region mutex.
- * That assignment is done inside the DbEnv->log_put call,
- * so pass in the appropriate memory location to be filled
- * in by the log_put code.
- */
- DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
- txn_num = txnp->txnid;
- }
-
- DB_ASSERT(env, dbp->log_filename != NULL);
- if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
- (ret = __dbreg_lazy_id(dbp)) != 0)
- return (ret);
-
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(*prevlsn);
- if (CRYPTO_ON(env)) {
- npad = env->crypto_handle->adj_size(logrec.size);
- logrec.size += npad;
- }
-
- if (is_durable || txnp == NULL) {
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0)
- return (ret);
- } else {
- if ((ret = __os_malloc(env,
- logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
- return (ret);
-#ifdef DIAGNOSTIC
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0) {
- __os_free(env, lr);
- return (ret);
- }
-#else
- logrec.data = lr->data;
-#endif
- }
- if (npad > 0)
- memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
- bp = logrec.data;
-
- LOGCOPY_32(env, bp, &rectype);
- bp += sizeof(rectype);
-
- LOGCOPY_32(env, bp, &txn_num);
- bp += sizeof(txn_num);
-
- LOGCOPY_FROMLSN(env, bp, lsnp);
- bp += sizeof(DB_LSN);
-
- uinttmp = (u_int32_t)dbp->log_filename->id;
- LOGCOPY_32(env, bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- uinttmp = (u_int32_t)pgno;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- if (prevlsn != NULL) {
- if (txnp != NULL) {
- LOG *lp = env->lg_handle->reginfo.primary;
- if (LOG_COMPARE(prevlsn, &lp->lsn) >= 0 && (ret =
- __log_check_page_lsn(env, dbp, prevlsn)) != 0)
- return (ret);
- }
- LOGCOPY_FROMLSN(env, bp, prevlsn);
- } else
- memset(bp, 0, sizeof(*prevlsn));
- bp += sizeof(*prevlsn);
-
- DB_ASSERT(env,
- (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
- if (is_durable || txnp == NULL) {
- if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
- *lsnp = *rlsnp;
- if (rlsnp != ret_lsnp)
- *ret_lsnp = *rlsnp;
- }
- } else {
- ret = 0;
-#ifdef DIAGNOSTIC
- /*
- * Set the debug bit if we are going to log non-durable
- * transactions so they will be ignored by recovery.
- */
- memcpy(lr->data, logrec.data, logrec.size);
- rectype |= DB_debug_FLAG;
- LOGCOPY_32(env, logrec.data, &rectype);
-
- if (!IS_REP_CLIENT(env))
- ret = __log_put(env,
- rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
- STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
- F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
- LSN_NOT_LOGGED(*ret_lsnp);
- }
-
-#ifdef LOG_DIAGNOSTIC
- if (ret != 0)
- (void)__db_noop_print(env,
- (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
- __os_free(env, logrec.data);
-#else
- if (is_durable || txnp == NULL)
- __os_free(env, logrec.data);
-#endif
- return (ret);
-}
-
-/*
- * __db_pg_alloc_42_read --
- *	Decode the log record in recbuf into a newly allocated
- *	__db_pg_alloc_42_args structure.  Fields are read in this order:
- *	type, txnid, prev_lsn, fileid, meta_lsn, meta_pgno, page_lsn, pgno,
- *	ptype, next.  When dbpp is not NULL, *dbpp is cleared and the file
- *	id is looked up with __dbreg_id_to_db(); that call's result becomes
- *	the return value.
- *
- *	If the allocation fails its error is returned and *argpp is left
- *	untouched; otherwise *argpp is always set, even when the lookup
- *	reported an error.
- *
- * PUBLIC: int __db_pg_alloc_42_read __P((ENV *, DB **, void *,
- * PUBLIC: void *, __db_pg_alloc_42_args **));
- */
-int
-__db_pg_alloc_42_read(env, dbpp, td, recbuf, argpp)
-	ENV *env;
-	DB **dbpp;
-	void *td;
-	void *recbuf;
-	__db_pg_alloc_42_args **argpp;
-{
-	__db_pg_alloc_42_args *rec;
-	DB_TXN *txn;
-	u_int32_t word;
-	u_int8_t *cur;
-	int ret;
-
-	/* A single allocation holds the arguments followed by a DB_TXN. */
-	ret = __os_malloc(env,
-	    sizeof(__db_pg_alloc_42_args) + sizeof(DB_TXN), &rec);
-	if (ret != 0)
-		return (ret);
-
-	cur = recbuf;
-	txn = (DB_TXN *)&rec[1];
-	rec->txnp = txn;
-	memset(txn, 0, sizeof(DB_TXN));
-	txn->td = td;
-
-	/* Record header: type, transaction id, previous LSN. */
-	LOGCOPY_32(env, &rec->type, cur);
-	cur += sizeof(rec->type);
-	LOGCOPY_32(env, &txn->txnid, cur);
-	cur += sizeof(txn->txnid);
-	LOGCOPY_TOLSN(env, &rec->prev_lsn, cur);
-	cur += sizeof(DB_LSN);
-
-	/* File id, optionally looked up through __dbreg_id_to_db(). */
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->fileid = (int32_t)word;
-	if (dbpp != NULL) {
-		*dbpp = NULL;
-		ret = __dbreg_id_to_db(env, txn, dbpp, rec->fileid, 1);
-	}
-
-	/* Metadata page LSN and page number. */
-	LOGCOPY_TOLSN(env, &rec->meta_lsn, cur);
-	cur += sizeof(DB_LSN);
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->meta_pgno = (db_pgno_t)word;
-
-	/* Page LSN and page number. */
-	LOGCOPY_TOLSN(env, &rec->page_lsn, cur);
-	cur += sizeof(DB_LSN);
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->pgno = (db_pgno_t)word;
-
-	LOGCOPY_32(env, &rec->ptype, cur);
-	cur += sizeof(rec->ptype);
-
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->next = (db_pgno_t)word;
-
-	*argpp = rec;
-	return (ret);
-}
-
-/*
- * __db_pg_alloc_read --
- *	Decode the log record in recbuf into a newly allocated
- *	__db_pg_alloc_args structure.  Fields are read in this order: type,
- *	txnid, prev_lsn, fileid, meta_lsn, meta_pgno, page_lsn, pgno, ptype,
- *	next, last_pgno.  When dbpp is not NULL, *dbpp is cleared and the
- *	file id is looked up with __dbreg_id_to_db(); that call's result
- *	becomes the return value.
- *
- *	If the allocation fails its error is returned and *argpp is left
- *	untouched; otherwise *argpp is always set, even when the lookup
- *	reported an error.
- *
- * PUBLIC: int __db_pg_alloc_read __P((ENV *, DB **, void *, void *,
- * PUBLIC: __db_pg_alloc_args **));
- */
-int
-__db_pg_alloc_read(env, dbpp, td, recbuf, argpp)
-	ENV *env;
-	DB **dbpp;
-	void *td;
-	void *recbuf;
-	__db_pg_alloc_args **argpp;
-{
-	__db_pg_alloc_args *rec;
-	DB_TXN *txn;
-	u_int32_t word;
-	u_int8_t *cur;
-	int ret;
-
-	/* A single allocation holds the arguments followed by a DB_TXN. */
-	ret = __os_malloc(env,
-	    sizeof(__db_pg_alloc_args) + sizeof(DB_TXN), &rec);
-	if (ret != 0)
-		return (ret);
-
-	cur = recbuf;
-	txn = (DB_TXN *)&rec[1];
-	rec->txnp = txn;
-	memset(txn, 0, sizeof(DB_TXN));
-	txn->td = td;
-
-	/* Record header: type, transaction id, previous LSN. */
-	LOGCOPY_32(env, &rec->type, cur);
-	cur += sizeof(rec->type);
-	LOGCOPY_32(env, &txn->txnid, cur);
-	cur += sizeof(txn->txnid);
-	LOGCOPY_TOLSN(env, &rec->prev_lsn, cur);
-	cur += sizeof(DB_LSN);
-
-	/* File id, optionally looked up through __dbreg_id_to_db(). */
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->fileid = (int32_t)word;
-	if (dbpp != NULL) {
-		*dbpp = NULL;
-		ret = __dbreg_id_to_db(env, txn, dbpp, rec->fileid, 1);
-	}
-
-	/* Metadata page LSN and page number. */
-	LOGCOPY_TOLSN(env, &rec->meta_lsn, cur);
-	cur += sizeof(DB_LSN);
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->meta_pgno = (db_pgno_t)word;
-
-	/* Page LSN and page number. */
-	LOGCOPY_TOLSN(env, &rec->page_lsn, cur);
-	cur += sizeof(DB_LSN);
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->pgno = (db_pgno_t)word;
-
-	LOGCOPY_32(env, &rec->ptype, cur);
-	cur += sizeof(rec->ptype);
-
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->next = (db_pgno_t)word;
-
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->last_pgno = (db_pgno_t)word;
-
-	*argpp = rec;
-	return (ret);
-}
-
-/*
- * __db_pg_alloc_log --
- *	Build a DB___db_pg_alloc log record and either pass it to
- *	__log_put() or, for a non-durable update made inside a transaction,
- *	queue it on txnp->logs.
- *
- *	Record layout, in order: rectype, txnid, previous LSN, file id,
- *	meta_lsn, meta_pgno, page_lsn, pgno, ptype, next, last_pgno.  A NULL
- *	meta_lsn or page_lsn is written as zero bytes.
- *
- *	Returns 0 on success, including the case where nothing is written
- *	because the update is non-durable and txnp is NULL; otherwise the
- *	error from the failing helper.  If a page-LSN check fails after the
- *	record buffer has been allocated, the buffer is released before
- *	returning.
- *
- * PUBLIC: int __db_pg_alloc_log __P((DB *, DB_TXN *, DB_LSN *,
- * PUBLIC: u_int32_t, DB_LSN *, db_pgno_t, DB_LSN *, db_pgno_t, u_int32_t,
- * PUBLIC: db_pgno_t, db_pgno_t));
- */
-int
-__db_pg_alloc_log(dbp, txnp, ret_lsnp, flags, meta_lsn, meta_pgno, page_lsn, pgno, ptype,
-    next, last_pgno)
-	DB *dbp;
-	DB_TXN *txnp;
-	DB_LSN *ret_lsnp;
-	u_int32_t flags;
-	DB_LSN * meta_lsn;
-	db_pgno_t meta_pgno;
-	DB_LSN * page_lsn;
-	db_pgno_t pgno;
-	u_int32_t ptype;
-	db_pgno_t next;
-	db_pgno_t last_pgno;
-{
-	DBT logrec;
-	DB_LSN *lsnp, null_lsn, *rlsnp;
-	DB_TXNLOGREC *lr;
-	ENV *env;
-	u_int32_t uinttmp, rectype, txn_num;
-	u_int npad;
-	u_int8_t *bp;
-	int is_durable, ret;
-
-	COMPQUIET(lr, NULL);
-
-	env = dbp->env;
-	rlsnp = ret_lsnp;
-	rectype = DB___db_pg_alloc;
-	npad = 0;
-	ret = 0;
-
-	/* A non-durable update with no transaction is not logged at all. */
-	if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
-	    F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
-		if (txnp == NULL)
-			return (0);
-		is_durable = 0;
-	} else
-		is_durable = 1;
-
-	if (txnp == NULL) {
-		txn_num = 0;
-		lsnp = &null_lsn;
-		null_lsn.file = null_lsn.offset = 0;
-	} else {
-		if (TAILQ_FIRST(&txnp->kids) != NULL &&
-		    (ret = __txn_activekids(env, rectype, txnp)) != 0)
-			return (ret);
-		/*
-		 * We need to assign begin_lsn while holding region mutex.
-		 * That assignment is done inside the DbEnv->log_put call,
-		 * so pass in the appropriate memory location to be filled
-		 * in by the log_put code.
-		 */
-		DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
-		txn_num = txnp->txnid;
-	}
-
-	DB_ASSERT(env, dbp->log_filename != NULL);
-	if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
-	    (ret = __dbreg_lazy_id(dbp)) != 0)
-		return (ret);
-
-	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
-	    + sizeof(u_int32_t)
-	    + sizeof(*meta_lsn)
-	    + sizeof(u_int32_t)
-	    + sizeof(*page_lsn)
-	    + sizeof(u_int32_t)
-	    + sizeof(u_int32_t)
-	    + sizeof(u_int32_t)
-	    + sizeof(u_int32_t);
-	/* With CRYPTO_ON, grow the record by whatever adj_size() reports. */
-	if (CRYPTO_ON(env)) {
-		npad = env->crypto_handle->adj_size(logrec.size);
-		logrec.size += npad;
-	}
-
-	/*
-	 * Records that go straight to __log_put() use a plain buffer; the
-	 * in-memory (non-durable, transactional) case allocates a
-	 * DB_TXNLOGREC with the record stored in lr->data.  DIAGNOSTIC
-	 * builds additionally keep a separate scratch buffer.
-	 */
-	if (is_durable || txnp == NULL) {
-		if ((ret =
-		    __os_malloc(env, logrec.size, &logrec.data)) != 0)
-			return (ret);
-	} else {
-		if ((ret = __os_malloc(env,
-		    logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
-			return (ret);
-#ifdef DIAGNOSTIC
-		if ((ret =
-		    __os_malloc(env, logrec.size, &logrec.data)) != 0) {
-			__os_free(env, lr);
-			return (ret);
-		}
-#else
-		logrec.data = lr->data;
-#endif
-	}
-	if (npad > 0)
-		memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
-	bp = logrec.data;
-
-	LOGCOPY_32(env, bp, &rectype);
-	bp += sizeof(rectype);
-
-	LOGCOPY_32(env, bp, &txn_num);
-	bp += sizeof(txn_num);
-
-	LOGCOPY_FROMLSN(env, bp, lsnp);
-	bp += sizeof(DB_LSN);
-
-	uinttmp = (u_int32_t)dbp->log_filename->id;
-	LOGCOPY_32(env, bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	if (meta_lsn != NULL) {
-		if (txnp != NULL) {
-			LOG *lp = env->lg_handle->reginfo.primary;
-			/* The buffer is already allocated: clean up on error. */
-			if (LOG_COMPARE(meta_lsn, &lp->lsn) >= 0 && (ret =
-			    __log_check_page_lsn(env, dbp, meta_lsn)) != 0)
-				goto err;
-		}
-		LOGCOPY_FROMLSN(env, bp, meta_lsn);
-	} else
-		memset(bp, 0, sizeof(*meta_lsn));
-	bp += sizeof(*meta_lsn);
-
-	uinttmp = (u_int32_t)meta_pgno;
-	LOGCOPY_32(env,bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	if (page_lsn != NULL) {
-		if (txnp != NULL) {
-			LOG *lp = env->lg_handle->reginfo.primary;
-			/* The buffer is already allocated: clean up on error. */
-			if (LOG_COMPARE(page_lsn, &lp->lsn) >= 0 && (ret =
-			    __log_check_page_lsn(env, dbp, page_lsn)) != 0)
-				goto err;
-		}
-		LOGCOPY_FROMLSN(env, bp, page_lsn);
-	} else
-		memset(bp, 0, sizeof(*page_lsn));
-	bp += sizeof(*page_lsn);
-
-	uinttmp = (u_int32_t)pgno;
-	LOGCOPY_32(env,bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	LOGCOPY_32(env, bp, &ptype);
-	bp += sizeof(ptype);
-
-	uinttmp = (u_int32_t)next;
-	LOGCOPY_32(env,bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	uinttmp = (u_int32_t)last_pgno;
-	LOGCOPY_32(env,bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	DB_ASSERT(env,
-	    (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
-	if (is_durable || txnp == NULL) {
-		if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
-		    flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
-			*lsnp = *rlsnp;
-			if (rlsnp != ret_lsnp)
-				 *ret_lsnp = *rlsnp;
-		}
-	} else {
-		ret = 0;
-#ifdef DIAGNOSTIC
-		/*
-		 * Set the debug bit if we are going to log non-durable
-		 * transactions so they will be ignored by recovery.
-		 */
-		memcpy(lr->data, logrec.data, logrec.size);
-		rectype |= DB_debug_FLAG;
-		LOGCOPY_32(env, logrec.data, &rectype);
-
-		if (!IS_REP_CLIENT(env))
-			ret = __log_put(env,
-			    rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
-		/* From here on the record belongs to the transaction's list. */
-		STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
-		F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
-		LSN_NOT_LOGGED(*ret_lsnp);
-	}
-
-#ifdef LOG_DIAGNOSTIC
-	if (ret != 0)
-		(void)__db_pg_alloc_print(env,
-		    (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
-	__os_free(env, logrec.data);
-#else
-	if (is_durable || txnp == NULL)
-		__os_free(env, logrec.data);
-#endif
-	return (ret);
-
-err:
-	/*
-	 * The record was never passed to __log_put() nor queued on the
-	 * transaction, so every buffer allocated above is still ours.
-	 */
-#ifdef DIAGNOSTIC
-	__os_free(env, logrec.data);
-	if (!is_durable && txnp != NULL)
-		__os_free(env, lr);
-#else
-	if (is_durable || txnp == NULL)
-		__os_free(env, logrec.data);
-	else
-		__os_free(env, lr);
-#endif
-	return (ret);
-}
-
-/*
- * __db_pg_free_42_read --
- *	Decode the log record in recbuf into a newly allocated
- *	__db_pg_free_42_args structure.  Fields are read in this order:
- *	type, txnid, prev_lsn, fileid, pgno, meta_lsn, meta_pgno, header,
- *	next.  header.data is set to point inside recbuf; the bytes are not
- *	copied.  When dbpp is not NULL, *dbpp is cleared and the file id is
- *	looked up with __dbreg_id_to_db(); that call's result becomes the
- *	return value.
- *
- *	If the allocation fails its error is returned and *argpp is left
- *	untouched; otherwise *argpp is always set.
- *
- * PUBLIC: int __db_pg_free_42_read __P((ENV *, DB **, void *,
- * PUBLIC: void *, __db_pg_free_42_args **));
- */
-int
-__db_pg_free_42_read(env, dbpp, td, recbuf, argpp)
-	ENV *env;
-	DB **dbpp;
-	void *td;
-	void *recbuf;
-	__db_pg_free_42_args **argpp;
-{
-	__db_pg_free_42_args *rec;
-	DB_TXN *txn;
-	u_int32_t word;
-	u_int8_t *cur;
-	int ret;
-
-	/* A single allocation holds the arguments followed by a DB_TXN. */
-	ret = __os_malloc(env,
-	    sizeof(__db_pg_free_42_args) + sizeof(DB_TXN), &rec);
-	if (ret != 0)
-		return (ret);
-
-	cur = recbuf;
-	txn = (DB_TXN *)&rec[1];
-	rec->txnp = txn;
-	memset(txn, 0, sizeof(DB_TXN));
-	txn->td = td;
-
-	/* Record header: type, transaction id, previous LSN. */
-	LOGCOPY_32(env, &rec->type, cur);
-	cur += sizeof(rec->type);
-	LOGCOPY_32(env, &txn->txnid, cur);
-	cur += sizeof(txn->txnid);
-	LOGCOPY_TOLSN(env, &rec->prev_lsn, cur);
-	cur += sizeof(DB_LSN);
-
-	/* File id, optionally looked up through __dbreg_id_to_db(). */
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->fileid = (int32_t)word;
-	if (dbpp != NULL) {
-		*dbpp = NULL;
-		ret = __dbreg_id_to_db(env, txn, dbpp, rec->fileid, 1);
-	}
-
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->pgno = (db_pgno_t)word;
-
-	/* Metadata page LSN and page number. */
-	LOGCOPY_TOLSN(env, &rec->meta_lsn, cur);
-	cur += sizeof(DB_LSN);
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->meta_pgno = (db_pgno_t)word;
-
-	/* Length-prefixed header bytes, referenced where they sit. */
-	memset(&rec->header, 0, sizeof(rec->header));
-	LOGCOPY_32(env,&rec->header.size, cur);
-	cur += sizeof(u_int32_t);
-	rec->header.data = cur;
-	cur += rec->header.size;
-
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->next = (db_pgno_t)word;
-
-	*argpp = rec;
-	return (ret);
-}
-
-/*
- * __db_pg_free_read --
- *	Decode the log record in recbuf into a newly allocated
- *	__db_pg_free_args structure.  Fields are read in this order: type,
- *	txnid, prev_lsn, fileid, pgno, meta_lsn, meta_pgno, header, next,
- *	last_pgno.  header.data is set to point inside recbuf; the bytes
- *	are not copied.  When dbpp is not NULL, *dbpp is cleared and the
- *	file id is looked up with __dbreg_id_to_db(); that call's result
- *	becomes the return value.  When LOG_SWAPPED(env) is true and a DB
- *	handle was found, the header bytes are passed to __db_pageswap().
- *
- *	If the allocation or __db_pageswap() fails, the error is returned,
- *	nothing remains allocated and *argpp is left untouched.  Otherwise
- *	*argpp is set, even when the lookup reported an error.
- *
- * PUBLIC: int __db_pg_free_read __P((ENV *, DB **, void *, void *,
- * PUBLIC: __db_pg_free_args **));
- */
-int
-__db_pg_free_read(env, dbpp, td, recbuf, argpp)
-	ENV *env;
-	DB **dbpp;
-	void *td;
-	void *recbuf;
-	__db_pg_free_args **argpp;
-{
-	__db_pg_free_args *argp;
-	u_int32_t uinttmp;
-	u_int8_t *bp;
-	int ret;
-
-	/* A single allocation holds the arguments followed by a DB_TXN. */
-	if ((ret = __os_malloc(env,
-	    sizeof(__db_pg_free_args) + sizeof(DB_TXN), &argp)) != 0)
-		return (ret);
-	bp = recbuf;
-	argp->txnp = (DB_TXN *)&argp[1];
-	memset(argp->txnp, 0, sizeof(DB_TXN));
-
-	argp->txnp->td = td;
-	LOGCOPY_32(env, &argp->type, bp);
-	bp += sizeof(argp->type);
-
-	LOGCOPY_32(env, &argp->txnp->txnid, bp);
-	bp += sizeof(argp->txnp->txnid);
-
-	LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
-	bp += sizeof(DB_LSN);
-
-	LOGCOPY_32(env, &uinttmp, bp);
-	argp->fileid = (int32_t)uinttmp;
-	bp += sizeof(uinttmp);
-	if (dbpp != NULL) {
-		*dbpp = NULL;
-		ret = __dbreg_id_to_db(
-		    env, argp->txnp, dbpp, argp->fileid, 1);
-	}
-
-	LOGCOPY_32(env, &uinttmp, bp);
-	argp->pgno = (db_pgno_t)uinttmp;
-	bp += sizeof(uinttmp);
-
-	LOGCOPY_TOLSN(env, &argp->meta_lsn, bp);
-	bp += sizeof(DB_LSN);
-
-	LOGCOPY_32(env, &uinttmp, bp);
-	argp->meta_pgno = (db_pgno_t)uinttmp;
-	bp += sizeof(uinttmp);
-
-	/* Length-prefixed header bytes, referenced where they sit. */
-	memset(&argp->header, 0, sizeof(argp->header));
-	LOGCOPY_32(env,&argp->header.size, bp);
-	bp += sizeof(u_int32_t);
-	argp->header.data = bp;
-	bp += argp->header.size;
-	if (LOG_SWAPPED(env) && dbpp != NULL && *dbpp != NULL) {
-		int t_ret;
-		if ((t_ret = __db_pageswap(*dbpp, (PAGE *)argp->header.data,
-		    (size_t)argp->header.size, NULL, 1)) != 0) {
-			/*
-			 * *argpp has not been written yet, so the structure
-			 * would be unreachable after this return: free it.
-			 */
-			__os_free(env, argp);
-			return (t_ret);
-		}
-	}
-
-	LOGCOPY_32(env, &uinttmp, bp);
-	argp->next = (db_pgno_t)uinttmp;
-	bp += sizeof(uinttmp);
-
-	LOGCOPY_32(env, &uinttmp, bp);
-	argp->last_pgno = (db_pgno_t)uinttmp;
-	bp += sizeof(uinttmp);
-
-	*argpp = argp;
-	return (ret);
-}
-
-/*
- * __db_pg_free_log --
- *	Build a DB___db_pg_free log record and either pass it to
- *	__log_put() or, for a non-durable update made inside a transaction,
- *	queue it on txnp->logs.
- *
- *	Record layout, in order: rectype, txnid, previous LSN, file id,
- *	pgno, meta_lsn, meta_pgno, header (32-bit size followed by the
- *	bytes), next, last_pgno.  A NULL meta_lsn is written as zero bytes
- *	and a NULL header as a zero size.  When LOG_SWAPPED(env) is true the
- *	copied header bytes are passed to __db_pageswap().
- *
- *	Returns 0 on success, including the case where nothing is written
- *	because the update is non-durable and txnp is NULL; otherwise the
- *	error from the failing helper.  If the page-LSN check or the page
- *	swap fails after the record buffer has been allocated, the buffer
- *	is released before returning.
- *
- * PUBLIC: int __db_pg_free_log __P((DB *, DB_TXN *, DB_LSN *,
- * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, const DBT *,
- * PUBLIC: db_pgno_t, db_pgno_t));
- */
-int
-__db_pg_free_log(dbp, txnp, ret_lsnp, flags, pgno, meta_lsn, meta_pgno, header, next,
-    last_pgno)
-	DB *dbp;
-	DB_TXN *txnp;
-	DB_LSN *ret_lsnp;
-	u_int32_t flags;
-	db_pgno_t pgno;
-	DB_LSN * meta_lsn;
-	db_pgno_t meta_pgno;
-	const DBT *header;
-	db_pgno_t next;
-	db_pgno_t last_pgno;
-{
-	DBT logrec;
-	DB_LSN *lsnp, null_lsn, *rlsnp;
-	DB_TXNLOGREC *lr;
-	ENV *env;
-	u_int32_t zero, uinttmp, rectype, txn_num;
-	u_int npad;
-	u_int8_t *bp;
-	int is_durable, ret;
-
-	COMPQUIET(lr, NULL);
-
-	env = dbp->env;
-	rlsnp = ret_lsnp;
-	rectype = DB___db_pg_free;
-	npad = 0;
-	ret = 0;
-
-	/* A non-durable update with no transaction is not logged at all. */
-	if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
-	    F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
-		if (txnp == NULL)
-			return (0);
-		is_durable = 0;
-	} else
-		is_durable = 1;
-
-	if (txnp == NULL) {
-		txn_num = 0;
-		lsnp = &null_lsn;
-		null_lsn.file = null_lsn.offset = 0;
-	} else {
-		if (TAILQ_FIRST(&txnp->kids) != NULL &&
-		    (ret = __txn_activekids(env, rectype, txnp)) != 0)
-			return (ret);
-		/*
-		 * We need to assign begin_lsn while holding region mutex.
-		 * That assignment is done inside the DbEnv->log_put call,
-		 * so pass in the appropriate memory location to be filled
-		 * in by the log_put code.
-		 */
-		DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
-		txn_num = txnp->txnid;
-	}
-
-	DB_ASSERT(env, dbp->log_filename != NULL);
-	if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
-	    (ret = __dbreg_lazy_id(dbp)) != 0)
-		return (ret);
-
-	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
-	    + sizeof(u_int32_t)
-	    + sizeof(u_int32_t)
-	    + sizeof(*meta_lsn)
-	    + sizeof(u_int32_t)
-	    + sizeof(u_int32_t) + (header == NULL ? 0 : header->size)
-	    + sizeof(u_int32_t)
-	    + sizeof(u_int32_t);
-	/* With CRYPTO_ON, grow the record by whatever adj_size() reports. */
-	if (CRYPTO_ON(env)) {
-		npad = env->crypto_handle->adj_size(logrec.size);
-		logrec.size += npad;
-	}
-
-	/*
-	 * Records that go straight to __log_put() use a plain buffer; the
-	 * in-memory (non-durable, transactional) case allocates a
-	 * DB_TXNLOGREC with the record stored in lr->data.  DIAGNOSTIC
-	 * builds additionally keep a separate scratch buffer.
-	 */
-	if (is_durable || txnp == NULL) {
-		if ((ret =
-		    __os_malloc(env, logrec.size, &logrec.data)) != 0)
-			return (ret);
-	} else {
-		if ((ret = __os_malloc(env,
-		    logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
-			return (ret);
-#ifdef DIAGNOSTIC
-		if ((ret =
-		    __os_malloc(env, logrec.size, &logrec.data)) != 0) {
-			__os_free(env, lr);
-			return (ret);
-		}
-#else
-		logrec.data = lr->data;
-#endif
-	}
-	if (npad > 0)
-		memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
-	bp = logrec.data;
-
-	LOGCOPY_32(env, bp, &rectype);
-	bp += sizeof(rectype);
-
-	LOGCOPY_32(env, bp, &txn_num);
-	bp += sizeof(txn_num);
-
-	LOGCOPY_FROMLSN(env, bp, lsnp);
-	bp += sizeof(DB_LSN);
-
-	uinttmp = (u_int32_t)dbp->log_filename->id;
-	LOGCOPY_32(env, bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	uinttmp = (u_int32_t)pgno;
-	LOGCOPY_32(env,bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	if (meta_lsn != NULL) {
-		if (txnp != NULL) {
-			LOG *lp = env->lg_handle->reginfo.primary;
-			/* The buffer is already allocated: clean up on error. */
-			if (LOG_COMPARE(meta_lsn, &lp->lsn) >= 0 && (ret =
-			    __log_check_page_lsn(env, dbp, meta_lsn)) != 0)
-				goto err;
-		}
-		LOGCOPY_FROMLSN(env, bp, meta_lsn);
-	} else
-		memset(bp, 0, sizeof(*meta_lsn));
-	bp += sizeof(*meta_lsn);
-
-	uinttmp = (u_int32_t)meta_pgno;
-	LOGCOPY_32(env,bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	if (header == NULL) {
-		zero = 0;
-		LOGCOPY_32(env, bp, &zero);
-		bp += sizeof(u_int32_t);
-	} else {
-		LOGCOPY_32(env, bp, &header->size);
-		bp += sizeof(header->size);
-		memcpy(bp, header->data, header->size);
-		/* The swap works on the copy in the record, not on header. */
-		if (LOG_SWAPPED(env))
-			if ((ret = __db_pageswap(dbp,
-			    (PAGE *)bp, (size_t)header->size, (DBT *)NULL, 0)) != 0)
-				goto err;
-		bp += header->size;
-	}
-
-	uinttmp = (u_int32_t)next;
-	LOGCOPY_32(env,bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	uinttmp = (u_int32_t)last_pgno;
-	LOGCOPY_32(env,bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	DB_ASSERT(env,
-	    (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
-	if (is_durable || txnp == NULL) {
-		if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
-		    flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
-			*lsnp = *rlsnp;
-			if (rlsnp != ret_lsnp)
-				 *ret_lsnp = *rlsnp;
-		}
-	} else {
-		ret = 0;
-#ifdef DIAGNOSTIC
-		/*
-		 * Set the debug bit if we are going to log non-durable
-		 * transactions so they will be ignored by recovery.
-		 */
-		memcpy(lr->data, logrec.data, logrec.size);
-		rectype |= DB_debug_FLAG;
-		LOGCOPY_32(env, logrec.data, &rectype);
-
-		if (!IS_REP_CLIENT(env))
-			ret = __log_put(env,
-			    rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
-		/* From here on the record belongs to the transaction's list. */
-		STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
-		F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
-		LSN_NOT_LOGGED(*ret_lsnp);
-	}
-
-#ifdef LOG_DIAGNOSTIC
-	if (ret != 0)
-		(void)__db_pg_free_print(env,
-		    (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
-	__os_free(env, logrec.data);
-#else
-	if (is_durable || txnp == NULL)
-		__os_free(env, logrec.data);
-#endif
-	return (ret);
-
-err:
-	/*
-	 * The record was never passed to __log_put() nor queued on the
-	 * transaction, so every buffer allocated above is still ours.
-	 */
-#ifdef DIAGNOSTIC
-	__os_free(env, logrec.data);
-	if (!is_durable && txnp != NULL)
-		__os_free(env, lr);
-#else
-	if (is_durable || txnp == NULL)
-		__os_free(env, logrec.data);
-	else
-		__os_free(env, lr);
-#endif
-	return (ret);
-}
-
-/*
- * __db_cksum_read --
- *	Decode the log record in recbuf into a newly allocated
- *	__db_cksum_args structure.  Only the common header is read: type,
- *	txnid and prev_lsn.
- *
- *	Returns the __os_malloc() result; on failure *argpp is untouched.
- *
- * PUBLIC: int __db_cksum_read __P((ENV *, void *, __db_cksum_args **));
- */
-int
-__db_cksum_read(env, recbuf, argpp)
-	ENV *env;
-	void *recbuf;
-	__db_cksum_args **argpp;
-{
-	__db_cksum_args *rec;
-	DB_TXN *txn;
-	u_int8_t *cur;
-	int ret;
-
-	/* A single allocation holds the arguments followed by a DB_TXN. */
-	ret = __os_malloc(env,
-	    sizeof(__db_cksum_args) + sizeof(DB_TXN), &rec);
-	if (ret != 0)
-		return (ret);
-
-	cur = recbuf;
-	txn = (DB_TXN *)&rec[1];
-	rec->txnp = txn;
-	memset(txn, 0, sizeof(DB_TXN));
-
-	LOGCOPY_32(env, &rec->type, cur);
-	cur += sizeof(rec->type);
-	LOGCOPY_32(env, &txn->txnid, cur);
-	cur += sizeof(txn->txnid);
-	LOGCOPY_TOLSN(env, &rec->prev_lsn, cur);
-	cur += sizeof(DB_LSN);
-
-	*argpp = rec;
-	return (ret);
-}
-
-/*
- * __db_cksum_log --
- *	Build a DB___db_cksum log record, which carries only the common
- *	header (rectype, txnid, previous LSN), and either pass it to
- *	__log_put() or, when DB_LOG_NOT_DURABLE is set and txnp is not NULL,
- *	queue it on txnp->logs.
- *
- *	Returns 0 on success, including the case where nothing is written
- *	because DB_LOG_NOT_DURABLE is set and txnp is NULL; otherwise the
- *	error from the failing helper.
- *
- * PUBLIC: int __db_cksum_log __P((ENV *, DB_TXN *, DB_LSN *, u_int32_t));
- */
-int
-__db_cksum_log(env, txnp, ret_lsnp, flags)
-	ENV *env;
-	DB_TXN *txnp;
-	DB_LSN *ret_lsnp;
-	u_int32_t flags;
-{
-	DBT rec;
-	DB_LSN *prevp, zero_lsn, *outp;
-	DB_TXNLOGREC *queued;
-	u_int32_t rectype, txn_num;
-	u_int npad;
-	u_int8_t *cur;
-	int is_durable, ret, write_now;
-
-	COMPQUIET(queued, NULL);
-
-	outp = ret_lsnp;
-	rectype = DB___db_cksum;
-	npad = 0;
-	ret = 0;
-
-	/* A non-durable record with no transaction is not logged at all. */
-	is_durable = LF_ISSET(DB_LOG_NOT_DURABLE) ? 0 : 1;
-	if (!is_durable && txnp == NULL)
-		return (0);
-
-	/* True when the record goes to __log_put() rather than txnp->logs. */
-	write_now = is_durable || txnp == NULL;
-
-	if (txnp != NULL) {
-		if (TAILQ_FIRST(&txnp->kids) != NULL &&
-		    (ret = __txn_activekids(env, rectype, txnp)) != 0)
-			return (ret);
-		/*
-		 * begin_lsn has to be assigned while the region mutex is
-		 * held, which happens inside the DbEnv->log_put call; hand
-		 * over the memory location that call should fill in.
-		 */
-		DB_SET_TXN_LSNP(txnp, &outp, &prevp);
-		txn_num = txnp->txnid;
-	} else {
-		txn_num = 0;
-		prevp = &zero_lsn;
-		zero_lsn.file = zero_lsn.offset = 0;
-	}
-
-	rec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN);
-	if (CRYPTO_ON(env)) {
-		npad = env->crypto_handle->adj_size(rec.size);
-		rec.size += npad;
-	}
-
-	if (write_now) {
-		ret = __os_malloc(env, rec.size, &rec.data);
-		if (ret != 0)
-			return (ret);
-	} else {
-		ret = __os_malloc(env,
-		    rec.size + sizeof(DB_TXNLOGREC), &queued);
-		if (ret != 0)
-			return (ret);
-#ifdef DIAGNOSTIC
-		ret = __os_malloc(env, rec.size, &rec.data);
-		if (ret != 0) {
-			__os_free(env, queued);
-			return (ret);
-		}
-#else
-		rec.data = queued->data;
-#endif
-	}
-	if (npad > 0)
-		memset((u_int8_t *)rec.data + rec.size - npad, 0, npad);
-
-	cur = rec.data;
-	LOGCOPY_32(env, cur, &rectype);
-	cur += sizeof(rectype);
-	LOGCOPY_32(env, cur, &txn_num);
-	cur += sizeof(txn_num);
-	LOGCOPY_FROMLSN(env, cur, prevp);
-	cur += sizeof(DB_LSN);
-
-	DB_ASSERT(env,
-	    (u_int32_t)(cur - (u_int8_t *)rec.data) <= rec.size);
-
-	if (write_now) {
-		ret = __log_put(env, outp,(DBT *)&rec,
-		    flags | DB_LOG_NOCOPY);
-		if (ret == 0 && txnp != NULL) {
-			*prevp = *outp;
-			if (outp != ret_lsnp)
-				*ret_lsnp = *outp;
-		}
-	} else {
-		ret = 0;
-#ifdef DIAGNOSTIC
-		/*
-		 * Non-durable transactions are still written in this build,
-		 * with the debug bit set so that recovery ignores them.
-		 */
-		memcpy(queued->data, rec.data, rec.size);
-		rectype |= DB_debug_FLAG;
-		LOGCOPY_32(env, rec.data, &rectype);
-
-		if (!IS_REP_CLIENT(env))
-			ret = __log_put(env,
-			    outp, (DBT *)&rec, flags | DB_LOG_NOCOPY);
-#endif
-		STAILQ_INSERT_HEAD(&txnp->logs, queued, links);
-		F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
-		LSN_NOT_LOGGED(*ret_lsnp);
-	}
-
-#ifdef LOG_DIAGNOSTIC
-	if (ret != 0)
-		(void)__db_cksum_print(env,
-		    (DBT *)&rec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
-	__os_free(env, rec.data);
-#else
-	if (write_now)
-		__os_free(env, rec.data);
-#endif
-	return (ret);
-}
-
-/*
- * __db_pg_freedata_42_read --
- *	Decode the log record in recbuf into a newly allocated
- *	__db_pg_freedata_42_args structure.  Fields are read in this order:
- *	type, txnid, prev_lsn, fileid, pgno, meta_lsn, meta_pgno, header,
- *	next, data.  header.data and data.data are set to point inside
- *	recbuf; the bytes are not copied.  When dbpp is not NULL, *dbpp is
- *	cleared and the file id is looked up with __dbreg_id_to_db(); that
- *	call's result becomes the return value.
- *
- *	If the allocation fails its error is returned and *argpp is left
- *	untouched; otherwise *argpp is always set.
- *
- * PUBLIC: int __db_pg_freedata_42_read __P((ENV *, DB **, void *,
- * PUBLIC: void *, __db_pg_freedata_42_args **));
- */
-int
-__db_pg_freedata_42_read(env, dbpp, td, recbuf, argpp)
-	ENV *env;
-	DB **dbpp;
-	void *td;
-	void *recbuf;
-	__db_pg_freedata_42_args **argpp;
-{
-	__db_pg_freedata_42_args *rec;
-	DB_TXN *txn;
-	u_int32_t word;
-	u_int8_t *cur;
-	int ret;
-
-	/* A single allocation holds the arguments followed by a DB_TXN. */
-	ret = __os_malloc(env,
-	    sizeof(__db_pg_freedata_42_args) + sizeof(DB_TXN), &rec);
-	if (ret != 0)
-		return (ret);
-
-	cur = recbuf;
-	txn = (DB_TXN *)&rec[1];
-	rec->txnp = txn;
-	memset(txn, 0, sizeof(DB_TXN));
-	txn->td = td;
-
-	/* Record header: type, transaction id, previous LSN. */
-	LOGCOPY_32(env, &rec->type, cur);
-	cur += sizeof(rec->type);
-	LOGCOPY_32(env, &txn->txnid, cur);
-	cur += sizeof(txn->txnid);
-	LOGCOPY_TOLSN(env, &rec->prev_lsn, cur);
-	cur += sizeof(DB_LSN);
-
-	/* File id, optionally looked up through __dbreg_id_to_db(). */
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->fileid = (int32_t)word;
-	if (dbpp != NULL) {
-		*dbpp = NULL;
-		ret = __dbreg_id_to_db(env, txn, dbpp, rec->fileid, 1);
-	}
-
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->pgno = (db_pgno_t)word;
-
-	/* Metadata page LSN and page number. */
-	LOGCOPY_TOLSN(env, &rec->meta_lsn, cur);
-	cur += sizeof(DB_LSN);
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->meta_pgno = (db_pgno_t)word;
-
-	/* Length-prefixed header bytes, referenced where they sit. */
-	memset(&rec->header, 0, sizeof(rec->header));
-	LOGCOPY_32(env,&rec->header.size, cur);
-	cur += sizeof(u_int32_t);
-	rec->header.data = cur;
-	cur += rec->header.size;
-
-	LOGCOPY_32(env, &word, cur);
-	cur += sizeof(word);
-	rec->next = (db_pgno_t)word;
-
-	/* Length-prefixed data bytes, referenced where they sit. */
-	memset(&rec->data, 0, sizeof(rec->data));
-	LOGCOPY_32(env,&rec->data.size, cur);
-	cur += sizeof(u_int32_t);
-	rec->data.data = cur;
-	cur += rec->data.size;
-
-	*argpp = rec;
-	return (ret);
-}
-
-/*
- * __db_pg_freedata_read --
- *	Decode the log record in recbuf into a newly allocated
- *	__db_pg_freedata_args structure.  Fields are read in this order:
- *	type, txnid, prev_lsn, fileid, pgno, meta_lsn, meta_pgno, header,
- *	next, last_pgno, data.  header.data and data.data are set to point
- *	inside recbuf; the bytes are not copied.  When dbpp is not NULL,
- *	*dbpp is cleared and the file id is looked up with
- *	__dbreg_id_to_db(); that call's result becomes the return value.
- *	When LOG_SWAPPED(env) is true and a DB handle was found, the header
- *	and data are passed to __db_pageswap().
- *
- *	If the allocation or __db_pageswap() fails, the error is returned,
- *	nothing remains allocated and *argpp is left untouched.  Otherwise
- *	*argpp is set, even when the lookup reported an error.
- *
- * PUBLIC: int __db_pg_freedata_read __P((ENV *, DB **, void *,
- * PUBLIC: void *, __db_pg_freedata_args **));
- */
-int
-__db_pg_freedata_read(env, dbpp, td, recbuf, argpp)
-	ENV *env;
-	DB **dbpp;
-	void *td;
-	void *recbuf;
-	__db_pg_freedata_args **argpp;
-{
-	__db_pg_freedata_args *argp;
-	u_int32_t uinttmp;
-	u_int8_t *bp;
-	int ret;
-
-	/* A single allocation holds the arguments followed by a DB_TXN. */
-	if ((ret = __os_malloc(env,
-	    sizeof(__db_pg_freedata_args) + sizeof(DB_TXN), &argp)) != 0)
-		return (ret);
-	bp = recbuf;
-	argp->txnp = (DB_TXN *)&argp[1];
-	memset(argp->txnp, 0, sizeof(DB_TXN));
-
-	argp->txnp->td = td;
-	LOGCOPY_32(env, &argp->type, bp);
-	bp += sizeof(argp->type);
-
-	LOGCOPY_32(env, &argp->txnp->txnid, bp);
-	bp += sizeof(argp->txnp->txnid);
-
-	LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
-	bp += sizeof(DB_LSN);
-
-	LOGCOPY_32(env, &uinttmp, bp);
-	argp->fileid = (int32_t)uinttmp;
-	bp += sizeof(uinttmp);
-	if (dbpp != NULL) {
-		*dbpp = NULL;
-		ret = __dbreg_id_to_db(
-		    env, argp->txnp, dbpp, argp->fileid, 1);
-	}
-
-	LOGCOPY_32(env, &uinttmp, bp);
-	argp->pgno = (db_pgno_t)uinttmp;
-	bp += sizeof(uinttmp);
-
-	LOGCOPY_TOLSN(env, &argp->meta_lsn, bp);
-	bp += sizeof(DB_LSN);
-
-	LOGCOPY_32(env, &uinttmp, bp);
-	argp->meta_pgno = (db_pgno_t)uinttmp;
-	bp += sizeof(uinttmp);
-
-	/* Length-prefixed header bytes, referenced where they sit. */
-	memset(&argp->header, 0, sizeof(argp->header));
-	LOGCOPY_32(env,&argp->header.size, bp);
-	bp += sizeof(u_int32_t);
-	argp->header.data = bp;
-	bp += argp->header.size;
-
-	LOGCOPY_32(env, &uinttmp, bp);
-	argp->next = (db_pgno_t)uinttmp;
-	bp += sizeof(uinttmp);
-
-	LOGCOPY_32(env, &uinttmp, bp);
-	argp->last_pgno = (db_pgno_t)uinttmp;
-	bp += sizeof(uinttmp);
-
-	/* Length-prefixed data bytes, referenced where they sit. */
-	memset(&argp->data, 0, sizeof(argp->data));
-	LOGCOPY_32(env,&argp->data.size, bp);
-	bp += sizeof(u_int32_t);
-	argp->data.data = bp;
-	bp += argp->data.size;
-	if (LOG_SWAPPED(env) && dbpp != NULL && *dbpp != NULL) {
-		int t_ret;
-		if ((t_ret = __db_pageswap(*dbpp,
-		    (PAGE *)argp->header.data, (size_t)argp->header.size,
-		    &argp->data, 1)) != 0) {
-			/*
-			 * *argpp has not been written yet, so the structure
-			 * would be unreachable after this return: free it.
-			 */
-			__os_free(env, argp);
-			return (t_ret);
-		}
-	}
-
-	*argpp = argp;
-	return (ret);
-}
-
-/*
- * __db_pg_freedata_log --
- *	Build a DB___db_pg_freedata log record and either pass it to
- *	__log_put() or, for a non-durable update made inside a transaction,
- *	queue it on txnp->logs.
- *
- *	Record layout, in order: rectype, txnid, previous LSN, file id,
- *	pgno, meta_lsn, meta_pgno, header (32-bit size followed by the
- *	bytes), next, last_pgno, data (32-bit size followed by the bytes).
- *	A NULL meta_lsn is written as zero bytes and a NULL header or data
- *	as a zero size.  When LOG_SWAPPED(env) is true the copied header is
- *	passed to __db_pageswap() together with data, and data->data is
- *	freed after copying if data carries DB_DBT_APPMALLOC.
- *
- *	Returns 0 on success, including the case where nothing is written
- *	because the update is non-durable and txnp is NULL; otherwise the
- *	error from the failing helper.  If the page-LSN check or the page
- *	swap fails after the record buffer has been allocated, the buffer
- *	is released before returning.
- *
- * PUBLIC: int __db_pg_freedata_log __P((DB *, DB_TXN *, DB_LSN *,
- * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, const DBT *,
- * PUBLIC: db_pgno_t, db_pgno_t, const DBT *));
- */
-int
-__db_pg_freedata_log(dbp, txnp, ret_lsnp, flags, pgno, meta_lsn, meta_pgno, header, next,
-    last_pgno, data)
-	DB *dbp;
-	DB_TXN *txnp;
-	DB_LSN *ret_lsnp;
-	u_int32_t flags;
-	db_pgno_t pgno;
-	DB_LSN * meta_lsn;
-	db_pgno_t meta_pgno;
-	const DBT *header;
-	db_pgno_t next;
-	db_pgno_t last_pgno;
-	const DBT *data;
-{
-	DBT logrec;
-	DB_LSN *lsnp, null_lsn, *rlsnp;
-	DB_TXNLOGREC *lr;
-	ENV *env;
-	u_int32_t zero, uinttmp, rectype, txn_num;
-	u_int npad;
-	u_int8_t *bp;
-	int is_durable, ret;
-
-	COMPQUIET(lr, NULL);
-
-	env = dbp->env;
-	rlsnp = ret_lsnp;
-	rectype = DB___db_pg_freedata;
-	npad = 0;
-	ret = 0;
-
-	/* A non-durable update with no transaction is not logged at all. */
-	if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
-	    F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
-		if (txnp == NULL)
-			return (0);
-		is_durable = 0;
-	} else
-		is_durable = 1;
-
-	if (txnp == NULL) {
-		txn_num = 0;
-		lsnp = &null_lsn;
-		null_lsn.file = null_lsn.offset = 0;
-	} else {
-		if (TAILQ_FIRST(&txnp->kids) != NULL &&
-		    (ret = __txn_activekids(env, rectype, txnp)) != 0)
-			return (ret);
-		/*
-		 * We need to assign begin_lsn while holding region mutex.
-		 * That assignment is done inside the DbEnv->log_put call,
-		 * so pass in the appropriate memory location to be filled
-		 * in by the log_put code.
-		 */
-		DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
-		txn_num = txnp->txnid;
-	}
-
-	DB_ASSERT(env, dbp->log_filename != NULL);
-	if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
-	    (ret = __dbreg_lazy_id(dbp)) != 0)
-		return (ret);
-
-	logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
-	    + sizeof(u_int32_t)
-	    + sizeof(u_int32_t)
-	    + sizeof(*meta_lsn)
-	    + sizeof(u_int32_t)
-	    + sizeof(u_int32_t) + (header == NULL ? 0 : header->size)
-	    + sizeof(u_int32_t)
-	    + sizeof(u_int32_t)
-	    + sizeof(u_int32_t) + (data == NULL ? 0 : data->size);
-	/* With CRYPTO_ON, grow the record by whatever adj_size() reports. */
-	if (CRYPTO_ON(env)) {
-		npad = env->crypto_handle->adj_size(logrec.size);
-		logrec.size += npad;
-	}
-
-	/*
-	 * Records that go straight to __log_put() use a plain buffer; the
-	 * in-memory (non-durable, transactional) case allocates a
-	 * DB_TXNLOGREC with the record stored in lr->data.  DIAGNOSTIC
-	 * builds additionally keep a separate scratch buffer.
-	 */
-	if (is_durable || txnp == NULL) {
-		if ((ret =
-		    __os_malloc(env, logrec.size, &logrec.data)) != 0)
-			return (ret);
-	} else {
-		if ((ret = __os_malloc(env,
-		    logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
-			return (ret);
-#ifdef DIAGNOSTIC
-		if ((ret =
-		    __os_malloc(env, logrec.size, &logrec.data)) != 0) {
-			__os_free(env, lr);
-			return (ret);
-		}
-#else
-		logrec.data = lr->data;
-#endif
-	}
-	if (npad > 0)
-		memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
-	bp = logrec.data;
-
-	LOGCOPY_32(env, bp, &rectype);
-	bp += sizeof(rectype);
-
-	LOGCOPY_32(env, bp, &txn_num);
-	bp += sizeof(txn_num);
-
-	LOGCOPY_FROMLSN(env, bp, lsnp);
-	bp += sizeof(DB_LSN);
-
-	uinttmp = (u_int32_t)dbp->log_filename->id;
-	LOGCOPY_32(env, bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	uinttmp = (u_int32_t)pgno;
-	LOGCOPY_32(env,bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	if (meta_lsn != NULL) {
-		if (txnp != NULL) {
-			LOG *lp = env->lg_handle->reginfo.primary;
-			/* The buffer is already allocated: clean up on error. */
-			if (LOG_COMPARE(meta_lsn, &lp->lsn) >= 0 && (ret =
-			    __log_check_page_lsn(env, dbp, meta_lsn)) != 0)
-				goto err;
-		}
-		LOGCOPY_FROMLSN(env, bp, meta_lsn);
-	} else
-		memset(bp, 0, sizeof(*meta_lsn));
-	bp += sizeof(*meta_lsn);
-
-	uinttmp = (u_int32_t)meta_pgno;
-	LOGCOPY_32(env,bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	if (header == NULL) {
-		zero = 0;
-		LOGCOPY_32(env, bp, &zero);
-		bp += sizeof(u_int32_t);
-	} else {
-		LOGCOPY_32(env, bp, &header->size);
-		bp += sizeof(header->size);
-		memcpy(bp, header->data, header->size);
-		/* The swap is given the header copy in the record plus data. */
-		if (LOG_SWAPPED(env))
-			if ((ret = __db_pageswap(dbp,
-			    (PAGE *)bp, (size_t)header->size, (DBT *)data, 0)) != 0)
-				goto err;
-		bp += header->size;
-	}
-
-	uinttmp = (u_int32_t)next;
-	LOGCOPY_32(env,bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	uinttmp = (u_int32_t)last_pgno;
-	LOGCOPY_32(env,bp, &uinttmp);
-	bp += sizeof(uinttmp);
-
-	if (data == NULL) {
-		zero = 0;
-		LOGCOPY_32(env, bp, &zero);
-		bp += sizeof(u_int32_t);
-	} else {
-		LOGCOPY_32(env, bp, &data->size);
-		bp += sizeof(data->size);
-		memcpy(bp, data->data, data->size);
-		/*
-		 * NOTE(review): presumably __db_pageswap() above may have
-		 * replaced data->data with an allocated copy and flagged
-		 * it DB_DBT_APPMALLOC -- confirm against __db_pageswap().
-		 */
-		if (LOG_SWAPPED(env) && F_ISSET(data, DB_DBT_APPMALLOC))
-			__os_free(env, data->data);
-		bp += data->size;
-	}
-
-	DB_ASSERT(env,
-	    (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
-	if (is_durable || txnp == NULL) {
-		if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
-		    flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
-			*lsnp = *rlsnp;
-			if (rlsnp != ret_lsnp)
-				 *ret_lsnp = *rlsnp;
-		}
-	} else {
-		ret = 0;
-#ifdef DIAGNOSTIC
-		/*
-		 * Set the debug bit if we are going to log non-durable
-		 * transactions so they will be ignored by recovery.
-		 */
-		memcpy(lr->data, logrec.data, logrec.size);
-		rectype |= DB_debug_FLAG;
-		LOGCOPY_32(env, logrec.data, &rectype);
-
-		if (!IS_REP_CLIENT(env))
-			ret = __log_put(env,
-			    rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
-		/* From here on the record belongs to the transaction's list. */
-		STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
-		F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
-		LSN_NOT_LOGGED(*ret_lsnp);
-	}
-
-#ifdef LOG_DIAGNOSTIC
-	if (ret != 0)
-		(void)__db_pg_freedata_print(env,
-		    (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
-	__os_free(env, logrec.data);
-#else
-	if (is_durable || txnp == NULL)
-		__os_free(env, logrec.data);
-#endif
-	return (ret);
-
-err:
-	/*
-	 * The record was never passed to __log_put() nor queued on the
-	 * transaction, so every buffer allocated above is still ours.
-	 */
-#ifdef DIAGNOSTIC
-	__os_free(env, logrec.data);
-	if (!is_durable && txnp != NULL)
-		__os_free(env, lr);
-#else
-	if (is_durable || txnp == NULL)
-		__os_free(env, logrec.data);
-	else
-		__os_free(env, lr);
-#endif
-	return (ret);
-}
-
-/*
- * __db_pg_init_read --
- *	Decode the log record in recbuf into a newly allocated
- *	__db_pg_init_args structure.  Fields are read in this order: type,
- *	txnid, prev_lsn, fileid, pgno, header, data.  header.data and
- *	data.data are set to point inside recbuf; the bytes are not copied.
- *	When dbpp is not NULL, *dbpp is cleared and the file id is looked
- *	up with __dbreg_id_to_db(); that call's result becomes the return
- *	value.  When LOG_SWAPPED(env) is true and a DB handle was found,
- *	the header and data are passed to __db_pageswap().
- *
- *	If the allocation or __db_pageswap() fails, the error is returned,
- *	nothing remains allocated and *argpp is left untouched.  Otherwise
- *	*argpp is set, even when the lookup reported an error.
- *
- * PUBLIC: int __db_pg_init_read __P((ENV *, DB **, void *, void *,
- * PUBLIC: __db_pg_init_args **));
- */
-int
-__db_pg_init_read(env, dbpp, td, recbuf, argpp)
-	ENV *env;
-	DB **dbpp;
-	void *td;
-	void *recbuf;
-	__db_pg_init_args **argpp;
-{
-	__db_pg_init_args *argp;
-	u_int32_t uinttmp;
-	u_int8_t *bp;
-	int ret;
-
-	/* A single allocation holds the arguments followed by a DB_TXN. */
-	if ((ret = __os_malloc(env,
-	    sizeof(__db_pg_init_args) + sizeof(DB_TXN), &argp)) != 0)
-		return (ret);
-	bp = recbuf;
-	argp->txnp = (DB_TXN *)&argp[1];
-	memset(argp->txnp, 0, sizeof(DB_TXN));
-
-	argp->txnp->td = td;
-	LOGCOPY_32(env, &argp->type, bp);
-	bp += sizeof(argp->type);
-
-	LOGCOPY_32(env, &argp->txnp->txnid, bp);
-	bp += sizeof(argp->txnp->txnid);
-
-	LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
-	bp += sizeof(DB_LSN);
-
-	LOGCOPY_32(env, &uinttmp, bp);
-	argp->fileid = (int32_t)uinttmp;
-	bp += sizeof(uinttmp);
-	if (dbpp != NULL) {
-		*dbpp = NULL;
-		ret = __dbreg_id_to_db(
-		    env, argp->txnp, dbpp, argp->fileid, 1);
-	}
-
-	LOGCOPY_32(env, &uinttmp, bp);
-	argp->pgno = (db_pgno_t)uinttmp;
-	bp += sizeof(uinttmp);
-
-	/* Length-prefixed header bytes, referenced where they sit. */
-	memset(&argp->header, 0, sizeof(argp->header));
-	LOGCOPY_32(env,&argp->header.size, bp);
-	bp += sizeof(u_int32_t);
-	argp->header.data = bp;
-	bp += argp->header.size;
-
-	/* Length-prefixed data bytes, referenced where they sit. */
-	memset(&argp->data, 0, sizeof(argp->data));
-	LOGCOPY_32(env,&argp->data.size, bp);
-	bp += sizeof(u_int32_t);
-	argp->data.data = bp;
-	bp += argp->data.size;
-	if (LOG_SWAPPED(env) && dbpp != NULL && *dbpp != NULL) {
-		int t_ret;
-		if ((t_ret = __db_pageswap(*dbpp,
-		    (PAGE *)argp->header.data, (size_t)argp->header.size,
-		    &argp->data, 1)) != 0) {
-			/*
-			 * *argpp has not been written yet, so the structure
-			 * would be unreachable after this return: free it.
-			 */
-			__os_free(env, argp);
-			return (t_ret);
-		}
-	}
-
-	*argpp = argp;
-	return (ret);
-}
-
-/*
- * PUBLIC: int __db_pg_init_log __P((DB *, DB_TXN *, DB_LSN *,
- * PUBLIC: u_int32_t, db_pgno_t, const DBT *, const DBT *));
- */
-int
-__db_pg_init_log(dbp, txnp, ret_lsnp, flags, pgno, header, data)
- DB *dbp;
- DB_TXN *txnp;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- db_pgno_t pgno;
- const DBT *header;
- const DBT *data;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn, *rlsnp;
- DB_TXNLOGREC *lr;
- ENV *env;
- u_int32_t zero, uinttmp, rectype, txn_num;
- u_int npad;
- u_int8_t *bp;
- int is_durable, ret;
-
- COMPQUIET(lr, NULL);
-
- env = dbp->env;
- rlsnp = ret_lsnp;
- rectype = DB___db_pg_init;
- npad = 0;
- ret = 0;
-
- if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
- F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
- if (txnp == NULL)
- return (0);
- is_durable = 0;
- } else
- is_durable = 1;
-
- if (txnp == NULL) {
- txn_num = 0;
- lsnp = &null_lsn;
- null_lsn.file = null_lsn.offset = 0;
- } else {
- if (TAILQ_FIRST(&txnp->kids) != NULL &&
- (ret = __txn_activekids(env, rectype, txnp)) != 0)
- return (ret);
- /*
- * We need to assign begin_lsn while holding region mutex.
- * That assignment is done inside the DbEnv->log_put call,
- * so pass in the appropriate memory location to be filled
- * in by the log_put code.
- */
- DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
- txn_num = txnp->txnid;
- }
-
- DB_ASSERT(env, dbp->log_filename != NULL);
- if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
- (ret = __dbreg_lazy_id(dbp)) != 0)
- return (ret);
-
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t) + (header == NULL ? 0 : header->size)
- + sizeof(u_int32_t) + (data == NULL ? 0 : data->size);
- if (CRYPTO_ON(env)) {
- npad = env->crypto_handle->adj_size(logrec.size);
- logrec.size += npad;
- }
-
- if (is_durable || txnp == NULL) {
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0)
- return (ret);
- } else {
- if ((ret = __os_malloc(env,
- logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
- return (ret);
-#ifdef DIAGNOSTIC
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0) {
- __os_free(env, lr);
- return (ret);
- }
-#else
- logrec.data = lr->data;
-#endif
- }
- if (npad > 0)
- memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
- bp = logrec.data;
-
- LOGCOPY_32(env, bp, &rectype);
- bp += sizeof(rectype);
-
- LOGCOPY_32(env, bp, &txn_num);
- bp += sizeof(txn_num);
-
- LOGCOPY_FROMLSN(env, bp, lsnp);
- bp += sizeof(DB_LSN);
-
- uinttmp = (u_int32_t)dbp->log_filename->id;
- LOGCOPY_32(env, bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- uinttmp = (u_int32_t)pgno;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- if (header == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &header->size);
- bp += sizeof(header->size);
- memcpy(bp, header->data, header->size);
- if (LOG_SWAPPED(env))
- if ((ret = __db_pageswap(dbp,
- (PAGE *)bp, (size_t)header->size, (DBT *)data, 0)) != 0)
- return (ret);
- bp += header->size;
- }
-
- if (data == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &data->size);
- bp += sizeof(data->size);
- memcpy(bp, data->data, data->size);
- if (LOG_SWAPPED(env) && F_ISSET(data, DB_DBT_APPMALLOC))
- __os_free(env, data->data);
- bp += data->size;
- }
-
- DB_ASSERT(env,
- (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
- if (is_durable || txnp == NULL) {
- if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
- *lsnp = *rlsnp;
- if (rlsnp != ret_lsnp)
- *ret_lsnp = *rlsnp;
- }
- } else {
- ret = 0;
-#ifdef DIAGNOSTIC
- /*
- * Set the debug bit if we are going to log non-durable
- * transactions so they will be ignored by recovery.
- */
- memcpy(lr->data, logrec.data, logrec.size);
- rectype |= DB_debug_FLAG;
- LOGCOPY_32(env, logrec.data, &rectype);
-
- if (!IS_REP_CLIENT(env))
- ret = __log_put(env,
- rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
- STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
- F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
- LSN_NOT_LOGGED(*ret_lsnp);
- }
-
-#ifdef LOG_DIAGNOSTIC
- if (ret != 0)
- (void)__db_pg_init_print(env,
- (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
- __os_free(env, logrec.data);
-#else
- if (is_durable || txnp == NULL)
- __os_free(env, logrec.data);
-#endif
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_pg_sort_44_read __P((ENV *, DB **, void *,
- * PUBLIC: void *, __db_pg_sort_44_args **));
- */
-int
-__db_pg_sort_44_read(env, dbpp, td, recbuf, argpp)
- ENV *env;
- DB **dbpp;
- void *td;
- void *recbuf;
- __db_pg_sort_44_args **argpp;
-{
- __db_pg_sort_44_args *argp;
- u_int32_t uinttmp;
- u_int8_t *bp;
- int ret;
-
- if ((ret = __os_malloc(env,
- sizeof(__db_pg_sort_44_args) + sizeof(DB_TXN), &argp)) != 0)
- return (ret);
- bp = recbuf;
- argp->txnp = (DB_TXN *)&argp[1];
- memset(argp->txnp, 0, sizeof(DB_TXN));
-
- argp->txnp->td = td;
- LOGCOPY_32(env, &argp->type, bp);
- bp += sizeof(argp->type);
-
- LOGCOPY_32(env, &argp->txnp->txnid, bp);
- bp += sizeof(argp->txnp->txnid);
-
- LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->fileid = (int32_t)uinttmp;
- bp += sizeof(uinttmp);
- if (dbpp != NULL) {
- *dbpp = NULL;
- ret = __dbreg_id_to_db(
- env, argp->txnp, dbpp, argp->fileid, 1);
- }
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->meta = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_TOLSN(env, &argp->meta_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->last_free = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_TOLSN(env, &argp->last_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->last_pgno = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- memset(&argp->list, 0, sizeof(argp->list));
- LOGCOPY_32(env,&argp->list.size, bp);
- bp += sizeof(u_int32_t);
- argp->list.data = bp;
- bp += argp->list.size;
-
- *argpp = argp;
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_pg_trunc_read __P((ENV *, DB **, void *, void *,
- * PUBLIC: __db_pg_trunc_args **));
- */
-int
-__db_pg_trunc_read(env, dbpp, td, recbuf, argpp)
- ENV *env;
- DB **dbpp;
- void *td;
- void *recbuf;
- __db_pg_trunc_args **argpp;
-{
- __db_pg_trunc_args *argp;
- u_int32_t uinttmp;
- u_int8_t *bp;
- int ret;
-
- if ((ret = __os_malloc(env,
- sizeof(__db_pg_trunc_args) + sizeof(DB_TXN), &argp)) != 0)
- return (ret);
- bp = recbuf;
- argp->txnp = (DB_TXN *)&argp[1];
- memset(argp->txnp, 0, sizeof(DB_TXN));
-
- argp->txnp->td = td;
- LOGCOPY_32(env, &argp->type, bp);
- bp += sizeof(argp->type);
-
- LOGCOPY_32(env, &argp->txnp->txnid, bp);
- bp += sizeof(argp->txnp->txnid);
-
- LOGCOPY_TOLSN(env, &argp->prev_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->fileid = (int32_t)uinttmp;
- bp += sizeof(uinttmp);
- if (dbpp != NULL) {
- *dbpp = NULL;
- ret = __dbreg_id_to_db(
- env, argp->txnp, dbpp, argp->fileid, 1);
- }
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->meta = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_TOLSN(env, &argp->meta_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->last_free = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_TOLSN(env, &argp->last_lsn, bp);
- bp += sizeof(DB_LSN);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->next_free = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- LOGCOPY_32(env, &uinttmp, bp);
- argp->last_pgno = (db_pgno_t)uinttmp;
- bp += sizeof(uinttmp);
-
- memset(&argp->list, 0, sizeof(argp->list));
- LOGCOPY_32(env,&argp->list.size, bp);
- bp += sizeof(u_int32_t);
- argp->list.data = bp;
- bp += argp->list.size;
-
- *argpp = argp;
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_pg_trunc_log __P((DB *, DB_TXN *, DB_LSN *,
- * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, db_pgno_t,
- * PUBLIC: db_pgno_t, const DBT *));
- */
-int
-__db_pg_trunc_log(dbp, txnp, ret_lsnp, flags, meta, meta_lsn, last_free, last_lsn, next_free,
- last_pgno, list)
- DB *dbp;
- DB_TXN *txnp;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- db_pgno_t meta;
- DB_LSN * meta_lsn;
- db_pgno_t last_free;
- DB_LSN * last_lsn;
- db_pgno_t next_free;
- db_pgno_t last_pgno;
- const DBT *list;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn, *rlsnp;
- DB_TXNLOGREC *lr;
- ENV *env;
- u_int32_t zero, uinttmp, rectype, txn_num;
- u_int npad;
- u_int8_t *bp;
- int is_durable, ret;
-
- COMPQUIET(lr, NULL);
-
- env = dbp->env;
- rlsnp = ret_lsnp;
- rectype = DB___db_pg_trunc;
- npad = 0;
- ret = 0;
-
- if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
- F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
- if (txnp == NULL)
- return (0);
- is_durable = 0;
- } else
- is_durable = 1;
-
- if (txnp == NULL) {
- txn_num = 0;
- lsnp = &null_lsn;
- null_lsn.file = null_lsn.offset = 0;
- } else {
- if (TAILQ_FIRST(&txnp->kids) != NULL &&
- (ret = __txn_activekids(env, rectype, txnp)) != 0)
- return (ret);
- /*
- * We need to assign begin_lsn while holding region mutex.
- * That assignment is done inside the DbEnv->log_put call,
- * so pass in the appropriate memory location to be filled
- * in by the log_put code.
- */
- DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
- txn_num = txnp->txnid;
- }
-
- DB_ASSERT(env, dbp->log_filename != NULL);
- if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
- (ret = __dbreg_lazy_id(dbp)) != 0)
- return (ret);
-
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(*meta_lsn)
- + sizeof(u_int32_t)
- + sizeof(*last_lsn)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t)
- + sizeof(u_int32_t) + (list == NULL ? 0 : list->size);
- if (CRYPTO_ON(env)) {
- npad = env->crypto_handle->adj_size(logrec.size);
- logrec.size += npad;
- }
-
- if (is_durable || txnp == NULL) {
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0)
- return (ret);
- } else {
- if ((ret = __os_malloc(env,
- logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
- return (ret);
-#ifdef DIAGNOSTIC
- if ((ret =
- __os_malloc(env, logrec.size, &logrec.data)) != 0) {
- __os_free(env, lr);
- return (ret);
- }
-#else
- logrec.data = lr->data;
-#endif
- }
- if (npad > 0)
- memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
-
- bp = logrec.data;
-
- LOGCOPY_32(env, bp, &rectype);
- bp += sizeof(rectype);
-
- LOGCOPY_32(env, bp, &txn_num);
- bp += sizeof(txn_num);
-
- LOGCOPY_FROMLSN(env, bp, lsnp);
- bp += sizeof(DB_LSN);
-
- uinttmp = (u_int32_t)dbp->log_filename->id;
- LOGCOPY_32(env, bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- uinttmp = (u_int32_t)meta;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- if (meta_lsn != NULL) {
- if (txnp != NULL) {
- LOG *lp = env->lg_handle->reginfo.primary;
- if (LOG_COMPARE(meta_lsn, &lp->lsn) >= 0 && (ret =
- __log_check_page_lsn(env, dbp, meta_lsn)) != 0)
- return (ret);
- }
- LOGCOPY_FROMLSN(env, bp, meta_lsn);
- } else
- memset(bp, 0, sizeof(*meta_lsn));
- bp += sizeof(*meta_lsn);
-
- uinttmp = (u_int32_t)last_free;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- if (last_lsn != NULL) {
- if (txnp != NULL) {
- LOG *lp = env->lg_handle->reginfo.primary;
- if (LOG_COMPARE(last_lsn, &lp->lsn) >= 0 && (ret =
- __log_check_page_lsn(env, dbp, last_lsn)) != 0)
- return (ret);
- }
- LOGCOPY_FROMLSN(env, bp, last_lsn);
- } else
- memset(bp, 0, sizeof(*last_lsn));
- bp += sizeof(*last_lsn);
-
- uinttmp = (u_int32_t)next_free;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- uinttmp = (u_int32_t)last_pgno;
- LOGCOPY_32(env,bp, &uinttmp);
- bp += sizeof(uinttmp);
-
- if (list == NULL) {
- zero = 0;
- LOGCOPY_32(env, bp, &zero);
- bp += sizeof(u_int32_t);
- } else {
- LOGCOPY_32(env, bp, &list->size);
- bp += sizeof(list->size);
- memcpy(bp, list->data, list->size);
- bp += list->size;
- }
-
- DB_ASSERT(env,
- (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
-
- if (is_durable || txnp == NULL) {
- if ((ret = __log_put(env, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
- *lsnp = *rlsnp;
- if (rlsnp != ret_lsnp)
- *ret_lsnp = *rlsnp;
- }
- } else {
- ret = 0;
-#ifdef DIAGNOSTIC
- /*
- * Set the debug bit if we are going to log non-durable
- * transactions so they will be ignored by recovery.
- */
- memcpy(lr->data, logrec.data, logrec.size);
- rectype |= DB_debug_FLAG;
- LOGCOPY_32(env, logrec.data, &rectype);
-
- if (!IS_REP_CLIENT(env))
- ret = __log_put(env,
- rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
-#endif
- STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
- F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
- LSN_NOT_LOGGED(*ret_lsnp);
- }
-
-#ifdef LOG_DIAGNOSTIC
- if (ret != 0)
- (void)__db_pg_trunc_print(env,
- (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
-#endif
-
-#ifdef DIAGNOSTIC
- __os_free(env, logrec.data);
-#else
- if (is_durable || txnp == NULL)
- __os_free(env, logrec.data);
-#endif
- return (ret);
-}
-
-/*
- * PUBLIC: int __db_init_recover __P((ENV *, DB_DISTAB *));
- */
-int
-__db_init_recover(env, dtabp)
- ENV *env;
- DB_DISTAB *dtabp;
-{
- int ret;
-
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_addrem_recover, DB___db_addrem)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_big_recover, DB___db_big)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_ovref_recover, DB___db_ovref)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_debug_recover, DB___db_debug)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_noop_recover, DB___db_noop)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_pg_alloc_recover, DB___db_pg_alloc)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_pg_free_recover, DB___db_pg_free)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_cksum_recover, DB___db_cksum)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_pg_freedata_recover, DB___db_pg_freedata)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_pg_init_recover, DB___db_pg_init)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_pg_trunc_recover, DB___db_pg_trunc)) != 0)
- return (ret);
- return (0);
-}
diff --git a/db/db_autop.c b/db/db_autop.c
deleted file mode 100644
index f3b0635..0000000
--- a/db/db_autop.c
+++ /dev/null
@@ -1,802 +0,0 @@
-/* Do not edit: automatically built by gen_rec.awk. */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/crypto.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_dispatch.h"
-#include "dbinc/db_am.h"
-#include "dbinc/log.h"
-#include "dbinc/txn.h"
-
-/*
- * PUBLIC: int __db_addrem_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_addrem_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_addrem_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_addrem_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_addrem%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\topcode: %lu\n", (u_long)argp->opcode);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\tindx: %lu\n", (u_long)argp->indx);
- (void)printf("\tnbytes: %lu\n", (u_long)argp->nbytes);
- (void)printf("\thdr: ");
- for (i = 0; i < argp->hdr.size; i++) {
- ch = ((u_int8_t *)argp->hdr.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tdbt: ");
- for (i = 0; i < argp->dbt.size; i++) {
- ch = ((u_int8_t *)argp->dbt.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tpagelsn: [%lu][%lu]\n",
- (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_big_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_big_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_big_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_big_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_big%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\topcode: %lu\n", (u_long)argp->opcode);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\tprev_pgno: %lu\n", (u_long)argp->prev_pgno);
- (void)printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno);
- (void)printf("\tdbt: ");
- for (i = 0; i < argp->dbt.size; i++) {
- ch = ((u_int8_t *)argp->dbt.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tpagelsn: [%lu][%lu]\n",
- (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
- (void)printf("\tprevlsn: [%lu][%lu]\n",
- (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset);
- (void)printf("\tnextlsn: [%lu][%lu]\n",
- (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_ovref_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_ovref_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_ovref_args *argp;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_ovref_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_ovref%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\tadjust: %ld\n", (long)argp->adjust);
- (void)printf("\tlsn: [%lu][%lu]\n",
- (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_relink_42_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_relink_42_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_relink_42_args *argp;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_relink_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_relink_42%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\topcode: %lu\n", (u_long)argp->opcode);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\tlsn: [%lu][%lu]\n",
- (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
- (void)printf("\tprev: %lu\n", (u_long)argp->prev);
- (void)printf("\tlsn_prev: [%lu][%lu]\n",
- (u_long)argp->lsn_prev.file, (u_long)argp->lsn_prev.offset);
- (void)printf("\tnext: %lu\n", (u_long)argp->next);
- (void)printf("\tlsn_next: [%lu][%lu]\n",
- (u_long)argp->lsn_next.file, (u_long)argp->lsn_next.offset);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_debug_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_debug_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_debug_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret = __db_debug_read(env, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_debug%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\top: ");
- for (i = 0; i < argp->op.size; i++) {
- ch = ((u_int8_t *)argp->op.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tkey: ");
- for (i = 0; i < argp->key.size; i++) {
- ch = ((u_int8_t *)argp->key.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tdata: ");
- for (i = 0; i < argp->data.size; i++) {
- ch = ((u_int8_t *)argp->data.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\targ_flags: %lu\n", (u_long)argp->arg_flags);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_noop_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_noop_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_noop_args *argp;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_noop_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_noop%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\tprevlsn: [%lu][%lu]\n",
- (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_pg_alloc_42_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_pg_alloc_42_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_pg_alloc_42_args *argp;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_pg_alloc_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_pg_alloc_42%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno);
- (void)printf("\tpage_lsn: [%lu][%lu]\n",
- (u_long)argp->page_lsn.file, (u_long)argp->page_lsn.offset);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\tptype: %lu\n", (u_long)argp->ptype);
- (void)printf("\tnext: %lu\n", (u_long)argp->next);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_pg_alloc_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_pg_alloc_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_pg_alloc_args *argp;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_pg_alloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_pg_alloc%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno);
- (void)printf("\tpage_lsn: [%lu][%lu]\n",
- (u_long)argp->page_lsn.file, (u_long)argp->page_lsn.offset);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\tptype: %lu\n", (u_long)argp->ptype);
- (void)printf("\tnext: %lu\n", (u_long)argp->next);
- (void)printf("\tlast_pgno: %lu\n", (u_long)argp->last_pgno);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_pg_free_42_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_pg_free_42_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_pg_free_42_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_pg_free_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_pg_free_42%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno);
- (void)printf("\theader: ");
- for (i = 0; i < argp->header.size; i++) {
- ch = ((u_int8_t *)argp->header.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tnext: %lu\n", (u_long)argp->next);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_pg_free_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_pg_free_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_pg_free_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_pg_free_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_pg_free%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno);
- (void)printf("\theader: ");
- for (i = 0; i < argp->header.size; i++) {
- ch = ((u_int8_t *)argp->header.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tnext: %lu\n", (u_long)argp->next);
- (void)printf("\tlast_pgno: %lu\n", (u_long)argp->last_pgno);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_cksum_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_cksum_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_cksum_args *argp;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_cksum%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_pg_freedata_42_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_pg_freedata_42_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_pg_freedata_42_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_pg_freedata_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_pg_freedata_42%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno);
- (void)printf("\theader: ");
- for (i = 0; i < argp->header.size; i++) {
- ch = ((u_int8_t *)argp->header.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tnext: %lu\n", (u_long)argp->next);
- (void)printf("\tdata: ");
- for (i = 0; i < argp->data.size; i++) {
- ch = ((u_int8_t *)argp->data.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_pg_freedata_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_pg_freedata_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_pg_freedata_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_pg_freedata_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_pg_freedata%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno);
- (void)printf("\theader: ");
- for (i = 0; i < argp->header.size; i++) {
- ch = ((u_int8_t *)argp->header.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tnext: %lu\n", (u_long)argp->next);
- (void)printf("\tlast_pgno: %lu\n", (u_long)argp->last_pgno);
- (void)printf("\tdata: ");
- for (i = 0; i < argp->data.size; i++) {
- ch = ((u_int8_t *)argp->data.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_pg_init_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_pg_init_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_pg_init_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_pg_init_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_pg_init%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
- (void)printf("\theader: ");
- for (i = 0; i < argp->header.size; i++) {
- ch = ((u_int8_t *)argp->header.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\tdata: ");
- for (i = 0; i < argp->data.size; i++) {
- ch = ((u_int8_t *)argp->data.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_pg_sort_44_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_pg_sort_44_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_pg_sort_44_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_pg_sort_44_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_pg_sort_44%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tmeta: %lu\n", (u_long)argp->meta);
- (void)printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- (void)printf("\tlast_free: %lu\n", (u_long)argp->last_free);
- (void)printf("\tlast_lsn: [%lu][%lu]\n",
- (u_long)argp->last_lsn.file, (u_long)argp->last_lsn.offset);
- (void)printf("\tlast_pgno: %lu\n", (u_long)argp->last_pgno);
- (void)printf("\tlist: ");
- for (i = 0; i < argp->list.size; i++) {
- ch = ((u_int8_t *)argp->list.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_pg_trunc_print __P((ENV *, DBT *, DB_LSN *,
- * PUBLIC: db_recops, void *));
- */
-int
-__db_pg_trunc_print(env, dbtp, lsnp, notused2, notused3)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __db_pg_trunc_args *argp;
- u_int32_t i;
- int ch;
- int ret;
-
- notused2 = DB_TXN_PRINT;
- notused3 = NULL;
-
- if ((ret =
- __db_pg_trunc_read(env, NULL, NULL, dbtp->data, &argp)) != 0)
- return (ret);
- (void)printf(
- "[%lu][%lu]__db_pg_trunc%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file, (u_long)lsnp->offset,
- (argp->type & DB_debug_FLAG) ? "_debug" : "",
- (u_long)argp->type,
- (u_long)argp->txnp->txnid,
- (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
- (void)printf("\tfileid: %ld\n", (long)argp->fileid);
- (void)printf("\tmeta: %lu\n", (u_long)argp->meta);
- (void)printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- (void)printf("\tlast_free: %lu\n", (u_long)argp->last_free);
- (void)printf("\tlast_lsn: [%lu][%lu]\n",
- (u_long)argp->last_lsn.file, (u_long)argp->last_lsn.offset);
- (void)printf("\tnext_free: %lu\n", (u_long)argp->next_free);
- (void)printf("\tlast_pgno: %lu\n", (u_long)argp->last_pgno);
- (void)printf("\tlist: ");
- for (i = 0; i < argp->list.size; i++) {
- ch = ((u_int8_t *)argp->list.data)[i];
- printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
- }
- (void)printf("\n");
- (void)printf("\n");
- __os_free(env, argp);
- return (0);
-}
-
-/*
- * PUBLIC: int __db_init_print __P((ENV *, DB_DISTAB *));
- */
-int
-__db_init_print(env, dtabp)
- ENV *env;
- DB_DISTAB *dtabp;
-{
- int ret;
-
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_addrem_print, DB___db_addrem)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_big_print, DB___db_big)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_ovref_print, DB___db_ovref)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_debug_print, DB___db_debug)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_noop_print, DB___db_noop)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_pg_alloc_print, DB___db_pg_alloc)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_pg_free_print, DB___db_pg_free)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_cksum_print, DB___db_cksum)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_pg_freedata_print, DB___db_pg_freedata)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_pg_init_print, DB___db_pg_init)) != 0)
- return (ret);
- if ((ret = __db_add_recovery_int(env, dtabp,
- __db_pg_trunc_print, DB___db_pg_trunc)) != 0)
- return (ret);
- return (0);
-}
diff --git a/db/db_cam.c b/db/db_cam.c
deleted file mode 100644
index 4c1322d..0000000
--- a/db/db_cam.c
+++ /dev/null
@@ -1,3460 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/btree.h"
-#include "dbinc/hash.h"
-#include "dbinc/lock.h"
-#include "dbinc/mp.h"
-#include "dbinc/partition.h"
-#include "dbinc/qam.h"
-#include "dbinc/txn.h"
-
-static int __db_s_count __P((DB *));
-static int __db_wrlock_err __P((ENV *));
-static int __dbc_del_foreign __P((DBC *));
-static int __dbc_del_oldskey __P((DB *, DBC *, DBT *, DBT *, DBT *));
-static int __dbc_del_secondary __P((DBC *));
-static int __dbc_pget_recno __P((DBC *, DBT *, DBT *, u_int32_t));
-static inline int __dbc_put_append __P((DBC *,
- DBT *, DBT *, u_int32_t *, u_int32_t));
-static inline int __dbc_put_fixed_len __P((DBC *, DBT *, DBT *));
-static inline int __dbc_put_partial __P((DBC *,
- DBT *, DBT *, DBT *, DBT *, u_int32_t *, u_int32_t));
-static int __dbc_put_primary __P((DBC *, DBT *, DBT *, u_int32_t));
-static inline int __dbc_put_resolve_key __P((DBC *,
- DBT *, DBT *, u_int32_t *, u_int32_t));
-static inline int __dbc_put_secondaries __P((DBC *,
- DBT *, DBT *, DBT *, int, DBT *, u_int32_t *));
-
-#define CDB_LOCKING_INIT(env, dbc) \
- /* \
- * If we are running CDB, this had better be either a write \
- * cursor or an immediate writer. If it's a regular writer, \
- * that means we have an IWRITE lock and we need to upgrade \
- * it to a write lock. \
- */ \
- if (CDB_LOCKING(env)) { \
- if (!F_ISSET(dbc, DBC_WRITECURSOR | DBC_WRITER)) \
- return (__db_wrlock_err(env)); \
- \
- if (F_ISSET(dbc, DBC_WRITECURSOR) && \
- (ret = __lock_get(env, \
- (dbc)->locker, DB_LOCK_UPGRADE, &(dbc)->lock_dbt, \
- DB_LOCK_WRITE, &(dbc)->mylock)) != 0) \
- return (ret); \
- }
-#define CDB_LOCKING_DONE(env, dbc) \
- /* Release the upgraded lock. */ \
- if (F_ISSET(dbc, DBC_WRITECURSOR)) \
- (void)__lock_downgrade( \
- env, &(dbc)->mylock, DB_LOCK_IWRITE, 0);
-
-#define SET_READ_LOCKING_FLAGS(dbc, var) do { \
- var = 0; \
- if (!F_ISSET(dbc, DBC_READ_COMMITTED | DBC_READ_UNCOMMITTED)) { \
- if (LF_ISSET(DB_READ_COMMITTED)) \
- var = DBC_READ_COMMITTED | DBC_WAS_READ_COMMITTED; \
- if (LF_ISSET(DB_READ_UNCOMMITTED)) \
- var = DBC_READ_UNCOMMITTED; \
- } \
- LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED); \
-} while (0)
-
-/*
- * __dbc_close --
- * DBC->close.
- *
- * PUBLIC: int __dbc_close __P((DBC *));
- */
-int
-__dbc_close(dbc)
- DBC *dbc;
-{
- DB *dbp;
- DBC *opd;
- DBC_INTERNAL *cp;
- DB_TXN *txn;
- ENV *env;
- int ret, t_ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
- cp = dbc->internal;
- opd = cp->opd;
- ret = 0;
-
- /*
- * Remove the cursor(s) from the active queue. We may be closing two
- * cursors at once here, a top-level one and a lower-level, off-page
- * duplicate one. The access-method specific cursor close routine must
- * close both of them in a single call.
- *
- * !!!
- * Cursors must be removed from the active queue before calling the
- * access specific cursor close routine, btree depends on having that
- * order of operations.
- */
- MUTEX_LOCK(env, dbp->mutex);
-
- if (opd != NULL) {
- DB_ASSERT(env, F_ISSET(opd, DBC_ACTIVE));
- F_CLR(opd, DBC_ACTIVE);
- TAILQ_REMOVE(&dbp->active_queue, opd, links);
- }
- DB_ASSERT(env, F_ISSET(dbc, DBC_ACTIVE));
- F_CLR(dbc, DBC_ACTIVE);
- TAILQ_REMOVE(&dbp->active_queue, dbc, links);
-
- MUTEX_UNLOCK(env, dbp->mutex);
-
- /* Call the access specific cursor close routine. */
- if ((t_ret =
- dbc->am_close(dbc, PGNO_INVALID, NULL)) != 0 && ret == 0)
- ret = t_ret;
-
- /*
- * Release the lock after calling the access method specific close
- * routine, a Btree cursor may have had pending deletes.
- */
- if (CDB_LOCKING(env)) {
- /*
- * Also, be sure not to free anything if mylock.off is
- * INVALID; in some cases, such as idup'ed read cursors
- * and secondary update cursors, a cursor in a CDB
- * environment may not have a lock at all.
- */
- if ((t_ret = __LPUT(dbc, dbc->mylock)) != 0 && ret == 0)
- ret = t_ret;
-
- /* For safety's sake, since this is going on the free queue. */
- memset(&dbc->mylock, 0, sizeof(dbc->mylock));
- if (opd != NULL)
- memset(&opd->mylock, 0, sizeof(opd->mylock));
- }
-
- if ((txn = dbc->txn) != NULL)
- txn->cursors--;
-
- /* Move the cursor(s) to the free queue. */
- MUTEX_LOCK(env, dbp->mutex);
- if (opd != NULL) {
- if (txn != NULL)
- txn->cursors--;
- TAILQ_INSERT_TAIL(&dbp->free_queue, opd, links);
- opd = NULL;
- }
- TAILQ_INSERT_TAIL(&dbp->free_queue, dbc, links);
- MUTEX_UNLOCK(env, dbp->mutex);
-
- if (txn != NULL && F_ISSET(txn, TXN_PRIVATE) && txn->cursors == 0 &&
- (t_ret = __txn_commit(txn, 0)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __dbc_destroy --
- * Destroy the cursor, called after DBC->close.
- *
- * PUBLIC: int __dbc_destroy __P((DBC *));
- */
-int
-__dbc_destroy(dbc)
- DBC *dbc;
-{
- DB *dbp;
- ENV *env;
- int ret, t_ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- /* Remove the cursor from the free queue. */
- MUTEX_LOCK(env, dbp->mutex);
- TAILQ_REMOVE(&dbp->free_queue, dbc, links);
- MUTEX_UNLOCK(env, dbp->mutex);
-
- /* Free up allocated memory. */
- if (dbc->my_rskey.data != NULL)
- __os_free(env, dbc->my_rskey.data);
- if (dbc->my_rkey.data != NULL)
- __os_free(env, dbc->my_rkey.data);
- if (dbc->my_rdata.data != NULL)
- __os_free(env, dbc->my_rdata.data);
-
- /* Call the access specific cursor destroy routine. */
- ret = dbc->am_destroy == NULL ? 0 : dbc->am_destroy(dbc);
-
- /*
- * Release the lock id for this cursor.
- */
- if (LOCKING_ON(env) &&
- F_ISSET(dbc, DBC_OWN_LID) &&
- (t_ret = __lock_id_free(env, dbc->lref)) != 0 && ret == 0)
- ret = t_ret;
-
- __os_free(env, dbc);
-
- return (ret);
-}
-
-/*
- * __dbc_cmp --
- * Compare the position of two cursors. Return whether two cursors are
- * pointing to the same key/data pair.
- *
- * result == 0 if both cursors refer to the same item.
- * result == 1 otherwise
- *
- * PUBLIC: int __dbc_cmp __P((DBC *, DBC *, int *));
- */
-int
-__dbc_cmp(dbc, other_dbc, result)
- DBC *dbc, *other_dbc;
- int *result;
-{
- DBC *curr_dbc, *curr_odbc;
- DBC_INTERNAL *dbc_int, *odbc_int;
- ENV *env;
- int ret;
-
- env = dbc->env;
- ret = 0;
-
-#ifdef HAVE_PARTITION
- if (DB_IS_PARTITIONED(dbc->dbp)) {
- dbc = ((PART_CURSOR *)dbc->internal)->sub_cursor;
- other_dbc = ((PART_CURSOR *)other_dbc->internal)->sub_cursor;
- }
- /* Both cursors must still be valid. */
- if (dbc == NULL || other_dbc == NULL) {
- __db_errx(env,
-"Both cursors must be initialized before calling DBC->cmp.");
- return (EINVAL);
- }
-
- if (dbc->dbp != other_dbc->dbp) {
- *result = 1;
- return (0);
- }
-#endif
-
-#ifdef HAVE_COMPRESSION
- if (DB_IS_COMPRESSED(dbc->dbp))
- return (__bamc_compress_cmp(dbc, other_dbc, result));
-#endif
-
- curr_dbc = dbc;
- curr_odbc = other_dbc;
- dbc_int = dbc->internal;
- odbc_int = other_dbc->internal;
-
- /* Both cursors must be on valid positions. */
- if (dbc_int->pgno == PGNO_INVALID || odbc_int->pgno == PGNO_INVALID) {
- __db_errx(env,
-"Both cursors must be initialized before calling DBC->cmp.");
- return (EINVAL);
- }
-
- /*
- * Use a loop since cursors can be nested. Off page duplicate
- * sets can only be nested one level deep, so it is safe to use a
- * while (true) loop.
- */
- while (1) {
- if (dbc_int->pgno == odbc_int->pgno &&
- dbc_int->indx == odbc_int->indx) {
- /*
- * If one cursor is sitting on an off page duplicate
- * set, the other will be pointing to the same set. Be
- * careful, and check anyway.
- */
- if (dbc_int->opd != NULL && odbc_int->opd != NULL) {
- curr_dbc = dbc_int->opd;
- curr_odbc = odbc_int->opd;
- dbc_int = dbc_int->opd->internal;
- odbc_int= odbc_int->opd->internal;
- continue;
- } else if (dbc_int->opd == NULL &&
- odbc_int->opd == NULL)
- *result = 0;
- else {
- __db_errx(env,
- "DBCursor->cmp mismatched off page duplicate cursor pointers.");
- return (EINVAL);
- }
-
- switch (curr_dbc->dbtype) {
- case DB_HASH:
- /*
- * Make sure that on-page duplicate data
- * indexes match, and that the deleted
- * flags are consistent.
- */
- ret = __hamc_cmp(curr_dbc, curr_odbc, result);
- break;
- case DB_BTREE:
- case DB_RECNO:
- /*
- * Check for consistent deleted flags on btree
- * specific cursors.
- */
- ret = __bamc_cmp(curr_dbc, curr_odbc, result);
- break;
- default:
- /* NO-OP break out. */
- break;
- }
- } else
- *result = 1;
- return (ret);
- }
- /* NOTREACHED. */
- return (ret);
-}
-
-/*
- * __dbc_count --
- * Return a count of duplicate data items.
- *
- * PUBLIC: int __dbc_count __P((DBC *, db_recno_t *));
- */
-int
-__dbc_count(dbc, recnop)
- DBC *dbc;
- db_recno_t *recnop;
-{
- ENV *env;
- int ret;
-
- env = dbc->env;
-
-#ifdef HAVE_PARTITION
- if (DB_IS_PARTITIONED(dbc->dbp))
- dbc = ((PART_CURSOR *)dbc->internal)->sub_cursor;
-#endif
- /*
- * Cursor Cleanup Note:
- * All of the cursors passed to the underlying access methods by this
- * routine are not duplicated and will not be cleaned up on return.
- * So, pages/locks that the cursor references must be resolved by the
- * underlying functions.
- */
- switch (dbc->dbtype) {
- case DB_QUEUE:
- case DB_RECNO:
- *recnop = 1;
- break;
- case DB_HASH:
- if (dbc->internal->opd == NULL) {
- if ((ret = __hamc_count(dbc, recnop)) != 0)
- return (ret);
- break;
- }
- /* FALLTHROUGH */
- case DB_BTREE:
-#ifdef HAVE_COMPRESSION
- if (DB_IS_COMPRESSED(dbc->dbp))
- return (__bamc_compress_count(dbc, recnop));
-#endif
- if ((ret = __bamc_count(dbc, recnop)) != 0)
- return (ret);
- break;
- case DB_UNKNOWN:
- default:
- return (__db_unknown_type(env, "__dbc_count", dbc->dbtype));
- }
- return (0);
-}
-
-/*
- * __dbc_del --
- * DBC->del.
- *
- * PUBLIC: int __dbc_del __P((DBC *, u_int32_t));
- */
-int
-__dbc_del(dbc, flags)
- DBC *dbc;
- u_int32_t flags;
-{
- DB *dbp;
- ENV *env;
- int ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- CDB_LOCKING_INIT(env, dbc);
-
- /*
- * If we're a secondary index, and DB_UPDATE_SECONDARY isn't set
- * (which it only is if we're being called from a primary update),
- * then we need to call through to the primary and delete the item.
- *
- * Note that this will delete the current item; we don't need to
- * delete it ourselves as well, so we can just goto done.
- */
- if (flags != DB_UPDATE_SECONDARY && F_ISSET(dbp, DB_AM_SECONDARY)) {
- ret = __dbc_del_secondary(dbc);
- goto done;
- }
-
- /*
- * If we are a foreign db, go through and check any foreign key
- * constraints first, which will make rolling back changes on an abort
- * simpler.
- */
- if (LIST_FIRST(&dbp->f_primaries) != NULL &&
- (ret = __dbc_del_foreign(dbc)) != 0)
- goto done;
-
- /*
- * If we are a primary and have secondary indices, go through
- * and delete any secondary keys that point at the current record.
- */
- if (DB_IS_PRIMARY(dbp) &&
- (ret = __dbc_del_primary(dbc)) != 0)
- goto done;
-
-#ifdef HAVE_COMPRESSION
- if (DB_IS_COMPRESSED(dbp))
- ret = __bamc_compress_del(dbc, flags);
- else
-#endif
- ret = __dbc_idel(dbc, flags);
-
-done: CDB_LOCKING_DONE(env, dbc);
-
- return (ret);
-}
-
-/*
- * __dbc_idel --
- * Implementation of DBC->del.
- *
- * PUBLIC: int __dbc_idel __P((DBC *, u_int32_t));
- */
-int
-__dbc_idel(dbc, flags)
- DBC *dbc;
- u_int32_t flags;
-{
- DB *dbp;
- DBC *opd;
- int ret, t_ret;
-
- COMPQUIET(flags, 0);
-
- dbp = dbc->dbp;
-
- /*
- * Cursor Cleanup Note:
- * All of the cursors passed to the underlying access methods by this
- * routine are not duplicated and will not be cleaned up on return.
- * So, pages/locks that the cursor references must be resolved by the
- * underlying functions.
- */
-
- /*
- * Off-page duplicate trees are locked in the primary tree, that is,
- * we acquire a write lock in the primary tree and no locks in the
- * off-page dup tree. If the del operation is done in an off-page
- * duplicate tree, call the primary cursor's upgrade routine first.
- */
- opd = dbc->internal->opd;
- if (opd == NULL)
- ret = dbc->am_del(dbc, flags);
- else if ((ret = dbc->am_writelock(dbc)) == 0)
- ret = opd->am_del(opd, flags);
-
- /*
- * If this was an update that is supporting dirty reads
- * then we may have just swapped our read for a write lock
- * which is held by the surviving cursor. We need
- * to explicitly downgrade this lock. The closed cursor
- * may only have had a read lock.
- */
- if (F_ISSET(dbp, DB_AM_READ_UNCOMMITTED) &&
- dbc->internal->lock_mode == DB_LOCK_WRITE) {
- if ((t_ret =
- __TLPUT(dbc, dbc->internal->lock)) != 0 && ret == 0)
- ret = t_ret;
- if (t_ret == 0)
- dbc->internal->lock_mode = DB_LOCK_WWRITE;
- if (dbc->internal->page != NULL && (t_ret =
- __memp_shared(dbp->mpf, dbc->internal->page)) != 0 &&
- ret == 0)
- ret = t_ret;
- }
-
- return (ret);
-}
-
-#ifdef HAVE_COMPRESSION
-/*
- * __dbc_bulk_del --
- * Bulk del for a cursor.
- *
- * Only implemented for compressed BTrees. In this file in order to
- * use the CDB_LOCKING_* macros.
- *
- * PUBLIC: #ifdef HAVE_COMPRESSION
- * PUBLIC: int __dbc_bulk_del __P((DBC *, DBT *, u_int32_t));
- * PUBLIC: #endif
- */
-int
-__dbc_bulk_del(dbc, key, flags)
- DBC *dbc;
- DBT *key;
- u_int32_t flags;
-{
- ENV *env;
- int ret;
-
- env = dbc->env;
-
- DB_ASSERT(env, DB_IS_COMPRESSED(dbc->dbp));
-
- CDB_LOCKING_INIT(env, dbc);
-
- ret = __bamc_compress_bulk_del(dbc, key, flags);
-
- CDB_LOCKING_DONE(env, dbc);
-
- return (ret);
-}
-#endif
-
-/*
- * __dbc_dup --
- * Duplicate a cursor
- *
- * PUBLIC: int __dbc_dup __P((DBC *, DBC **, u_int32_t));
- */
-int
-__dbc_dup(dbc_orig, dbcp, flags)
- DBC *dbc_orig;
- DBC **dbcp;
- u_int32_t flags;
-{
- DBC *dbc_n, *dbc_nopd;
- int ret;
-
- dbc_n = dbc_nopd = NULL;
-
- /* Allocate a new cursor and initialize it. */
- if ((ret = __dbc_idup(dbc_orig, &dbc_n, flags)) != 0)
- goto err;
- *dbcp = dbc_n;
-
- /*
- * If the cursor references an off-page duplicate tree, allocate a
- * new cursor for that tree and initialize it.
- */
- if (dbc_orig->internal->opd != NULL) {
- if ((ret =
- __dbc_idup(dbc_orig->internal->opd, &dbc_nopd, flags)) != 0)
- goto err;
- dbc_n->internal->opd = dbc_nopd;
- dbc_nopd->internal->pdbc = dbc_n;
- }
- return (0);
-
-err: if (dbc_n != NULL)
- (void)__dbc_close(dbc_n);
- if (dbc_nopd != NULL)
- (void)__dbc_close(dbc_nopd);
-
- return (ret);
-}
-
-/*
- * __dbc_idup --
- * Internal version of __dbc_dup.
- *
- * PUBLIC: int __dbc_idup __P((DBC *, DBC **, u_int32_t));
- */
-int
-__dbc_idup(dbc_orig, dbcp, flags)
- DBC *dbc_orig, **dbcp;
- u_int32_t flags;
-{
- DB *dbp;
- DBC *dbc_n;
- DBC_INTERNAL *int_n, *int_orig;
- ENV *env;
- int ret;
-
- dbp = dbc_orig->dbp;
- dbc_n = *dbcp;
- env = dbp->env;
-
- if ((ret = __db_cursor_int(dbp, dbc_orig->thread_info,
- dbc_orig->txn, dbc_orig->dbtype, dbc_orig->internal->root,
- F_ISSET(dbc_orig, DBC_OPD) | DBC_DUPLICATE,
- dbc_orig->locker, &dbc_n)) != 0)
- return (ret);
-
- /* Position the cursor if requested, acquiring the necessary locks. */
- if (LF_ISSET(DB_POSITION)) {
- int_n = dbc_n->internal;
- int_orig = dbc_orig->internal;
-
- dbc_n->flags |= dbc_orig->flags & ~DBC_OWN_LID;
-
- int_n->indx = int_orig->indx;
- int_n->pgno = int_orig->pgno;
- int_n->root = int_orig->root;
- int_n->lock_mode = int_orig->lock_mode;
-
- int_n->stream_start_pgno = int_orig->stream_start_pgno;
- int_n->stream_off = int_orig->stream_off;
- int_n->stream_curr_pgno = int_orig->stream_curr_pgno;
-
- switch (dbc_orig->dbtype) {
- case DB_QUEUE:
- if ((ret = __qamc_dup(dbc_orig, dbc_n)) != 0)
- goto err;
- break;
- case DB_BTREE:
- case DB_RECNO:
- if ((ret = __bamc_dup(dbc_orig, dbc_n, flags)) != 0)
- goto err;
- break;
- case DB_HASH:
- if ((ret = __hamc_dup(dbc_orig, dbc_n)) != 0)
- goto err;
- break;
- case DB_UNKNOWN:
- default:
- ret = __db_unknown_type(env,
- "__dbc_idup", dbc_orig->dbtype);
- goto err;
- }
- } else if (F_ISSET(dbc_orig, DBC_BULK)) {
- /*
- * For bulk cursors, remember what page we're on, even if we
- * don't know that the next operation will be nearby.
- */
- dbc_n->internal->pgno = dbc_orig->internal->pgno;
- }
-
- /* Copy the locking flags to the new cursor. */
- F_SET(dbc_n, F_ISSET(dbc_orig, DBC_BULK |
- DBC_READ_COMMITTED | DBC_READ_UNCOMMITTED | DBC_WRITECURSOR));
-
- /*
- * If we're in CDB and this isn't an offpage dup cursor, then
- * we need to get a lock for the duplicated cursor.
- */
- if (CDB_LOCKING(env) && !F_ISSET(dbc_n, DBC_OPD) &&
- (ret = __lock_get(env, dbc_n->locker, 0,
- &dbc_n->lock_dbt, F_ISSET(dbc_orig, DBC_WRITECURSOR) ?
- DB_LOCK_IWRITE : DB_LOCK_READ, &dbc_n->mylock)) != 0)
- goto err;
-
- dbc_n->priority = dbc_orig->priority;
- dbc_n->internal->pdbc = dbc_orig->internal->pdbc;
- *dbcp = dbc_n;
- return (0);
-
-err: (void)__dbc_close(dbc_n);
- return (ret);
-}
-
-/*
- * __dbc_newopd --
- * Create a new off-page duplicate cursor.
- *
- * PUBLIC: int __dbc_newopd __P((DBC *, db_pgno_t, DBC *, DBC **));
- */
-int
-__dbc_newopd(dbc_parent, root, oldopd, dbcp)
- DBC *dbc_parent;
- db_pgno_t root;
- DBC *oldopd;
- DBC **dbcp;
-{
- DB *dbp;
- DBC *opd;
- DBTYPE dbtype;
- int ret;
-
- dbp = dbc_parent->dbp;
- dbtype = (dbp->dup_compare == NULL) ? DB_RECNO : DB_BTREE;
-
- /*
- * On failure, we want to default to returning the old off-page dup
- * cursor, if any; our caller can't be left with a dangling pointer
- * to a freed cursor. On error the only allowable behavior is to
- * close the cursor (and the old OPD cursor it in turn points to), so
- * this should be safe.
- */
- *dbcp = oldopd;
-
- if ((ret = __db_cursor_int(dbp, dbc_parent->thread_info,
- dbc_parent->txn,
- dbtype, root, DBC_OPD, dbc_parent->locker, &opd)) != 0)
- return (ret);
-
- opd->priority = dbc_parent->priority;
- opd->internal->pdbc = dbc_parent;
- *dbcp = opd;
-
- /*
- * Check to see if we already have an off-page dup cursor that we've
- * passed in. If we do, close it. It'd be nice to use it again
- * if it's a cursor belonging to the right tree, but if we're doing
- * a cursor-relative operation this might not be safe, so for now
- * we'll take the easy way out and always close and reopen.
- *
- * Note that under no circumstances do we want to close the old
- * cursor without returning a valid new one; we don't want to
- * leave the main cursor in our caller with a non-NULL pointer
- * to a freed off-page dup cursor.
- */
- if (oldopd != NULL && (ret = __dbc_close(oldopd)) != 0)
- return (ret);
-
- return (0);
-}
-
-/*
- * __dbc_get --
- * Get using a cursor.
- *
- * PUBLIC: int __dbc_get __P((DBC *, DBT *, DBT *, u_int32_t));
- */
-int
-__dbc_get(dbc, key, data, flags)
- DBC *dbc;
- DBT *key, *data;
- u_int32_t flags;
-{
-#ifdef HAVE_PARTITION
- if (F_ISSET(dbc, DBC_PARTITIONED))
- return (__partc_get(dbc, key, data, flags));
-#endif
-
-#ifdef HAVE_COMPRESSION
- if (DB_IS_COMPRESSED(dbc->dbp))
- return (__bamc_compress_get(dbc, key, data, flags));
-#endif
-
- return (__dbc_iget(dbc, key, data, flags));
-}
-
-/*
- * __dbc_iget --
- * Implementation of get using a cursor.
- *
- * PUBLIC: int __dbc_iget __P((DBC *, DBT *, DBT *, u_int32_t));
- */
-int
-__dbc_iget(dbc, key, data, flags)
- DBC *dbc;
- DBT *key, *data;
- u_int32_t flags;
-{
- DB *dbp;
- DBC *ddbc, *dbc_n, *opd;
- DBC_INTERNAL *cp, *cp_n;
- DB_MPOOLFILE *mpf;
- ENV *env;
- db_pgno_t pgno;
- db_indx_t indx_off;
- u_int32_t multi, orig_ulen, tmp_flags, tmp_read_locking, tmp_rmw;
- u_int8_t type;
- int key_small, ret, t_ret;
-
- COMPQUIET(orig_ulen, 0);
-
- key_small = 0;
-
- /*
- * Cursor Cleanup Note:
- * All of the cursors passed to the underlying access methods by this
- * routine are duplicated cursors. On return, any referenced pages
- * will be discarded, and, if the cursor is not intended to be used
- * again, the close function will be called. So, pages/locks that
- * the cursor references do not need to be resolved by the underlying
- * functions.
- */
- dbp = dbc->dbp;
- env = dbp->env;
- mpf = dbp->mpf;
- dbc_n = NULL;
- opd = NULL;
-
- /* Clear OR'd in additional bits so we can check for flag equality. */
- tmp_rmw = LF_ISSET(DB_RMW);
- LF_CLR(DB_RMW);
-
- SET_READ_LOCKING_FLAGS(dbc, tmp_read_locking);
-
- multi = LF_ISSET(DB_MULTIPLE|DB_MULTIPLE_KEY);
- LF_CLR(DB_MULTIPLE|DB_MULTIPLE_KEY);
-
- /*
- * Return a cursor's record number. It has nothing to do with the
- * cursor get code except that it was put into the interface.
- */
- if (flags == DB_GET_RECNO) {
- if (tmp_rmw)
- F_SET(dbc, DBC_RMW);
- F_SET(dbc, tmp_read_locking);
- ret = __bamc_rget(dbc, data);
- if (tmp_rmw)
- F_CLR(dbc, DBC_RMW);
- /* Clear the temp flags, but leave WAS_READ_COMMITTED. */
- F_CLR(dbc, tmp_read_locking & ~DBC_WAS_READ_COMMITTED);
- return (ret);
- }
-
- if (flags == DB_CONSUME || flags == DB_CONSUME_WAIT)
- CDB_LOCKING_INIT(env, dbc);
-
- /* Don't return the key or data if it was passed to us. */
- if (!DB_RETURNS_A_KEY(dbp, flags))
- F_SET(key, DB_DBT_ISSET);
- if (flags == DB_GET_BOTH &&
- (dbp->dup_compare == NULL || dbp->dup_compare == __bam_defcmp))
- F_SET(data, DB_DBT_ISSET);
-
- /*
- * If we have an off-page duplicates cursor, and the operation applies
- * to it, perform the operation. Duplicate the cursor and call the
- * underlying function.
- *
- * Off-page duplicate trees are locked in the primary tree, that is,
- * we acquire a write lock in the primary tree and no locks in the
- * off-page dup tree. If the DB_RMW flag was specified and the get
- * operation is done in an off-page duplicate tree, call the primary
- * cursor's upgrade routine first.
- */
- cp = dbc->internal;
- if (cp->opd != NULL &&
- (flags == DB_CURRENT || flags == DB_GET_BOTHC ||
- flags == DB_NEXT || flags == DB_NEXT_DUP ||
- flags == DB_PREV || flags == DB_PREV_DUP)) {
- if (tmp_rmw && (ret = dbc->am_writelock(dbc)) != 0)
- goto err;
- if (F_ISSET(dbc, DBC_TRANSIENT))
- opd = cp->opd;
- else if ((ret = __dbc_idup(cp->opd, &opd, DB_POSITION)) != 0)
- goto err;
-
- if ((ret = opd->am_get(opd, key, data, flags, NULL)) == 0)
- goto done;
- /*
- * Another cursor may have deleted all of the off-page
- * duplicates, so for operations that are moving a cursor, we
- * need to skip the empty tree and retry on the parent cursor.
- */
- if (ret == DB_NOTFOUND &&
- (flags == DB_PREV || flags == DB_NEXT)) {
- ret = __dbc_close(opd);
- opd = NULL;
- if (F_ISSET(dbc, DBC_TRANSIENT))
- cp->opd = NULL;
- }
- if (ret != 0)
- goto err;
- } else if (cp->opd != NULL && F_ISSET(dbc, DBC_TRANSIENT)) {
- if ((ret = __dbc_close(cp->opd)) != 0)
- goto err;
- cp->opd = NULL;
- }
-
- /*
- * Perform an operation on the main cursor. Duplicate the cursor,
- * upgrade the lock as required, and call the underlying function.
- */
- switch (flags) {
- case DB_CURRENT:
- case DB_GET_BOTHC:
- case DB_NEXT:
- case DB_NEXT_DUP:
- case DB_NEXT_NODUP:
- case DB_PREV:
- case DB_PREV_DUP:
- case DB_PREV_NODUP:
- tmp_flags = DB_POSITION;
- break;
- default:
- tmp_flags = 0;
- break;
- }
-
- /*
- * If this cursor is going to be closed immediately, we don't
- * need to take precautions to clean it up on error.
- */
- if (F_ISSET(dbc, DBC_TRANSIENT | DBC_PARTITIONED))
- dbc_n = dbc;
- else {
- ret = __dbc_idup(dbc, &dbc_n, tmp_flags);
-
- if (ret != 0)
- goto err;
- COPY_RET_MEM(dbc, dbc_n);
- }
-
- if (tmp_rmw)
- F_SET(dbc_n, DBC_RMW);
- F_SET(dbc_n, tmp_read_locking);
-
- switch (multi) {
- case DB_MULTIPLE:
- F_SET(dbc_n, DBC_MULTIPLE);
- break;
- case DB_MULTIPLE_KEY:
- F_SET(dbc_n, DBC_MULTIPLE_KEY);
- break;
- case DB_MULTIPLE | DB_MULTIPLE_KEY:
- F_SET(dbc_n, DBC_MULTIPLE|DBC_MULTIPLE_KEY);
- break;
- case 0:
- default:
- break;
- }
-
-retry: pgno = PGNO_INVALID;
- ret = dbc_n->am_get(dbc_n, key, data, flags, &pgno);
- if (tmp_rmw)
- F_CLR(dbc_n, DBC_RMW);
- /*
- * Clear the temporary locking flags in the new cursor. The user's
- * (old) cursor needs to have the WAS_READ_COMMITTED flag because this
- * is used on the next call on that cursor.
- */
- F_CLR(dbc_n, tmp_read_locking);
- F_SET(dbc, tmp_read_locking & DBC_WAS_READ_COMMITTED);
- F_CLR(dbc_n, DBC_MULTIPLE|DBC_MULTIPLE_KEY);
- if (ret != 0)
- goto err;
-
- cp_n = dbc_n->internal;
-
- /*
- * We may be referencing a new off-page duplicates tree. Acquire
- * a new cursor and call the underlying function.
- */
- if (pgno != PGNO_INVALID) {
- if ((ret = __dbc_newopd(dbc,
- pgno, cp_n->opd, &cp_n->opd)) != 0)
- goto err;
-
- switch (flags) {
- case DB_FIRST:
- case DB_NEXT:
- case DB_NEXT_NODUP:
- case DB_SET:
- case DB_SET_RECNO:
- case DB_SET_RANGE:
- tmp_flags = DB_FIRST;
- break;
- case DB_LAST:
- case DB_PREV:
- case DB_PREV_NODUP:
- tmp_flags = DB_LAST;
- break;
- case DB_GET_BOTH:
- case DB_GET_BOTHC:
- case DB_GET_BOTH_RANGE:
- tmp_flags = flags;
- break;
- default:
- ret = __db_unknown_flag(env, "__dbc_get", flags);
- goto err;
- }
- ret = cp_n->opd->am_get(cp_n->opd, key, data, tmp_flags, NULL);
- /*
- * Another cursor may have deleted all of the off-page
- * duplicates, so for operations that are moving a cursor, we
- * need to skip the empty tree and retry on the parent cursor.
- */
- if (ret == DB_NOTFOUND) {
- switch (flags) {
- case DB_FIRST:
- case DB_NEXT:
- case DB_NEXT_NODUP:
- flags = DB_NEXT;
- break;
- case DB_LAST:
- case DB_PREV:
- case DB_PREV_NODUP:
- flags = DB_PREV;
- break;
- default:
- goto err;
- }
-
- ret = __dbc_close(cp_n->opd);
- cp_n->opd = NULL;
- if (ret == 0)
- goto retry;
- }
- if (ret != 0)
- goto err;
- }
-
-done: /*
- * Return a key/data item. The only exception is that we don't return
- * a key if the user already gave us one, that is, if the DB_SET flag
- * was set. The DB_SET flag is necessary. In a Btree, the user's key
- * doesn't have to be the same as the key stored in the tree, depending on
- * the magic performed by the comparison function. As we may not have
- * done any key-oriented operation here, the page reference may not be
- * valid. Fill it in as necessary. We don't have to worry about any
- * locks, the cursor must already be holding appropriate locks.
- *
- * XXX
- * If not a Btree and DB_SET_RANGE is set, we shouldn't return a key
- * either, should we?
- */
- cp_n = dbc_n == NULL ? dbc->internal : dbc_n->internal;
- if (!F_ISSET(key, DB_DBT_ISSET)) {
- if (cp_n->page == NULL && (ret = __memp_fget(mpf, &cp_n->pgno,
- dbc->thread_info, dbc->txn, 0, &cp_n->page)) != 0)
- goto err;
-
- if ((ret = __db_ret(dbc, cp_n->page, cp_n->indx, key,
- &dbc->rkey->data, &dbc->rkey->ulen)) != 0) {
- /*
- * If the key DBT is too small, we still want to return
- * the size of the data. Otherwise applications are
- * forced to check each one with a separate call. We
- * don't want to copy the data, so we set the ulen to
- * zero before calling __db_ret.
- */
- if (ret == DB_BUFFER_SMALL &&
- F_ISSET(data, DB_DBT_USERMEM)) {
- key_small = 1;
- orig_ulen = data->ulen;
- data->ulen = 0;
- } else
- goto err;
- }
- }
- if (multi != 0 && dbc->am_bulk != NULL) {
- /*
- * Even if fetching from the OPD cursor we need a duplicate
- * primary cursor if we are going after multiple keys.
- */
- if (dbc_n == NULL) {
- /*
- * Non-"_KEY" DB_MULTIPLE doesn't move the main cursor,
- * so it's safe to just use dbc, unless the cursor
- * has an open off-page duplicate cursor whose state
- * might need to be preserved.
- */
- if ((!(multi & DB_MULTIPLE_KEY) &&
- dbc->internal->opd == NULL) ||
- F_ISSET(dbc, DBC_TRANSIENT | DBC_PARTITIONED))
- dbc_n = dbc;
- else {
- if ((ret = __dbc_idup(dbc,
- &dbc_n, DB_POSITION)) != 0)
- goto err;
- if ((ret = dbc_n->am_get(dbc_n,
- key, data, DB_CURRENT, &pgno)) != 0)
- goto err;
- }
- cp_n = dbc_n->internal;
- }
-
- /*
- * If opd is set then we dupped the opd that we came in with.
- * When we return we may have a new opd if we went to another
- * key.
- */
- if (opd != NULL) {
- DB_ASSERT(env, cp_n->opd == NULL);
- cp_n->opd = opd;
- opd = NULL;
- }
-
- /*
- * Bulk get doesn't use __db_retcopy, so data.size won't
- * get set up unless there is an error. Assume success
- * here. This is the only call to am_bulk, and it avoids
- * setting it exactly the same everywhere. If we have an
- * DB_BUFFER_SMALL error, it'll get overwritten with the
- * needed value.
- */
- data->size = data->ulen;
- ret = dbc_n->am_bulk(dbc_n, data, flags | multi);
- } else if (!F_ISSET(data, DB_DBT_ISSET)) {
- ddbc = opd != NULL ? opd :
- cp_n->opd != NULL ? cp_n->opd : dbc_n;
- cp = ddbc->internal;
- if (cp->page == NULL &&
- (ret = __memp_fget(mpf, &cp->pgno,
- dbc->thread_info, ddbc->txn, 0, &cp->page)) != 0)
- goto err;
-
- type = TYPE(cp->page);
- indx_off = ((type == P_LBTREE ||
- type == P_HASH || type == P_HASH_UNSORTED) ? O_INDX : 0);
- ret = __db_ret(ddbc, cp->page, cp->indx + indx_off,
- data, &dbc->rdata->data, &dbc->rdata->ulen);
- }
-
-err: /* Don't pass DB_DBT_ISSET back to application level, error or no. */
- F_CLR(key, DB_DBT_ISSET);
- F_CLR(data, DB_DBT_ISSET);
-
- /* Cleanup and cursor resolution. */
- if (opd != NULL) {
- /*
- * To support dirty reads we must reget the write lock
- * if we have just stepped off a deleted record.
- * Since the OPD cursor does not know anything
- * about the referencing page or cursor we need
- * to peek at the OPD cursor and get the lock here.
- */
- if (F_ISSET(dbp, DB_AM_READ_UNCOMMITTED) &&
- F_ISSET((BTREE_CURSOR *)
- dbc->internal->opd->internal, C_DELETED))
- if ((t_ret =
- dbc->am_writelock(dbc)) != 0 && ret == 0)
- ret = t_ret;
- if ((t_ret = __dbc_cleanup(
- dbc->internal->opd, opd, ret)) != 0 && ret == 0)
- ret = t_ret;
- }
-
- if (key_small) {
- data->ulen = orig_ulen;
- if (ret == 0)
- ret = DB_BUFFER_SMALL;
- }
-
- if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 &&
- (ret == 0 || ret == DB_BUFFER_SMALL))
- ret = t_ret;
-
- if (flags == DB_CONSUME || flags == DB_CONSUME_WAIT)
- CDB_LOCKING_DONE(env, dbc);
- return (ret);
-}
-
-/*
- * Internal flags shared by the dbc_put functions.
- *
- * DBC_PUT_RMW: the helper functions add DB_RMW to the gets they issue.
- * DBC_PUT_NODEL: there is no old record, so no old secondary keys need to
- *  be deleted (set after an append, or when the lookup of the old record
- *  returns DB_NOTFOUND or DB_KEYEMPTY).
- * DBC_PUT_HAVEREC: the old record has already been looked up.
- */
-#define DBC_PUT_RMW 0x001
-#define DBC_PUT_NODEL 0x002
-#define DBC_PUT_HAVEREC 0x004
-
-/*
- * __dbc_put_resolve_key --
- *	Fetch the key/data pair the cursor currently references, so that the
- *	secondary and foreign databases can be updated correctly.
- *
- *	oldkey is zeroed and then filled in by the get; olddata is passed
- *	through to the get unchanged.  On success DBC_PUT_HAVEREC is set in
- *	*put_statep and 0 is returned.  DB_KEYEMPTY from the get is reported
- *	as DB_NOTFOUND; any other error is returned as is.
- */
-static inline int
-__dbc_put_resolve_key(dbc, oldkey, olddata, put_statep, flags)
-	DBC *dbc;
-	DBT *oldkey, *olddata;
-	u_int32_t flags, *put_statep;
-{
-	ENV *env;
-	int get_flags, ret;
-
-	env = dbc->dbp->env;
-
-	DB_ASSERT(env, flags == DB_CURRENT);
-	COMPQUIET(flags, 0);
-
-	/*
-	 * Reading through the caller's own cursor is safe: a DB_CURRENT get
-	 * does not move it, whether it succeeds or fails.
-	 *
-	 * DB_RMW is requested (when the caller enabled it) because the
-	 * "normal" put code is about to write anyway.  In transactional
-	 * databases those write locks are held even if the cursor we read
-	 * with is closed.
-	 */
-	get_flags = DB_CURRENT;
-	if (FLD_ISSET(*put_statep, DBC_PUT_RMW))
-		get_flags |= DB_RMW;
-
-	memset(oldkey, 0, sizeof(DBT));
-	ret = __dbc_get(dbc, oldkey, olddata, get_flags);
-
-	/* A cursor sitting on a deleted key is reported as DB_NOTFOUND. */
-	if (ret == DB_KEYEMPTY)
-		return (DB_NOTFOUND);
-	if (ret != 0)
-		return (ret);
-
-	/* Record that we've looked for the old record. */
-	FLD_SET(*put_statep, DBC_PUT_HAVEREC);
-	return (0);
-}
-
-/*
- * __dbc_put_append --
- *	Handle an append to a primary.
- *
- *	The record is appended through the Queue or Recno append routine,
- *	using a duplicate of dbc unless dbc is DBC_TRANSIENT.  The key is then
- *	passed to __dbt_usercopy and DBC_PUT_NODEL is set in *put_statep.
- *	Returns 0 on success or the first error encountered.
- */
-static inline int
-__dbc_put_append(dbc, key, data, put_statep, flags)
-	DBC *dbc;
-	DBT *key, *data;
-	u_int32_t flags, *put_statep;
-{
-	DB *dbp;
-	DBC *work_dbc;
-	DBT data_copy;
-	ENV *env;
-	int ret, t_ret;
-
-	dbp = dbc->dbp;
-	env = dbp->env;
-	work_dbc = NULL;
-	ret = 0;
-
-	DB_ASSERT(env, flags == DB_APPEND);
-	COMPQUIET(flags, 0);
-
-	/*
-	 * With DB_APPEND the insert has to happen first, because it is what
-	 * produces the key value; the usual order of "update secondaries /
-	 * verify foreign databases, then insert" is therefore reversed.
-	 *
-	 * An append callback may replace and then free the buffer in
-	 * data->data.  Work on a copy of the data DBT so a freed pointer is
-	 * never handed back to the user.
-	 */
-	data_copy = *data;
-
-	/*
-	 * A cursor that is about to be closed anyway needs no protection
-	 * against errors, so use it directly; otherwise work on a duplicate.
-	 */
-	if (F_ISSET(dbc, DBC_TRANSIENT))
-		work_dbc = dbc;
-	else if ((ret = __dbc_idup(dbc, &work_dbc, 0)) != 0)
-		goto err;
-
-	/*
-	 * Append is not an ordinary put: dispatch to the access method's own
-	 * append function.
-	 */
-	if (dbp->type == DB_QUEUE)
-		ret = __qam_append(work_dbc, key, &data_copy);
-	else if (dbp->type == DB_RECNO)
-		ret = __ram_append(work_dbc, key, &data_copy);
-	else {
-		/* The interface should prevent this. */
-		DB_ASSERT(env,
-		    dbp->type == DB_QUEUE || dbp->type == DB_RECNO);
-
-		ret = __db_ferr(env, "DBC->put", 0);
-		goto err;
-	}
-	if (ret != 0)
-		goto err;
-
-	/*
-	 * If the append callback allocated a new buffer for the copy, release
-	 * it now.
-	 */
-	FREE_IF_NEEDED(env, &data_copy);
-
-	/*
-	 * The append may have generated the key without storing it in the
-	 * key's data buffer.  Make sure it is there so the secondary updates
-	 * can complete.
-	 */
-	if ((ret = __dbt_usercopy(env, key)) != 0)
-		goto err;
-
-	/* An append cannot be replacing an existing item. */
-	FLD_SET(*put_statep, DBC_PUT_NODEL);
-
-err:	if (work_dbc != NULL &&
-	    (t_ret = __dbc_cleanup(dbc, work_dbc, ret)) != 0 && ret == 0)
-		ret = t_ret;
-	return (ret);
-}
-
-/*
- * __dbc_put_partial --
- *	Ensure that the data item we are using is complete and correct.
- *	Otherwise we could break the secondary constraints.
- *
- *	If the old record has not been looked up yet (neither DBC_PUT_HAVEREC
- *	nor DBC_PUT_NODEL is set in *put_statep), it is fetched into orig_data
- *	through a short-lived duplicate of dbc.  DBC_PUT_HAVEREC is set
- *	afterwards, together with DBC_PUT_NODEL when no record was found.
- *	The complete new datum is then built into out_data from orig_data and
- *	the partial data.
- *
- *	Returns 0 on success, or the first error encountered.
- */
-static inline int
-__dbc_put_partial(dbc, pkey, data, orig_data, out_data, put_statep, flags)
-	DBC *dbc;
-	DBT *pkey, *data, *orig_data, *out_data;
-	u_int32_t *put_statep, flags;
-{
-	DB *dbp;
-	DBC *pdbc;
-	ENV *env;
-	int ret, rmw, t_ret;
-
-	dbp = dbc->dbp;
-	env = dbp->env;
-	ret = t_ret = 0;
-	rmw = FLD_ISSET(*put_statep, DBC_PUT_RMW) ? DB_RMW : 0;
-
-	if (!FLD_ISSET(*put_statep, DBC_PUT_HAVEREC) &&
-	    !FLD_ISSET(*put_statep, DBC_PUT_NODEL)) {
-		/*
-		 * We're going to have to search the tree for the
-		 * specified key.  Dup a cursor (so we have the same
-		 * locking info) and do a c_get.
-		 */
-		if ((ret = __dbc_idup(dbc, &pdbc, 0)) != 0)
-			return (ret);
-
-		/*
-		 * When doing a put with DB_CURRENT, partial data items have
-		 * already been resolved.
-		 */
-		DB_ASSERT(env, flags != DB_CURRENT);
-
-		F_SET(pkey, DB_DBT_ISSET);
-		ret = __dbc_get(pdbc, pkey, orig_data, rmw | DB_SET);
-		if (ret == DB_KEYEMPTY || ret == DB_NOTFOUND) {
-			/* No old record: nothing will need deleting later. */
-			FLD_SET(*put_statep, DBC_PUT_NODEL);
-			ret = 0;
-		}
-
-		/*
-		 * Always close the duplicate, but don't let a close failure
-		 * hide an earlier error from the get.
-		 */
-		if ((t_ret = __dbc_close(pdbc)) != 0 && ret == 0)
-			ret = t_ret;
-		if (ret != 0)
-			return (ret);
-
-		FLD_SET(*put_statep, DBC_PUT_HAVEREC);
-	}
-
-	COMPQUIET(flags, 0);
-
-	/*
-	 * Now build the new datum from orig_data and the partial data
-	 * we were given.  It's okay to do this if no record was
-	 * returned above: a partial put on an empty record is allowed,
-	 * if a little strange.  The data is zero-padded.
-	 */
-	return (__db_buildpartial(dbp, orig_data, data, out_data));
-}
-
-/*
- * __dbc_put_fixed_len --
- *	Handle padding for fixed-length records.
- *
- *	data is the DBT supplied for the put; out_data is the DBT that will be
- *	handed to the secondary callbacks.  When the record is shorter than
- *	the database's fixed record length, out_data->data is (re)allocated to
- *	that length and padded with the database's pad byte, and
- *	out_data->size is set to the record length.
- *
- *	Returns 0 on success, the value of __db_rec_toobig() if the record is
- *	longer than the fixed record length, or an allocation error.
- */
-static inline int
-__dbc_put_fixed_len(dbc, data, out_data)
-	DBC *dbc;
-	DBT *data, *out_data;
-{
-	DB *dbp;
-	ENV *env;
-	int re_pad, ret;
-	u_int32_t re_len, size;
-
-	dbp = dbc->dbp;
-	env = dbp->env;
-	ret = 0;
-
-	/*
-	 * Handle fixed-length records.  If the primary database has
-	 * fixed-length records, we need to pad out the datum before
-	 * we pass it into the callback function; we always index the
-	 * "real" record.
-	 */
-	if (dbp->type == DB_QUEUE) {
-		re_len = ((QUEUE *)dbp->q_internal)->re_len;
-		re_pad = ((QUEUE *)dbp->q_internal)->re_pad;
-	} else {
-		re_len = ((BTREE *)dbp->bt_internal)->re_len;
-		re_pad = ((BTREE *)dbp->bt_internal)->re_pad;
-	}
-
-	size = data->size;
-	if (size > re_len) {
-		ret = __db_rec_toobig(env, size, re_len);
-		return (ret);
-	} else if (size < re_len) {
-		/*
-		 * If we're not doing a partial put, copy data->data into
-		 * out_data->data, then pad out out_data->data.  This replaces
-		 * the buffer pointer the caller copied from data into
-		 * out_data, which is what is used in the more common case
-		 * when padding is not needed.
-		 *
-		 * If we're doing a partial put, the data we want are already
-		 * in out_data.data; we just need to pad.
-		 */
-		if (F_ISSET(data, DB_DBT_PARTIAL)) {
-			/*
-			 * In the partial case, we have built the item into
-			 * out_data already using __db_buildpartial.  Just need
-			 * to pad from the end of out_data, not from data->size.
-			 */
-			size = out_data->size;
-
-			/*
-			 * The built item can be longer than the partial piece
-			 * that was checked above.  Reject it here: otherwise
-			 * the unsigned "re_len - size" below would wrap and
-			 * the memset would run far past the buffer.
-			 */
-			if (size > re_len)
-				return (__db_rec_toobig(env, size, re_len));
-
-			if ((ret = __os_realloc(
-			    env, re_len, &out_data->data)) != 0)
-				return (ret);
-		} else {
-			if ((ret = __os_malloc(
-			    env, re_len, &out_data->data)) != 0)
-				return (ret);
-			memcpy(out_data->data, data->data, size);
-		}
-		memset((u_int8_t *)out_data->data + size, re_pad,
-		    re_len - size);
-		out_data->size = re_len;
-	}
-
-	return (ret);
-}
-
-/*
- * __dbc_put_secondaries --
- * Insert the secondary keys, and validate the foreign key constraints.
- *
- * dbc: cursor on the primary; its thread_info, txn and locker are reused
- *  for every foreign and secondary cursor opened here.
- * pkey: primary key; passed to each secondary's callback and stored as the
- *  data item of every secondary record that is put.
- * data: new primary data, handed to each secondary's callback.
- * orig_data: old primary data; only used when DBC_PUT_HAVEREC is set.
- * s_count: number of DBTs in s_keys_buf (only checked by DB_ASSERT).
- * s_keys_buf: array with one DBT per secondary, filled in by the callbacks.
- * put_statep: DBC_PUT_* state flags; read but not modified here.
- *
- * Returns 0 on success, DB_FOREIGN_CONFLICT if a secondary key is missing
- * from the associated foreign database, EINVAL if the put would create a
- * non-unique key in a secondary that does not support duplicates, or the
- * error returned by an underlying call.
- */
-static inline int
-__dbc_put_secondaries(dbc,
- pkey, data, orig_data, s_count, s_keys_buf, put_statep)
- DBC *dbc;
- DBT *pkey, *data, *orig_data, *s_keys_buf;
- int s_count;
- u_int32_t *put_statep;
-{
- DB *dbp, *sdbp;
- DBC *fdbc, *sdbc;
- DBT fdata, oldpkey, *skeyp, temppkey, tempskey, *tskeyp;
- ENV *env;
- int cmp, ret, rmw, t_ret;
- u_int32_t nskey;
-
- dbp = dbc->dbp;
- env = dbp->env;
- fdbc = sdbc = NULL;
- sdbp = NULL;
- ret = t_ret = 0;
- rmw = FLD_ISSET(*put_statep, DBC_PUT_RMW) ? DB_RMW : 0;
-
- /*
- * Loop through the secondaries. (Step 3.)
- *
- * Note that __db_s_first and __db_s_next will take care of
- * thread-locking and refcounting issues.
- */
- for (ret = __db_s_first(dbp, &sdbp), skeyp = s_keys_buf;
- sdbp != NULL && ret == 0;
- ret = __db_s_next(&sdbp, dbc->txn), ++skeyp) {
- DB_ASSERT(env, skeyp - s_keys_buf < s_count);
- /*
- * Don't process this secondary if the key is immutable and we
- * know that the old record exists. This optimization can't be
- * used if we have not checked for the old record yet.
- */
- if (FLD_ISSET(*put_statep, DBC_PUT_HAVEREC) &&
- !FLD_ISSET(*put_statep, DBC_PUT_NODEL) &&
- FLD_ISSET(sdbp->s_assoc_flags, DB_ASSOC_IMMUTABLE_KEY))
- continue;
-
- /*
- * Call the callback for this secondary, to get the
- * appropriate secondary key.
- */
- if ((ret = sdbp->s_callback(sdbp,
- pkey, data, skeyp)) != 0) {
- /* Not indexing is equivalent to an empty key set. */
- if (ret == DB_DONOTINDEX) {
- F_SET(skeyp, DB_DBT_MULTIPLE);
- skeyp->size = 0;
- ret = 0;
- } else
- goto err;
- }
-
- if (sdbp->s_foreign != NULL &&
- (ret = __db_cursor_int(sdbp->s_foreign,
- dbc->thread_info, dbc->txn, sdbp->s_foreign->type,
- PGNO_INVALID, 0, dbc->locker, &fdbc)) != 0)
- goto err;
-
- /*
- * Mark the secondary key DBT(s) as set -- that is, the
- * callback returned at least one secondary key.
- *
- * Also, if this secondary index is associated with a foreign
- * database, check that the foreign db contains the key(s) to
- * maintain referential integrity. Set flags in fdata to avoid
- * mem copying, we just need to know existence. We need to do
- * this check before setting DB_DBT_ISSET, otherwise __dbc_get
- * will overwrite the flag values.
- */
- if (F_ISSET(skeyp, DB_DBT_MULTIPLE)) {
-#ifdef DIAGNOSTIC
- __db_check_skeyset(sdbp, skeyp);
-#endif
- for (tskeyp = (DBT *)skeyp->data, nskey = skeyp->size;
- nskey > 0; nskey--, tskeyp++) {
- if (fdbc != NULL) {
- memset(&fdata, 0, sizeof(DBT));
- F_SET(&fdata,
- DB_DBT_PARTIAL | DB_DBT_USERMEM);
- if ((ret = __dbc_get(
- fdbc, tskeyp, &fdata,
- DB_SET | rmw)) == DB_NOTFOUND ||
- ret == DB_KEYEMPTY) {
- ret = DB_FOREIGN_CONFLICT;
- break;
- }
- }
- F_SET(tskeyp, DB_DBT_ISSET);
- }
- tskeyp = (DBT *)skeyp->data;
- nskey = skeyp->size;
- } else {
- if (fdbc != NULL) {
- memset(&fdata, 0, sizeof(DBT));
- F_SET(&fdata, DB_DBT_PARTIAL | DB_DBT_USERMEM);
- if ((ret = __dbc_get(fdbc, skeyp, &fdata,
- DB_SET | rmw)) == DB_NOTFOUND ||
- ret == DB_KEYEMPTY)
- ret = DB_FOREIGN_CONFLICT;
- }
- F_SET(skeyp, DB_DBT_ISSET);
- tskeyp = skeyp;
- nskey = 1;
- }
- if (fdbc != NULL && (t_ret = __dbc_close(fdbc)) != 0 &&
- ret == 0)
- ret = t_ret;
- fdbc = NULL;
- if (ret != 0)
- goto err;
-
- /*
- * If we have the old record, we can generate and remove any
- * old secondary key(s) now. We can also skip the secondary
- * put if there is no change.
- */
- if (FLD_ISSET(*put_statep, DBC_PUT_HAVEREC)) {
- if ((ret = __dbc_del_oldskey(sdbp, dbc,
- skeyp, pkey, orig_data)) == DB_KEYEXIST)
- continue;
- else if (ret != 0)
- goto err;
- }
- if (nskey == 0)
- continue; /* No new keys to insert for this secondary. */
-
- /*
- * Open a cursor in this secondary.
- *
- * Use the same locker ID as our primary cursor, so that
- * we're guaranteed that the locks don't conflict (e.g. in CDB
- * or if we're subdatabases that share and want to lock a
- * metadata page).
- */
- if ((ret = __db_cursor_int(sdbp, dbc->thread_info, dbc->txn,
- sdbp->type, PGNO_INVALID, 0, dbc->locker, &sdbc)) != 0)
- goto err;
-
- /*
- * If we're in CDB, updates will fail since the new cursor
- * isn't a writer. However, we hold the WRITE lock in the
- * primary and will for as long as our new cursor lasts,
- * and the primary and secondary share a lock file ID,
- * so it's safe to consider this a WRITER. The close
- * routine won't try to put anything because we don't
- * really have a lock.
- */
- if (CDB_LOCKING(env)) {
- DB_ASSERT(env, sdbc->mylock.off == LOCK_INVALID);
- F_SET(sdbc, DBC_WRITER);
- }
-
- /*
- * Swap the primary key to the byte order of this secondary, if
- * necessary. By doing this now, we can compare directly
- * against the data already in the secondary without having to
- * swap it after reading.
- */
- SWAP_IF_NEEDED(sdbp, pkey);
-
- for (; nskey > 0 && ret == 0; nskey--, tskeyp++) {
- /* Skip this key if it is already in the database. */
- if (!F_ISSET(tskeyp, DB_DBT_ISSET))
- continue;
-
- /*
- * There are three cases here--
- * 1) The secondary supports sorted duplicates.
- * If we attempt to put a secondary/primary pair
- * that already exists, that's a duplicate
- * duplicate, and c_put will return DB_KEYEXIST
- * (see __db_duperr). This will leave us with
- * exactly one copy of the secondary/primary pair,
- * and this is just right--we'll avoid deleting it
- * later, as the old and new secondaries will
- * match (since the old secondary is the dup dup
- * that's already there).
- * 2) The secondary supports duplicates, but they're not
- * sorted. We need to avoid putting a duplicate
- * duplicate, because the matching old and new
- * secondaries will prevent us from deleting
- * anything and we'll wind up with two secondary
- * records that point to the same primary key. Do
- * a c_get(DB_GET_BOTH); only do the put if the
- * secondary doesn't exist.
- * 3) The secondary doesn't support duplicates at all.
- * In this case, secondary keys must be unique;
- * if another primary key already exists for this
- * secondary key, we have to either overwrite it
- * or not put this one, and in either case we've
- * corrupted the secondary index. Do a
- * c_get(DB_SET). If the secondary/primary pair
- * already exists, do nothing; if the secondary
- * exists with a different primary, return an
- * error; and if the secondary does not exist,
- * put it.
- */
- if (!F_ISSET(sdbp, DB_AM_DUP)) {
- /* Case 3. */
- memset(&oldpkey, 0, sizeof(DBT));
- F_SET(&oldpkey, DB_DBT_MALLOC);
- ret = __dbc_get(sdbc,
- tskeyp, &oldpkey, rmw | DB_SET);
- if (ret == 0) {
- cmp = __bam_defcmp(sdbp,
- &oldpkey, pkey);
- __os_ufree(env, oldpkey.data);
- /*
- * If the secondary key is unchanged,
- * skip the put and go on to the next
- * one.
- */
- if (cmp == 0)
- continue;
-
- __db_errx(env, "%s%s",
- "Put results in a non-unique secondary key in an ",
- "index not configured to support duplicates");
- ret = EINVAL;
- }
- if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY)
- break;
- } else if (!F_ISSET(sdbp, DB_AM_DUPSORT)) {
- /* Case 2. */
- DB_INIT_DBT(tempskey,
- tskeyp->data, tskeyp->size);
- DB_INIT_DBT(temppkey,
- pkey->data, pkey->size);
- ret = __dbc_get(sdbc, &tempskey, &temppkey,
- rmw | DB_GET_BOTH);
- if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY)
- break;
- }
-
- ret = __dbc_put(sdbc, tskeyp, pkey,
- DB_UPDATE_SECONDARY);
-
- /*
- * We don't know yet whether this was a put-overwrite
- * that in fact changed nothing. If it was, we may get
- * DB_KEYEXIST. This is not an error.
- */
- if (ret == DB_KEYEXIST)
- ret = 0;
- }
-
- /* Make sure the primary key is back in native byte-order. */
- SWAP_IF_NEEDED(sdbp, pkey);
-
- if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0)
- ret = t_ret;
-
- if (ret != 0)
- goto err;
-
- /*
- * Mark that we have a key for this secondary so we can check
- * it later before deleting the old one. We can't set it
- * earlier or it would be cleared in the calls above.
- */
- F_SET(skeyp, DB_DBT_ISSET);
- }
-err: if (sdbp != NULL &&
- (t_ret = __db_s_done(sdbp, dbc->txn)) != 0 && ret == 0)
- ret = t_ret;
- COMPQUIET(s_count, 0);
- return (ret);
-}
-
-/*
- * __dbc_put_primary --
- *	Do the secondary-index work for a put through a cursor on a primary
- *	database: insert the new secondary keys (validating any foreign key
- *	constraints) and delete the secondary keys of the record being
- *	replaced.  For DB_APPEND the primary insert itself is performed here
- *	as well, through __dbc_put_append.
- *
- *	Returns 0 on success, DB_KEYEXIST if DB_NOOVERWRITE was specified and
- *	the key is already present, or the error from the failing step.
- */
-static int
-__dbc_put_primary(dbc, key, data, flags)
-	DBC *dbc;
-	DBT *key, *data;
-	u_int32_t flags;
-{
-	DB *dbp, *sdbp;
-	DBC *dbc_n, *pdbc;
-	DBT oldkey, olddata, newdata;
-	DBT *all_skeys, *skeyp, *tskeyp;
-	ENV *env;
-	int ret, t_ret, s_count;
-	u_int32_t nskey, put_state, rmw;
-
-	dbp = dbc->dbp;
-	env = dbp->env;
-	ret = t_ret = s_count = 0;
-	put_state = 0;
-	sdbp = NULL;
-	pdbc = dbc_n = NULL;
-	all_skeys = NULL;
-	memset(&newdata, 0, sizeof(DBT));
-	memset(&olddata, 0, sizeof(DBT));
-
-	/*
-	 * We do multiple cursor operations in some cases and subsequently
-	 * access the data DBT information.  Set DB_DBT_MALLOC so we don't risk
-	 * modification of the data between our uses of it.
-	 */
-	F_SET(&olddata, DB_DBT_MALLOC);
-
-	/*
-	 * We have at least one secondary which we may need to update.
-	 *
-	 * There is a rather vile locking issue here.  Secondary gets
-	 * will always involve acquiring a read lock in the secondary,
-	 * then acquiring a read lock in the primary.  Ideally, we
-	 * would likewise perform puts by updating all the secondaries
-	 * first, then doing the actual put in the primary, to avoid
-	 * deadlock (since having multiple threads doing secondary
-	 * gets and puts simultaneously is probably a common case).
-	 *
-	 * However, if this put is a put-overwrite--and we have no way to
-	 * tell in advance whether it will be--we may need to delete
-	 * an outdated secondary key.  In order to find that old
-	 * secondary key, we need to get the record we're overwriting,
-	 * before we overwrite it.
-	 *
-	 * (XXX: It would be nice to avoid this extra get, and have the
-	 * underlying put routines somehow pass us the old record
-	 * since they need to traverse the tree anyway.  I'm saving
-	 * this optimization for later, as it's a lot of work, and it
-	 * would be hard to fit into this locking paradigm anyway.)
-	 *
-	 * The simple thing to do would be to go get the old record before
-	 * we do anything else.  Unfortunately, though, doing so would
-	 * violate our "secondary, then primary" lock acquisition
-	 * ordering--even in the common case where no old primary record
-	 * exists, we'll still acquire and keep a lock on the page where
-	 * we're about to do the primary insert.
-	 *
-	 * To get around this, we do the following gyrations, which
-	 * hopefully solve this problem in the common case:
-	 *
-	 * 1) If this is a c_put(DB_CURRENT), go ahead and get the
-	 *    old record.  We already hold the lock on this page in
-	 *    the primary, so no harm done, and we'll need the primary
-	 *    key (which we weren't passed in this case) to do any
-	 *    secondary puts anyway.
-	 *    If this is a put(DB_APPEND), then we need to insert the item,
-	 *    so that we can know the key value.  So go ahead and insert.  In
-	 *    the case of a put(DB_APPEND) without secondaries it is
-	 *    implemented in the __db_put method as an optimization.
-	 *
-	 * 2) If we're doing a partial put, we need to perform the
-	 *    get on the primary key right away, since we don't have
-	 *    the whole datum that the secondary key is based on.
-	 *    We may also need to pad out the record if the primary
-	 *    has a fixed record length.
-	 *
-	 * 3) Loop through the secondary indices, putting into each a
-	 *    new secondary key that corresponds to the new record.
-	 *
-	 * 4) If we haven't done so in (1) or (2), get the old primary
-	 *    key/data pair.  If one does not exist--the common case--we're
-	 *    done with secondary indices, and can go straight on to the
-	 *    primary put.
-	 *
-	 * 5) If we do have an old primary key/data pair, however, we need
-	 *    to loop through all the secondaries a second time and delete
-	 *    the old secondary in each.
-	 */
-	s_count = __db_s_count(dbp);
-	if ((ret = __os_calloc(env,
-	    (u_int)s_count, sizeof(DBT), &all_skeys)) != 0)
-		goto err;
-
-	/*
-	 * Primary indices can't have duplicates, so only DB_APPEND,
-	 * DB_CURRENT, DB_KEYFIRST, and DB_KEYLAST make any sense.  Other flags
-	 * should have been caught by the checking routine, but
-	 * add a sprinkling of paranoia.
-	 */
-	DB_ASSERT(env, flags == DB_APPEND || flags == DB_CURRENT ||
-	    flags == DB_KEYFIRST || flags == DB_KEYLAST ||
-	    flags == DB_NOOVERWRITE || flags == DB_OVERWRITE_DUP);
-
-	/*
-	 * We'll want to use DB_RMW in a few places, but it's only legal
-	 * when locking is on.
-	 */
-	rmw = STD_LOCKING(dbc) ? DB_RMW : 0;
-	if (rmw)
-		FLD_SET(put_state, DBC_PUT_RMW);
-
-	/* Resolve the primary key if required (Step 1). */
-	if (flags == DB_CURRENT) {
-		if ((ret = __dbc_put_resolve_key(dbc,
-		    &oldkey, &olddata, &put_state, flags)) != 0)
-			goto err;
-		key = &oldkey;
-	} else if (flags == DB_APPEND) {
-		if ((ret = __dbc_put_append(dbc,
-		    key, data, &put_state, flags)) != 0)
-			goto err;
-	}
-
-	/*
-	 * PUT_NOOVERWRITE with secondaries is a troublesome case.  We need
-	 * to check that the insert will work prior to making any changes
-	 * to secondaries.  Try to work within the locking constraints outlined
-	 * above.
-	 *
-	 * This is DB->put (DB_NOOVERWRITE).  DBC->put(DB_NODUPDATA) is not
-	 * relevant since it is only valid on DBs that support duplicates,
-	 * which primaries with secondaries can't have.
-	 */
-	if (flags == DB_NOOVERWRITE) {
-		/* Don't bother retrieving the data. */
-		F_SET(key, DB_DBT_ISSET);
-		olddata.dlen = 0;
-		olddata.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM;
-		ret = __dbc_get(dbc, key, &olddata, DB_SET);
-		if (ret == 0 || ret == DB_KEYEMPTY) {
-			ret = DB_KEYEXIST;
-			goto done;
-		}
-		/*
-		 * Only DB_NOTFOUND means the key is free.  Any other non-zero
-		 * return is a failure of the lookup itself and is passed on
-		 * to the caller instead of being reported as DB_KEYEXIST.
-		 */
-		if (ret != DB_NOTFOUND)
-			goto err;
-		ret = 0;
-	}
-
-	/*
-	 * Check for partial puts using DB_DBT_PARTIAL (Step 2).
-	 */
-	if (F_ISSET(data, DB_DBT_PARTIAL)) {
-		if ((ret = __dbc_put_partial(dbc,
-		    key, data, &olddata, &newdata, &put_state, flags)) != 0)
-			goto err;
-	} else {
-		newdata = *data;
-	}
-
-	/*
-	 * Check for partial puts, with fixed length record databases (Step 2).
-	 */
-	if ((dbp->type == DB_RECNO && F_ISSET(dbp, DB_AM_FIXEDLEN)) ||
-	    (dbp->type == DB_QUEUE)) {
-		if ((ret = __dbc_put_fixed_len(dbc, data, &newdata)) != 0)
-			goto err;
-	}
-
-	/* Validate any foreign databases, and update secondaries. (Step 3). */
-	if ((ret = __dbc_put_secondaries(dbc, key, &newdata,
-	    &olddata, s_count, all_skeys, &put_state))
-	    != 0)
-		goto err;
-	/*
-	 * If we've already got the old primary key/data pair, the secondary
-	 * updates are already done.
-	 */
-	if (FLD_ISSET(put_state, DBC_PUT_HAVEREC))
-		goto done;
-
-	/*
-	 * If still necessary, go get the old primary key/data.  (Step 4.)
-	 *
-	 * See the comments in step 2.  This is real familiar.
-	 */
-	if ((ret = __dbc_idup(dbc, &pdbc, 0)) != 0)
-		goto err;
-	DB_ASSERT(env, flags != DB_CURRENT);
-	F_SET(key, DB_DBT_ISSET);
-	ret = __dbc_get(pdbc, key, &olddata, rmw | DB_SET);
-	if (ret == DB_KEYEMPTY || ret == DB_NOTFOUND) {
-		FLD_SET(put_state, DBC_PUT_NODEL);
-		ret = 0;
-	}
-	if ((t_ret = __dbc_close(pdbc)) != 0 && ret == 0)
-		ret = t_ret;
-	if (ret != 0)
-		goto err;
-
-	/*
-	 * Check whether we do in fact have an old record we may need to
-	 * delete.  (Step 5).
-	 */
-	if (FLD_ISSET(put_state, DBC_PUT_NODEL))
-		goto done;
-
-	for (ret = __db_s_first(dbp, &sdbp), skeyp = all_skeys;
-	    sdbp != NULL && ret == 0;
-	    ret = __db_s_next(&sdbp, dbc->txn), skeyp++) {
-		DB_ASSERT(env, skeyp - all_skeys < s_count);
-		/*
-		 * Don't process this secondary if the key is immutable.  We
-		 * know that the old record exists, so this optimization can
-		 * always be used.
-		 */
-		if (FLD_ISSET(sdbp->s_assoc_flags, DB_ASSOC_IMMUTABLE_KEY))
-			continue;
-
-		if ((ret = __dbc_del_oldskey(sdbp, dbc,
-		    skeyp, key, &olddata)) != 0 && ret != DB_KEYEXIST)
-			goto err;
-	}
-	if (ret != 0)
-		goto err;
-
-done:
-err:
-	if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && ret == 0)
-		ret = t_ret;
-
-	/* If newdata or olddata were used, free their buffers. */
-	if (newdata.data != NULL && newdata.data != data->data)
-		__os_free(env, newdata.data);
-	if (olddata.data != NULL)
-		__os_ufree(env, olddata.data);
-
-	CDB_LOCKING_DONE(env, dbc);
-
-	if (sdbp != NULL &&
-	    (t_ret = __db_s_done(sdbp, dbc->txn)) != 0 && ret == 0)
-		ret = t_ret;
-
-	/* Release every secondary key the callbacks handed back. */
-	for (skeyp = all_skeys; skeyp - all_skeys < s_count; skeyp++) {
-		if (F_ISSET(skeyp, DB_DBT_MULTIPLE)) {
-			for (nskey = skeyp->size, tskeyp = (DBT *)skeyp->data;
-			    nskey > 0;
-			    nskey--, tskeyp++)
-				FREE_IF_NEEDED(env, tskeyp);
-		}
-		FREE_IF_NEEDED(env, skeyp);
-	}
-	if (all_skeys != NULL)
-		__os_free(env, all_skeys);
-	return (ret);
-}
-
-/*
- * __dbc_put --
- *	Put using a cursor.
- *
- *	Normalizes the flags, runs the secondary-index processing when the
- *	database is a primary with secondaries, and then performs the put
- *	itself unless DB_APPEND already did so.
- *
- * PUBLIC: int __dbc_put __P((DBC *, DBT *, DBT *, u_int32_t));
- */
-int
-__dbc_put(dbc, key, data, flags)
-	DBC *dbc;
-	DBT *key, *data;
-	u_int32_t flags;
-{
-	DB *dbp;
-	int ret;
-
-	dbp = dbc->dbp;
-	ret = 0;
-
-	/*
-	 * Three flag values are folded into DB_KEYLAST here so that the
-	 * lower-level code never has to deal with them:
-	 *
-	 * DB_UPDATE_SECONDARY: a private flag.  Putting to a secondary index
-	 *	is forbidden, so internal updates use this flag, which does
-	 *	the right thing but won't return an error during flag checking.
-	 * 0:	many callers that want the default DB_KEYLAST behavior simply
-	 *	pass no flags.
-	 * DB_OVERWRITE_DUP: equivalent to DB_KEYLAST unless there are sorted
-	 *	duplicates.
-	 */
-	if (flags == 0 || flags == DB_UPDATE_SECONDARY)
-		flags = DB_KEYLAST;
-	else if (flags == DB_OVERWRITE_DUP && !F_ISSET(dbp, DB_AM_DUPSORT))
-		flags = DB_KEYLAST;
-
-	CDB_LOCKING_INIT(dbc->env, dbc);
-
-	/*
-	 * A primary with secondary indices needs the extra secondary
-	 * processing first; anything else goes straight to the "normal" put.
-	 */
-	if (DB_IS_PRIMARY(dbp)) {
-		ret = __dbc_put_primary(dbc, key, data, flags);
-		if (ret != 0)
-			return (ret);
-	}
-
-	/*
-	 * For an append, the insert was done prior to the secondary updates,
-	 * so there is nothing left to do.
-	 */
-	if (flags == DB_APPEND)
-		return (0);
-
-#ifdef HAVE_COMPRESSION
-	if (DB_IS_COMPRESSED(dbp))
-		return (__bamc_compress_put(dbc, key, data, flags));
-#endif
-
-	return (__dbc_iput(dbc, key, data, flags));
-}
-
-/*
- * __dbc_iput --
- * Implementation of put using a cursor.
- *
- * A cursor-relative put (DB_AFTER, DB_BEFORE or DB_CURRENT) on a cursor
- * that has an off-page duplicate cursor is applied to the off-page
- * duplicate cursor of a duplicate of dbc. Every other put goes to the
- * main cursor: a duplicate of dbc, or dbc itself when it is flagged
- * DBC_TRANSIENT or DBC_PARTITIONED. If the access method reports a page
- * number back, a new off-page duplicate cursor is created and the put is
- * repeated on it. In all cases the result is resolved through
- * __dbc_cleanup, which is passed the put's return code.
- *
- * Returns 0 on success, DB_NOTFOUND for a cursor-relative put on a hash
- * cursor whose off-page duplicate item has been deleted, or the error
- * from the failing call.
- *
- * PUBLIC: int __dbc_iput __P((DBC *, DBT *, DBT *, u_int32_t));
- */
-int
-__dbc_iput(dbc, key, data, flags)
- DBC *dbc;
- DBT *key, *data;
- u_int32_t flags;
-{
- DBC *dbc_n, *oldopd, *opd;
- db_pgno_t pgno;
- int ret, t_ret;
- u_int32_t tmp_flags;
-
- /*
- * Cursor Cleanup Note:
- * All of the cursors passed to the underlying access methods by this
- * routine are duplicated cursors. On return, any referenced pages
- * will be discarded, and, if the cursor is not intended to be used
- * again, the close function will be called. So, pages/locks that
- * the cursor references do not need to be resolved by the underlying
- * functions.
- */
- dbc_n = NULL;
- ret = t_ret = 0;
-
- /*
- * If we have an off-page duplicates cursor, and the operation applies
- * to it, perform the operation. Duplicate the cursor and call the
- * underlying function.
- *
- * Off-page duplicate trees are locked in the primary tree, that is,
- * we acquire a write lock in the primary tree and no locks in the
- * off-page dup tree. If the put operation is done in an off-page
- * duplicate tree, call the primary cursor's upgrade routine first.
- */
- if (dbc->internal->opd != NULL &&
- (flags == DB_AFTER || flags == DB_BEFORE || flags == DB_CURRENT)) {
- /*
- * A special case for hash off-page duplicates. Hash doesn't
- * support (and is documented not to support) put operations
- * relative to a cursor which references an already deleted
- * item. For consistency, apply the same criteria to off-page
- * duplicates as well.
- */
- if (dbc->dbtype == DB_HASH && F_ISSET(
- ((BTREE_CURSOR *)(dbc->internal->opd->internal)),
- C_DELETED)) {
- ret = DB_NOTFOUND;
- goto err;
- }
-
- if ((ret = dbc->am_writelock(dbc)) != 0 ||
- (ret = __dbc_dup(dbc, &dbc_n, DB_POSITION)) != 0)
- goto err;
- opd = dbc_n->internal->opd;
- if ((ret = opd->am_put(
- opd, key, data, flags, NULL)) != 0)
- goto err;
- goto done;
- }
-
- /*
- * Perform an operation on the main cursor. Duplicate the cursor,
- * and call the underlying function.
- *
- * Cursor-relative puts need the duplicate positioned like the
- * original, hence DB_POSITION; the other puts do not.
- */
- if (flags == DB_AFTER || flags == DB_BEFORE || flags == DB_CURRENT)
- tmp_flags = DB_POSITION;
- else
- tmp_flags = 0;
-
- /*
- * If this cursor is going to be closed immediately, we don't
- * need to take precautions to clean it up on error.
- */
- if (F_ISSET(dbc, DBC_TRANSIENT | DBC_PARTITIONED))
- dbc_n = dbc;
- else if ((ret = __dbc_idup(dbc, &dbc_n, tmp_flags)) != 0)
- goto err;
-
- pgno = PGNO_INVALID;
- if ((ret = dbc_n->am_put(dbc_n, key, data, flags, &pgno)) != 0)
- goto err;
-
- /*
- * We may be referencing a new off-page duplicates tree. Acquire
- * a new cursor and call the underlying function.
- */
- if (pgno != PGNO_INVALID) {
- oldopd = dbc_n->internal->opd;
- if ((ret = __dbc_newopd(dbc, pgno, oldopd, &opd)) != 0) {
- dbc_n->internal->opd = opd;
- goto err;
- }
-
- dbc_n->internal->opd = opd;
- opd->internal->pdbc = dbc_n;
-
- if (flags == DB_NOOVERWRITE)
- flags = DB_KEYLAST;
- if ((ret = opd->am_put(
- opd, key, data, flags, NULL)) != 0)
- goto err;
- }
-
-done:
-err: /* Cleanup and cursor resolution. */
- if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
-}
-
-/*
- * __dbc_del_oldskey --
- * Delete an old secondary key, if necessary.
- * Returns DB_KEYEXIST if the new and old keys match.
- *
- * sdbp: the secondary database.
- * dbc: cursor whose thread_info, txn and locker are used to open the
- *  cursor on the secondary.
- * skey: the new secondary key, or key set if DB_DBT_MULTIPLE is set.
- *  DB_DBT_ISSET is cleared on every new key that equals an old key.
- * pkey: the primary key; byte-swapped for the secondary around each
- *  lookup and swapped back afterwards.
- * olddata: the old primary data, from which the secondary's callback
- *  computes the old secondary key(s).
- *
- * Every old secondary key that does not equal one of the new keys is
- * deleted from the secondary; if its secondary/primary pair is not found
- * there, the result of __db_secondary_corrupt() is returned.
- *
- * Returns 0 on success, DB_KEYEXIST if no error occurred and the number
- * of new keys that matched an old key equals the number of new keys (which
- * includes the case of no new keys at all), or an error.
- */
-static int
-__dbc_del_oldskey(sdbp, dbc, skey, pkey, olddata)
- DB *sdbp;
- DBC *dbc;
- DBT *skey, *pkey, *olddata;
-{
- DB *dbp;
- DBC *sdbc;
- DBT *toldskeyp, *tskeyp;
- DBT oldskey, temppkey, tempskey;
- ENV *env;
- int ret, t_ret;
- u_int32_t i, noldskey, nsame, nskey, rmw;
-
- sdbc = NULL;
- dbp = sdbp->s_primary;
- env = dbp->env;
- nsame = 0;
- rmw = STD_LOCKING(dbc) ? DB_RMW : 0;
-
- /*
- * Get the old secondary key.
- */
- memset(&oldskey, 0, sizeof(DBT));
- if ((ret = sdbp->s_callback(sdbp, pkey, olddata, &oldskey)) != 0) {
- if (ret == DB_DONOTINDEX ||
- (F_ISSET(&oldskey, DB_DBT_MULTIPLE) && oldskey.size == 0))
- /* There's no old key to delete. */
- ret = 0;
- return (ret);
- }
-
- if (F_ISSET(&oldskey, DB_DBT_MULTIPLE)) {
-#ifdef DIAGNOSTIC
- __db_check_skeyset(sdbp, &oldskey);
-#endif
- toldskeyp = (DBT *)oldskey.data;
- noldskey = oldskey.size;
- } else {
- toldskeyp = &oldskey;
- noldskey = 1;
- }
-
- if (F_ISSET(skey, DB_DBT_MULTIPLE)) {
- nskey = skey->size;
- skey = (DBT *)skey->data;
- } else
- nskey = F_ISSET(skey, DB_DBT_ISSET) ? 1 : 0;
-
- for (; noldskey > 0 && ret == 0; noldskey--, toldskeyp++) {
- /*
- * Check whether this old secondary key is also a new key
- * before we delete it. Note that bt_compare is (and must be)
- * set no matter what access method we're in.
- */
- for (i = 0, tskeyp = skey; i < nskey; i++, tskeyp++)
- if (((BTREE *)sdbp->bt_internal)->bt_compare(sdbp,
- toldskeyp, tskeyp) == 0) {
- nsame++;
- F_CLR(tskeyp, DB_DBT_ISSET);
- break;
- }
-
- if (i < nskey) { /* Matched a new key: keep the record. */
- FREE_IF_NEEDED(env, toldskeyp);
- continue;
- }
-
- if (sdbc == NULL) {
- if ((ret = __db_cursor_int(sdbp,
- dbc->thread_info, dbc->txn, sdbp->type,
- PGNO_INVALID, 0, dbc->locker, &sdbc)) != 0)
- goto err;
- if (CDB_LOCKING(env)) {
- DB_ASSERT(env,
- sdbc->mylock.off == LOCK_INVALID);
- F_SET(sdbc, DBC_WRITER);
- }
- }
-
- /*
- * Don't let c_get(DB_GET_BOTH) stomp on our data. Use
- * temporary DBTs instead.
- */
- SWAP_IF_NEEDED(sdbp, pkey);
- DB_INIT_DBT(temppkey, pkey->data, pkey->size);
- DB_INIT_DBT(tempskey, toldskeyp->data, toldskeyp->size);
- if ((ret = __dbc_get(sdbc,
- &tempskey, &temppkey, rmw | DB_GET_BOTH)) == 0)
- ret = __dbc_del(sdbc, DB_UPDATE_SECONDARY);
- else if (ret == DB_NOTFOUND)
- ret = __db_secondary_corrupt(dbp);
- SWAP_IF_NEEDED(sdbp, pkey);
- FREE_IF_NEEDED(env, toldskeyp);
- }
-
-err: for (; noldskey > 0; noldskey--, toldskeyp++)
- FREE_IF_NEEDED(env, toldskeyp);
- FREE_IF_NEEDED(env, &oldskey);
- if (sdbc != NULL && (t_ret = __dbc_close(sdbc)) != 0 && ret == 0)
- ret = t_ret;
- if (ret == 0 && nsame == nskey) /* Every new key was already present. */
- return (DB_KEYEXIST);
- return (ret);
-}
-
-/*
- * __db_duperr()
- *	Error message: we don't currently support sorted duplicate duplicates.
- *
- *	Always returns DB_KEYEXIST.  The message itself is suppressed when the
- *	caller passed DB_NODUPDATA or when dbp is a secondary index.
- *
- * PUBLIC: int __db_duperr __P((DB *, u_int32_t));
- */
-int
-__db_duperr(dbp, flags)
-	DB *dbp;
-	u_int32_t flags;
-{
-	int quiet;
-
-	/*
-	 * Stay silent while updating a secondary index: there is no clean way
-	 * to pass DB_NODUPDATA in along with DB_UPDATE_SECONDARY, yet this
-	 * condition can arise in a normal, non-error course of events.
-	 *
-	 * !!!
-	 * If duplicate duplicates are ever permitted in sorted-dup databases,
-	 * either the secondary index code must check for dup dups itself, or
-	 * the implicit "DB_NODUPDATA" behavior must be kept for databases with
-	 * DB_AM_SECONDARY set.
-	 */
-	quiet = flags == DB_NODUPDATA || F_ISSET(dbp, DB_AM_SECONDARY);
-	if (!quiet)
-		__db_errx(dbp->env,
-		    "Duplicate data items are not supported with sorted data");
-
-	return (DB_KEYEXIST);
-}
-
-/*
- * __dbc_cleanup --
- * Clean up duplicate cursors.
- *
- * dbc is the caller's cursor and dbc_n is the cursor the operation was
- * run on; dbc_n may be NULL or may be dbc itself.  Pages held by either
- * cursor (and by their off-page duplicate cursors) are released.  When
- * dbc_n is a distinct cursor it is always closed; if "failed" is zero and
- * releasing the pages succeeded, the two cursors' internal structures are
- * swapped first so that dbc ends up with dbc_n's position.
- *
- * Returns 0, or the first non-zero error encountered along the way.
- *
- * PUBLIC: int __dbc_cleanup __P((DBC *, DBC *, int));
- */
-int
-__dbc_cleanup(dbc, dbc_n, failed)
- DBC *dbc, *dbc_n;
- int failed;
-{
- DB *dbp;
- DBC *opd;
- DBC_INTERNAL *internal;
- DB_MPOOLFILE *mpf;
- int ret, t_ret;
-
- dbp = dbc->dbp;
- mpf = dbp->mpf;
- internal = dbc->internal;
- ret = 0;
-
- /* Discard any pages we're holding. */
- if (internal->page != NULL) {
- if ((t_ret = __memp_fput(mpf, dbc->thread_info,
- internal->page, dbc->priority)) != 0 && ret == 0)
- ret = t_ret;
- internal->page = NULL;
- }
- opd = internal->opd;
- if (opd != NULL && opd->internal->page != NULL) {
- if ((t_ret = __memp_fput(mpf, dbc->thread_info,
- opd->internal->page, dbc->priority)) != 0 && ret == 0)
- ret = t_ret;
- opd->internal->page = NULL;
- }
-
- /*
- * If dbc_n is NULL, there's no internal cursor swapping to be done
- * and no dbc_n to close--we probably did the entire operation on an
- * offpage duplicate cursor. Just return.
- *
- * If dbc and dbc_n are the same, we're either inside a DB->{put/get}
- * operation, and as an optimization we performed the operation on
- * the main cursor rather than on a duplicated one, or we're in a
- * bulk get that can't have moved the cursor (DB_MULTIPLE with the
- * initial c_get operation on an off-page dup cursor). Just
- * return--either we know we didn't move the cursor, or we're going
- * to close it before we return to application code, so we're sure
- * not to visibly violate the "cursor stays put on error" rule.
- */
- if (dbc_n == NULL || dbc == dbc_n)
- return (ret);
-
- if (dbc_n->internal->page != NULL) {
- if ((t_ret = __memp_fput(mpf, dbc->thread_info,
- dbc_n->internal->page, dbc->priority)) != 0 && ret == 0)
- ret = t_ret;
- dbc_n->internal->page = NULL;
- }
- opd = dbc_n->internal->opd;
- if (opd != NULL && opd->internal->page != NULL) {
- if ((t_ret = __memp_fput(mpf, dbc->thread_info,
- opd->internal->page, dbc->priority)) != 0 && ret == 0)
- ret = t_ret;
- opd->internal->page = NULL;
- }
-
- /*
- * If we didn't fail before entering this routine or just now when
- * freeing pages, swap the interesting contents of the old and new
- * cursors.
- *
- * At this point "opd" is dbc_n's off-page duplicate cursor and
- * "internal" is dbc's original internal structure; each off-page
- * duplicate cursor's parent pointer is redirected to the cursor that
- * will own it after the swap.
- */
- if (!failed && ret == 0) {
- if (opd != NULL)
- opd->internal->pdbc = dbc;
- if (internal->opd != NULL)
- internal->opd->internal->pdbc = dbc_n;
- dbc->internal = dbc_n->internal;
- dbc_n->internal = internal;
- }
-
- /*
- * Close the cursor we don't care about anymore. The close can fail,
- * but we only expect DB_LOCK_DEADLOCK failures. This violates our
- * "the cursor is unchanged on error" semantics, but since all you can
- * do with a DB_LOCK_DEADLOCK failure is close the cursor, I believe
- * that's OK.
- *
- * XXX
- * There's no way to recover from failure to close the old cursor.
- * All we can do is move to the new position and return an error.
- *
- * XXX
- * We might want to consider adding a flag to the cursor, so that any
- * subsequent operations other than close just return an error?
- */
- if ((t_ret = __dbc_close(dbc_n)) != 0 && ret == 0)
- ret = t_ret;
-
- /*
- * If this was an update that is supporting dirty reads
- * then we may have just swapped our read for a write lock
- * which is held by the surviving cursor. We need
- * to explicitly downgrade this lock. The closed cursor
- * may only have had a read lock.
- */
- if (F_ISSET(dbp, DB_AM_READ_UNCOMMITTED) &&
- dbc->internal->lock_mode == DB_LOCK_WRITE) {
- if ((t_ret =
- __TLPUT(dbc, dbc->internal->lock)) != 0 && ret == 0)
- ret = t_ret;
- if (t_ret == 0)
- dbc->internal->lock_mode = DB_LOCK_WWRITE;
- if (dbc->internal->page != NULL && (t_ret =
- __memp_shared(dbp->mpf, dbc->internal->page)) != 0 &&
- ret == 0)
- ret = t_ret;
- }
-
- return (ret);
-}
-
-/*
- * __dbc_secondary_get_pp --
- *	DBC->get() entry point for a cursor on a secondary index.  It simply
- *	forwards to DBC->pget() without a primary-key DBT, so the caller gets
- *	back the secondary key and the primary's data.
- *
- * PUBLIC: int __dbc_secondary_get_pp __P((DBC *, DBT *, DBT *, u_int32_t));
- */
-int
-__dbc_secondary_get_pp(dbc, skey, data, flags)
-	DBC *dbc;
-	DBT *skey, *data;
-	u_int32_t flags;
-{
-	int ret;
-
-	DB_ASSERT(dbc->env, F_ISSET(dbc->dbp, DB_AM_SECONDARY));
-
-	/* The NULL argument is the primary-key DBT we are not returning. */
-	ret = __dbc_pget_pp(dbc, skey, NULL, data, flags);
-	return (ret);
-}
-
-/*
- * __dbc_pget --
- * Get a primary key/data pair through a secondary index.
- *
- * dbc is a cursor on the secondary.  skey is the secondary key, pkey
- * the primary key (may be NULL, in which case a local DBT is used and
- * the primary key is not returned to the caller) and data the primary's
- * data.  flags is a cursor get flag, optionally OR'd with DB_RMW.
- *
- * Returns 0 on success or a non-zero error.  A secondary entry with no
- * matching primary record is reported through __db_secondary_corrupt
- * unless the cursor is doing uncommitted reads.
- *
- * PUBLIC: int __dbc_pget __P((DBC *, DBT *, DBT *, DBT *, u_int32_t));
- */
-int
-__dbc_pget(dbc, skey, pkey, data, flags)
- DBC *dbc;
- DBT *skey, *pkey, *data;
- u_int32_t flags;
-{
- DB *pdbp, *sdbp;
- DBC *dbc_n, *pdbc;
- DBT nullpkey;
- u_int32_t save_pkey_flags, tmp_flags, tmp_read_locking, tmp_rmw;
- int pkeymalloc, ret, t_ret;
-
- sdbp = dbc->dbp;
- pdbp = sdbp->s_primary;
- dbc_n = NULL;
- pkeymalloc = t_ret = 0;
-
- /*
- * The challenging part of this function is getting the behavior
- * right for all the various permutations of DBT flags. The
- * next several blocks handle the various cases we need to
- * deal with specially.
- */
-
- /*
- * We may be called with a NULL pkey argument, if we've been
- * wrapped by a 2-DBT get call. If so, we need to use our
- * own DBT.
- */
- if (pkey == NULL) {
- memset(&nullpkey, 0, sizeof(DBT));
- pkey = &nullpkey;
- }
-
- /* Clear OR'd in additional bits so we can check for flag equality. */
- tmp_rmw = LF_ISSET(DB_RMW);
- LF_CLR(DB_RMW);
-
- SET_READ_LOCKING_FLAGS(dbc, tmp_read_locking);
- /*
- * DB_GET_RECNO is a special case, because we're interested not in
- * the primary key/data pair, but rather in the primary's record
- * number.
- */
- if (flags == DB_GET_RECNO) {
- if (tmp_rmw)
- F_SET(dbc, DBC_RMW);
- F_SET(dbc, tmp_read_locking);
- ret = __dbc_pget_recno(dbc, pkey, data, flags);
- if (tmp_rmw)
- F_CLR(dbc, DBC_RMW);
- /* Clear the temp flags, but leave WAS_READ_COMMITTED. */
- F_CLR(dbc, tmp_read_locking & ~DBC_WAS_READ_COMMITTED);
- return (ret);
- }
-
- /*
- * If the DBTs we've been passed don't have any of the
- * user-specified memory management flags set, we want to make sure
- * we return values using the DBTs dbc->rskey, dbc->rkey, and
- * dbc->rdata, respectively.
- *
- * There are two tricky aspects to this: first, we need to pass
- * skey and pkey *in* to the initial c_get on the secondary key,
- * since either or both may be looked at by it (depending on the
- * get flag). Second, we must not use a normal DB->get call
- * on the secondary, even though that's what we want to accomplish,
- * because the DB handle may be free-threaded. Instead,
- * we open a cursor, then take steps to ensure that we actually use
- * the rkey/rdata from the *secondary* cursor.
- *
- * We accomplish all this by passing in the DBTs we started out
- * with to the c_get, but swapping the contents of rskey and rkey,
- * respectively, into rkey and rdata; __db_ret will treat them like
- * the normal key/data pair in a c_get call, and will realloc them as
- * need be (this is "step 1"). Then, for "step 2", we swap back
- * rskey/rkey/rdata to normal, and do a get on the primary with the
- * secondary dbc appointed as the owner of the returned-data memory.
- *
- * Note that in step 2, we copy the flags field in case we need to
- * pass down a DB_DBT_PARTIAL or other flag that is compatible with
- * letting DB do the memory management.
- */
-
- /*
- * It is correct, though slightly sick, to attempt a partial get of a
- * primary key. However, if we do so here, we'll never find the
- * primary record; clear the DB_DBT_PARTIAL field of pkey just for the
- * duration of the next call.
- */
- save_pkey_flags = pkey->flags;
- F_CLR(pkey, DB_DBT_PARTIAL);
-
- /*
- * Now we can go ahead with the meat of this call. First, get the
- * primary key from the secondary index. (What exactly we get depends
- * on the flags, but the underlying cursor get will take care of the
- * dirty work.) Duplicate the cursor, in case the later get on the
- * primary fails.
- *
- * The duplicate keeps the current position (DB_POSITION) only for
- * the get flags listed in the switch below.
- */
- switch (flags) {
- case DB_CURRENT:
- case DB_GET_BOTHC:
- case DB_NEXT:
- case DB_NEXT_DUP:
- case DB_NEXT_NODUP:
- case DB_PREV:
- case DB_PREV_DUP:
- case DB_PREV_NODUP:
- tmp_flags = DB_POSITION;
- break;
- default:
- tmp_flags = 0;
- break;
- }
-
- if (F_ISSET(dbc, DBC_PARTITIONED | DBC_TRANSIENT))
- dbc_n = dbc;
- else if ((ret = __dbc_dup(dbc, &dbc_n, tmp_flags)) != 0)
- return (ret);
-
- F_SET(dbc_n, DBC_TRANSIENT);
-
- if (tmp_rmw)
- F_SET(dbc_n, DBC_RMW);
- F_SET(dbc_n, tmp_read_locking);
-
- /*
- * If we've been handed a primary key, it will be in native byte order,
- * so we need to swap it before reading from the secondary.
- */
- if (flags == DB_GET_BOTH || flags == DB_GET_BOTHC ||
- flags == DB_GET_BOTH_RANGE)
- SWAP_IF_NEEDED(sdbp, pkey);
-
-retry: /* Step 1. */
- dbc_n->rdata = dbc->rkey;
- dbc_n->rkey = dbc->rskey;
- ret = __dbc_get(dbc_n, skey, pkey, flags);
- /* Restore pkey's flags in case we stomped the PARTIAL flag. */
- pkey->flags = save_pkey_flags;
-
- /*
- * We need to swap the primary key to native byte order if we read it
- * successfully, or if we swapped it on entry above. We can't return
- * with the application's data modified.
- */
- if (ret == 0 || flags == DB_GET_BOTH || flags == DB_GET_BOTHC ||
- flags == DB_GET_BOTH_RANGE)
- SWAP_IF_NEEDED(sdbp, pkey);
-
- if (ret != 0)
- goto err;
-
- /*
- * Now we're ready for "step 2". If either or both of pkey and data do
- * not have memory management flags set--that is, if DB is managing
- * their memory--we need to swap around the rkey/rdata structures so
- * that we don't wind up trying to use memory managed by the primary
- * database cursor, which we'll close before we return.
- *
- * !!!
- * If you're carefully following the bouncing ball, you'll note that in
- * the DB-managed case, the buffer hanging off of pkey is the same as
- * dbc->rkey->data. This is just fine; we may well realloc and stomp
- * on it when we return, if we're doing a DB_GET_BOTH and need to
- * return a different partial or key (depending on the comparison
- * function), but this is safe.
- *
- * !!!
- * We need to use __db_cursor_int here rather than simply calling
- * pdbp->cursor, because otherwise, if we're in CDB, we'll allocate a
- * new locker ID and leave ourselves open to deadlocks. (Even though
- * we're only acquiring read locks, we'll still block if there are any
- * waiters.)
- */
- if ((ret = __db_cursor_int(pdbp, dbc->thread_info,
- dbc->txn, pdbp->type, PGNO_INVALID, 0, dbc->locker, &pdbc)) != 0)
- goto err;
-
- F_SET(pdbc, tmp_read_locking |
- F_ISSET(dbc, DBC_READ_UNCOMMITTED | DBC_READ_COMMITTED | DBC_RMW));
-
- /*
- * We're about to use pkey a second time. If DB_DBT_MALLOC is set on
- * it, we'll leak the memory we allocated the first time. Thus, set
- * DB_DBT_REALLOC instead so that we reuse that memory instead of
- * leaking it.
- *
- * Alternatively, if the application is handling copying for pkey, we
- * need to take a copy now. The copy will be freed on exit from
- * __dbc_pget_pp (and we must be coming through there if DB_DBT_USERCOPY
- * is set). In the case of DB_GET_BOTH_RANGE, the pkey supplied by
- * the application has already been copied in but the value may have
- * changed in the search. In that case, free the original copy and get
- * a new one.
- *
- * !!!
- * This assumes that the user must always specify a compatible realloc
- * function if a malloc function is specified. I think this is a
- * reasonable requirement.
- */
- if (F_ISSET(pkey, DB_DBT_MALLOC)) {
- F_CLR(pkey, DB_DBT_MALLOC);
- F_SET(pkey, DB_DBT_REALLOC);
- pkeymalloc = 1;
- } else if (F_ISSET(pkey, DB_DBT_USERCOPY)) {
- if (flags == DB_GET_BOTH_RANGE)
- __dbt_userfree(sdbp->env, NULL, pkey, NULL);
- if ((ret = __dbt_usercopy(sdbp->env, pkey)) != 0)
- goto err;
- }
-
- /*
- * Do the actual get. Set DBC_TRANSIENT since we don't care about
- * preserving the position on error, and it's faster. SET_RET_MEM so
- * that the secondary DBC owns any returned-data memory.
- */
- F_SET(pdbc, DBC_TRANSIENT);
- SET_RET_MEM(pdbc, dbc);
- ret = __dbc_get(pdbc, pkey, data, DB_SET);
-
- /*
- * If the item wasn't found in the primary, this is a bug; our
- * secondary has somehow gotten corrupted, and contains elements that
- * don't correspond to anything in the primary. Complain.
- *
- * The exception is an uncommitted-read cursor: for the get flags
- * that move the cursor (see the switch below) we retry step 1
- * instead, and for the others DB_NOTFOUND is returned as is.
- */
-
- /*
- * Now close the primary cursor.
- *
- * NOTE(review): the DBC_READ_UNCOMMITTED test below reads pdbc's
- * flags after __dbc_close(pdbc) has been called -- confirm that a
- * closed cursor handle is still safe to read here.
- */
- if ((t_ret = __dbc_close(pdbc)) != 0 && ret == 0)
- ret = t_ret;
-
- else if (ret == DB_NOTFOUND) {
- if (!F_ISSET(pdbc, DBC_READ_UNCOMMITTED))
- ret = __db_secondary_corrupt(pdbp);
- else switch (flags) {
- case DB_GET_BOTHC:
- case DB_NEXT:
- case DB_NEXT_DUP:
- case DB_NEXT_NODUP:
- case DB_PREV:
- case DB_PREV_DUP:
- case DB_PREV_NODUP:
- goto retry;
- default:
- break;
- }
- }
-
-err: /* Cleanup and cursor resolution. */
- if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && ret == 0)
- ret = t_ret;
- if (pkeymalloc) {
- /*
- * If pkey had a MALLOC flag, we need to restore it; otherwise,
- * if the user frees the buffer but reuses the DBT without
- * NULL'ing its data field or changing the flags, we may drop
- * core.
- */
- F_CLR(pkey, DB_DBT_REALLOC);
- F_SET(pkey, DB_DBT_MALLOC);
- }
-
- return (ret);
-}
-
-/*
- * __dbc_pget_recno --
- * Perform a DB_GET_RECNO c_pget on a secondary index. Returns
- * the secondary's record number in the pkey field and the primary's
- * in the data field.
- *
- * sdbc is the cursor on the secondary.  For whichever of the two
- * databases does not have DB_AM_RECNUM set, RECNO_OOB is returned in
- * place of a record number.  Returns 0 on success or a non-zero error.
- */
-static int
-__dbc_pget_recno(sdbc, pkey, data, flags)
- DBC *sdbc;
- DBT *pkey, *data;
- u_int32_t flags;
-{
- DB *pdbp, *sdbp;
- DBC *pdbc;
- DBT discardme, primary_key;
- ENV *env;
- db_recno_t oob;
- u_int32_t rmw;
- int ret, t_ret;
-
- sdbp = sdbc->dbp;
- pdbp = sdbp->s_primary;
- env = sdbp->env;
- pdbc = NULL;
- ret = t_ret = 0;
-
- rmw = LF_ISSET(DB_RMW);
-
- memset(&discardme, 0, sizeof(DBT));
- F_SET(&discardme, DB_DBT_USERMEM | DB_DBT_PARTIAL);
-
- oob = RECNO_OOB;
-
- /*
- * If the primary is an rbtree, we want its record number, whether
- * or not the secondary is one too. Fetch the recno into "data".
- *
- * If it's not an rbtree, return RECNO_OOB in "data".
- */
- if (F_ISSET(pdbp, DB_AM_RECNUM)) {
- /*
- * Get the primary key, so we can find the record number
- * in the primary. (We're uninterested in the secondary key.)
- *
- * primary_key is fetched with DB_DBT_MALLOC, so its buffer is
- * ours to release; that happens at "perr" below.
- */
- memset(&primary_key, 0, sizeof(DBT));
- F_SET(&primary_key, DB_DBT_MALLOC);
- if ((ret = __dbc_get(sdbc,
- &discardme, &primary_key, rmw | DB_CURRENT)) != 0)
- return (ret);
-
- /*
- * Open a cursor on the primary, set it to the right record,
- * and fetch its recno into "data".
- *
- * (See __dbc_pget for comments on the use of __db_cursor_int.)
- *
- * SET_RET_MEM so that the secondary DBC owns any returned-data
- * memory.
- */
- if ((ret = __db_cursor_int(pdbp, sdbc->thread_info, sdbc->txn,
- pdbp->type, PGNO_INVALID, 0, sdbc->locker, &pdbc)) != 0)
- goto perr;
- SET_RET_MEM(pdbc, sdbc);
- if ((ret = __dbc_get(pdbc,
- &primary_key, &discardme, rmw | DB_SET)) != 0)
- goto perr;
-
- ret = __dbc_get(pdbc, &discardme, data, rmw | DB_GET_RECNO);
-
-perr: __os_ufree(env, primary_key.data);
- if (pdbc != NULL &&
- (t_ret = __dbc_close(pdbc)) != 0 && ret == 0)
- ret = t_ret;
- if (ret != 0)
- return (ret);
- } else if ((ret = __db_retcopy(env, data, &oob,
- sizeof(oob), &sdbc->rkey->data, &sdbc->rkey->ulen)) != 0)
- return (ret);
-
- /*
- * If the secondary is an rbtree, we want its record number, whether
- * or not the primary is one too. Fetch the recno into "pkey".
- *
- * If it's not an rbtree, return RECNO_OOB in "pkey".
- */
- if (F_ISSET(sdbp, DB_AM_RECNUM))
- return (__dbc_get(sdbc, &discardme, pkey, flags));
- else
- return (__db_retcopy(env, pkey, &oob,
- sizeof(oob), &sdbc->rdata->data, &sdbc->rdata->ulen));
-}
-
-/*
- * __db_wrlock_err --
- *	Complain that a write was attempted through a cursor that does not
- *	hold a write lock, and hand back EPERM for the caller to return.
- */
-static int
-__db_wrlock_err(env)
-	ENV *env;
-{
-	int ret;
-
-	ret = EPERM;
-	__db_errx(env, "Write attempted on read-only cursor");
-
-	return (ret);
-}
-
-/*
- * __dbc_del_secondary --
- * Perform a delete operation on a secondary index: call through
- * to the primary and delete the primary record that this record
- * points to.
- *
- * Note that deleting the primary record will call c_del on all
- * the secondaries, including this one; thus, it is not necessary
- * to execute both this function and an actual delete.
- *
- * dbc is a cursor positioned on the secondary record.  Returns 0 on
- * success or a non-zero error; a missing primary record is reported
- * through __db_secondary_corrupt.
- */
-static int
-__dbc_del_secondary(dbc)
- DBC *dbc;
-{
- DB *pdbp;
- DBC *pdbc;
- DBT skey, pkey;
- ENV *env;
- int ret, t_ret;
- u_int32_t rmw;
-
- pdbp = dbc->dbp->s_primary;
- env = pdbp->env;
- rmw = STD_LOCKING(dbc) ? DB_RMW : 0;
-
- /*
- * Get the current item that we're pointing at.
- * We don't actually care about the secondary key, just
- * the primary.
- *
- * skey is zeroed and flagged DB_DBT_PARTIAL | DB_DBT_USERMEM, so
- * it asks for zero bytes of the secondary key.
- */
- memset(&skey, 0, sizeof(DBT));
- memset(&pkey, 0, sizeof(DBT));
- F_SET(&skey, DB_DBT_PARTIAL | DB_DBT_USERMEM);
- if ((ret = __dbc_get(dbc, &skey, &pkey, DB_CURRENT)) != 0)
- return (ret);
-
- SWAP_IF_NEEDED(dbc->dbp, &pkey);
-
- /*
- * Create a cursor on the primary with our locker ID,
- * so that when it calls back, we don't conflict.
- *
- * We create a cursor explicitly because there's no
- * way to specify the same locker ID if we're using
- * locking but not transactions if we use the DB->del
- * interface. This shouldn't be any less efficient
- * anyway.
- */
- if ((ret = __db_cursor_int(pdbp, dbc->thread_info, dbc->txn,
- pdbp->type, PGNO_INVALID, 0, dbc->locker, &pdbc)) != 0)
- return (ret);
-
- /*
- * See comment in __dbc_put--if we're in CDB,
- * we already hold the locks we need, and we need to flag
- * the cursor as a WRITER so we don't run into errors
- * when we try to delete.
- */
- if (CDB_LOCKING(env)) {
- DB_ASSERT(env, pdbc->mylock.off == LOCK_INVALID);
- F_SET(pdbc, DBC_WRITER);
- }
-
- /*
- * Set the new cursor to the correct primary key. Then
- * delete it. We don't really care about the datum;
- * just reuse our skey DBT.
- *
- * If the primary get returns DB_NOTFOUND, something is amiss--
- * every record in the secondary should correspond to some record
- * in the primary.
- */
- if ((ret = __dbc_get(pdbc, &pkey, &skey, DB_SET | rmw)) == 0)
- ret = __dbc_del(pdbc, 0);
- else if (ret == DB_NOTFOUND)
- ret = __db_secondary_corrupt(pdbp);
-
- if ((t_ret = __dbc_close(pdbc)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __dbc_del_primary --
- * Perform a delete operation on a primary index. Loop through
- * all the secondary indices which correspond to this primary
- * database, and delete any secondary keys that point at the current
- * record.
- *
- * dbc is a cursor positioned on the primary record.  Returns 0 on
- * success or a non-zero error; a secondary that lacks an expected
- * entry is reported through __db_secondary_corrupt.
- *
- * PUBLIC: int __dbc_del_primary __P((DBC *));
- */
-int
-__dbc_del_primary(dbc)
- DBC *dbc;
-{
- DB *dbp, *sdbp;
- DBC *sdbc;
- DBT *tskeyp;
- DBT data, pkey, skey, temppkey, tempskey;
- ENV *env;
- u_int32_t nskey, rmw;
- int ret, t_ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
- sdbp = NULL;
- rmw = STD_LOCKING(dbc) ? DB_RMW : 0;
-
- /*
- * If we're called at all, we have at least one secondary.
- * (Unfortunately, we can't assert this without grabbing the mutex.)
- * Get the current record so that we can construct appropriate
- * secondary keys as needed.
- */
- memset(&pkey, 0, sizeof(DBT));
- memset(&data, 0, sizeof(DBT));
- if ((ret = __dbc_get(dbc, &pkey, &data, DB_CURRENT)) != 0)
- return (ret);
-
- memset(&skey, 0, sizeof(DBT));
- for (ret = __db_s_first(dbp, &sdbp);
- sdbp != NULL && ret == 0;
- ret = __db_s_next(&sdbp, dbc->txn)) {
- /*
- * Get the secondary key for this secondary and the current
- * item.
- */
- if ((ret = sdbp->s_callback(sdbp, &pkey, &data, &skey)) != 0) {
- /* Not indexing is equivalent to an empty key set. */
- if (ret == DB_DONOTINDEX) {
- F_SET(&skey, DB_DBT_MULTIPLE);
- skey.size = 0;
- } else /* We had a substantive error. Bail. */
- goto err;
- }
-
-#ifdef DIAGNOSTIC
- if (F_ISSET(&skey, DB_DBT_MULTIPLE))
- __db_check_skeyset(sdbp, &skey);
-#endif
-
- if (F_ISSET(&skey, DB_DBT_MULTIPLE)) {
- tskeyp = (DBT *)skey.data;
- nskey = skey.size;
- if (nskey == 0)
- continue;
- } else {
- tskeyp = &skey;
- nskey = 1;
- }
-
- /* Open a secondary cursor. */
- if ((ret = __db_cursor_int(sdbp,
- dbc->thread_info, dbc->txn, sdbp->type,
- PGNO_INVALID, 0, dbc->locker, &sdbc)) != 0)
- goto err;
- /* See comment above and in __dbc_put. */
- if (CDB_LOCKING(env)) {
- DB_ASSERT(env, sdbc->mylock.off == LOCK_INVALID);
- F_SET(sdbc, DBC_WRITER);
- }
-
- for (; nskey > 0; nskey--, tskeyp++) {
- /*
- * Set the secondary cursor to the appropriate item.
- * Delete it.
- *
- * We want to use DB_RMW if locking is on; it's only
- * legal then, though.
- *
- * !!!
- * Don't stomp on any callback-allocated buffer in skey
- * when we do a c_get(DB_GET_BOTH); use a temp DBT
- * instead. Similarly, don't allow pkey to be
- * invalidated when the cursor is closed.
- *
- * NOTE(review): this loop does not test ret, so when
- * there are several keys a failure on one of them is
- * overwritten by the result for the next -- confirm
- * that this is intended.
- */
- DB_INIT_DBT(tempskey, tskeyp->data, tskeyp->size);
- SWAP_IF_NEEDED(sdbp, &pkey);
- DB_INIT_DBT(temppkey, pkey.data, pkey.size);
- if ((ret = __dbc_get(sdbc, &tempskey, &temppkey,
- DB_GET_BOTH | rmw)) == 0)
- ret = __dbc_del(sdbc, DB_UPDATE_SECONDARY);
- else if (ret == DB_NOTFOUND)
- ret = __db_secondary_corrupt(dbp);
- SWAP_IF_NEEDED(sdbp, &pkey);
- FREE_IF_NEEDED(env, tskeyp);
- }
-
- if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0)
- ret = t_ret;
- if (ret != 0)
- goto err;
-
- /*
- * In the common case where there is a single secondary key, we
- * will have freed any application-allocated data in skey
- * already. In the multiple key case, we need to free it here.
- * It is safe to do this twice as the macro resets the data
- * field.
- */
- FREE_IF_NEEDED(env, &skey);
- }
-
-err: if (sdbp != NULL &&
- (t_ret = __db_s_done(sdbp, dbc->txn)) != 0 && ret == 0)
- ret = t_ret;
- FREE_IF_NEEDED(env, &skey);
- return (ret);
-}
-
-/*
- * __dbc_del_foreign --
- *	Apply the foreign database constraints for a particular foreign
- *	database when an item is being deleted (dbc points at item being deleted
- *	in the foreign database.)
- *
- *	Delete happens in dbp, check for occurrences of key in pdbp.
- *	Terminology:
- *	  Foreign db = Where delete occurs (dbp).
- *	  Secondary db = Where references to dbp occur (sdbp, a secondary)
- *	  Primary db = sdbp's primary database, references to dbp are secondary
- *	    keys here
- *	  Foreign Key = Key being deleted in dbp (fkey)
- *	  Primary Key = Key of the corresponding entry in sdbp's primary (pkey).
- *
- *	Every entry on dbp's f_primaries list is processed in turn.  Returns 0
- *	if all constraints were applied, DB_FOREIGN_CONFLICT if a referencing
- *	record exists under DB_FOREIGN_ABORT, or the first other error met.
- */
-static int
-__dbc_del_foreign(dbc)
-	DBC *dbc;
-{
-	DB_FOREIGN_INFO *f_info;
-	DB *dbp, *pdbp, *sdbp;
-	DBC *pdbc, *sdbc;
-	DBT data, fkey, pkey;
-	ENV *env;
-	u_int32_t flags, rmw;
-	int changed, ret, t_ret;
-
-	dbp = dbc->dbp;
-	env = dbp->env;
-
-	/* The key of the item under the cursor is the foreign key. */
-	memset(&fkey, 0, sizeof(DBT));
-	memset(&data, 0, sizeof(DBT));
-	if ((ret = __dbc_get(dbc, &fkey, &data, DB_CURRENT)) != 0)
-		return (ret);
-
-	LIST_FOREACH(f_info, &(dbp->f_primaries), f_links) {
-		sdbp = f_info->dbp;
-		pdbp = sdbp->s_primary;
-		/* LF_ISSET tests this local, i.e. this constraint's flags. */
-		flags = f_info->flags;
-
-		rmw = (STD_LOCKING(dbc) &&
-		    !LF_ISSET(DB_FOREIGN_ABORT)) ? DB_RMW : 0;
-
-		/*
-		 * Handle CDB locking.  Some of this is copied from
-		 * __dbc_del_primary, but a bit more acrobatics are required.
-		 * If we're not going to abort, then we need to get a write
-		 * cursor.  If CDB_ALLDB is set, then only one write cursor is
-		 * allowed and we hold it, so we fudge things and promote the
-		 * cursor on the other DBs manually, it won't cause a problem.
-		 * If CDB_ALLDB is not set, then we go through the usual route
-		 * to make sure we block as necessary.  If there are any open
-		 * read cursors on sdbp, the delete or put call later will
-		 * block.
-		 *
-		 * If NULLIFY is set, we'll need a cursor on the primary to
-		 * update it with the nullified data.  Because primary and
-		 * secondary dbs share a lock file ID in CDB, we open a cursor
-		 * on the secondary and then get another writeable cursor on the
-		 * primary via __db_cursor_int to avoid deadlocking.
-		 */
-		sdbc = pdbc = NULL;
-		if (!LF_ISSET(DB_FOREIGN_ABORT) && CDB_LOCKING(env) &&
-		    !F_ISSET(env->dbenv, DB_ENV_CDB_ALLDB)) {
-			ret = __db_cursor(sdbp,
-			    dbc->thread_info, dbc->txn, &sdbc, DB_WRITECURSOR);
-			if (LF_ISSET(DB_FOREIGN_NULLIFY) && ret == 0) {
-				ret = __db_cursor_int(pdbp,
-				    dbc->thread_info, dbc->txn, pdbp->type,
-				    PGNO_INVALID, 0, dbc->locker, &pdbc);
-				/*
-				 * Only flag the primary cursor once we know it
-				 * was created: on failure pdbc is still NULL
-				 * and the error is handled just below.
-				 */
-				if (ret == 0)
-					F_SET(pdbc, DBC_WRITER);
-			}
-		} else {
-			ret = __db_cursor_int(sdbp, dbc->thread_info, dbc->txn,
-			    sdbp->type, PGNO_INVALID, 0, dbc->locker, &sdbc);
-			if (LF_ISSET(DB_FOREIGN_NULLIFY) && ret == 0)
-				ret = __db_cursor_int(pdbp, dbc->thread_info,
-				    dbc->txn, pdbp->type, PGNO_INVALID, 0,
-				    dbc->locker, &pdbc);
-		}
-		if (ret != 0) {
-			/* pdbc is created last, so only sdbc can be open. */
-			if (sdbc != NULL)
-				(void)__dbc_close(sdbc);
-			return (ret);
-		}
-		if (CDB_LOCKING(env) && F_ISSET(env->dbenv, DB_ENV_CDB_ALLDB)) {
-			DB_ASSERT(env, sdbc->mylock.off == LOCK_INVALID);
-			F_SET(sdbc, DBC_WRITER);
-			if (LF_ISSET(DB_FOREIGN_NULLIFY) && pdbc != NULL) {
-				DB_ASSERT(env,
-				    pdbc->mylock.off == LOCK_INVALID);
-				F_SET(pdbc, DBC_WRITER);
-			}
-		}
-
-		/*
-		 * There are three actions possible when a foreign database has
-		 * items corresponding to a deleted item:
-		 * DB_FOREIGN_ABORT - The delete operation should be aborted.
-		 * DB_FOREIGN_CASCADE - All corresponding foreign items should
-		 *    be deleted.
-		 * DB_FOREIGN_NULLIFY - A callback needs to be made, allowing
-		 *    the application to modify the data DBT from the
-		 *    associated database.  If the callback makes a
-		 *    modification, the updated item needs to replace the
-		 *    original item in the foreign db
-		 */
-		memset(&pkey, 0, sizeof(DBT));
-		memset(&data, 0, sizeof(DBT));
-		ret = __dbc_pget(sdbc, &fkey, &pkey, &data, DB_SET|rmw);
-
-		if (ret == DB_NOTFOUND) {
-			/* No entry means no constraint */
-			ret = __dbc_close(sdbc);
-			if (LF_ISSET(DB_FOREIGN_NULLIFY) &&
-			    (t_ret = __dbc_close(pdbc)) != 0)
-				ret = t_ret;
-			if (ret != 0)
-				return (ret);
-			continue;
-		} else if (ret != 0) {
-			/* Just return the error code from the pget */
-			(void)__dbc_close(sdbc);
-			if (LF_ISSET(DB_FOREIGN_NULLIFY))
-				(void)__dbc_close(pdbc);
-			return (ret);
-		} else if (LF_ISSET(DB_FOREIGN_ABORT)) {
-			/* If the record exists and ABORT is set, we're done */
-			if ((ret = __dbc_close(sdbc)) != 0)
-				return (ret);
-			return (DB_FOREIGN_CONFLICT);
-		}
-
-		/*
-		 * There were matching items in the primary DB, and the action
-		 * is either DB_FOREIGN_CASCADE or DB_FOREIGN_NULLIFY.
-		 */
-		while (ret == 0) {
-			if (LF_ISSET(DB_FOREIGN_CASCADE)) {
-				/*
-				 * Don't use the DB_UPDATE_SECONDARY flag,
-				 * since we want the delete to cascade into the
-				 * secondary's primary.
-				 */
-				if ((ret = __dbc_del(sdbc, 0)) != 0) {
-					__db_err(env, ret,
-	"Attempt to execute cascading delete in a foreign index failed");
-					break;
-				}
-			} else if (LF_ISSET(DB_FOREIGN_NULLIFY)) {
-				changed = 0;
-				if ((ret = f_info->callback(sdbp,
-				    &pkey, &data, &fkey, &changed)) != 0) {
-					__db_err(env, ret,
-				    "Foreign database application callback");
-					break;
-				}
-
-				/*
-				 * If the user callback modified the DBT, write
-				 * it back; bail out if that put on the primary
-				 * fails.
-				 */
-				if (changed && (ret = __dbc_put(pdbc,
-				    &pkey, &data, DB_KEYFIRST)) != 0) {
-					__db_err(env, ret,
-"Attempt to overwrite item in foreign database with nullified value failed");
-					break;
-				}
-			}
-			/* retrieve the next matching item from the prim. db */
-			memset(&pkey, 0, sizeof(DBT));
-			memset(&data, 0, sizeof(DBT));
-			ret = __dbc_pget(sdbc,
-			    &fkey, &pkey, &data, DB_NEXT_DUP|rmw);
-		}
-
-		/* Running out of duplicates is the normal way to finish. */
-		if (ret == DB_NOTFOUND)
-			ret = 0;
-		if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0)
-			ret = t_ret;
-		if (LF_ISSET(DB_FOREIGN_NULLIFY) &&
-		    (t_ret = __dbc_close(pdbc)) != 0 && ret == 0)
-			ret = t_ret;
-		if (ret != 0)
-			return (ret);
-	}
-
-	return (ret);
-}
-
-/*
- * __db_s_first --
- *	Hand back the first secondary on the primary's list, or NULL if the
- *	list is empty.  A secondary that is returned has had its reference
- *	count bumped.  Always returns 0.
- *
- * PUBLIC: int __db_s_first __P((DB *, DB **));
- */
-int
-__db_s_first(pdbp, sdbpp)
-	DB *pdbp, **sdbpp;
-{
-	DB *first;
-
-	MUTEX_LOCK(pdbp->env, pdbp->mutex);
-
-	/* Take the reference under the mutex; see __db_s_next. */
-	if ((first = LIST_FIRST(&pdbp->s_secondaries)) != NULL)
-		first->s_refcnt++;
-
-	MUTEX_UNLOCK(pdbp->env, pdbp->mutex);
-
-	*sdbpp = first;
-	return (0);
-}
-
-/*
- * __db_s_next --
- * Get the next secondary in the list.
- *
- * On entry *sdbpp is the secondary the caller has finished with; its
- * reference is dropped and *sdbpp is replaced by the next secondary
- * (with a reference taken) or by NULL at the end of the list.  If the
- * dropped reference was the last one, the handle is unlinked and closed
- * here with txn.  Returns 0, or the result of that close.
- *
- * PUBLIC: int __db_s_next __P((DB **, DB_TXN *));
- */
-int
-__db_s_next(sdbpp, txn)
- DB **sdbpp;
- DB_TXN *txn;
-{
- DB *sdbp, *pdbp, *closeme;
- ENV *env;
- int ret;
-
- /*
- * Secondary indices are kept in a linked list, s_secondaries,
- * off each primary DB handle. If a primary is free-threaded,
- * this list may only be traversed or modified while the primary's
- * thread mutex is held.
- *
- * The tricky part is that we don't want to hold the thread mutex
- * across the full set of secondary puts necessary for each primary
- * put, or we'll wind up essentially single-threading all the puts
- * to the handle; the secondary puts will each take about as
- * long as the primary does, and may require I/O. So we instead
- * hold the thread mutex only long enough to follow one link to the
- * next secondary, and then we release it before performing the
- * actual secondary put.
- *
- * The only danger here is that we might legitimately close a
- * secondary index in one thread while another thread is performing
- * a put and trying to update that same secondary index. To
- * prevent this from happening, we refcount the secondary handles.
- * If close is called on a secondary index handle while we're putting
- * to it, it won't really be closed--the refcount will simply drop,
- * and we'll be responsible for closing it here.
- *
- * NOTE(review): LIST_NEXT is applied to sdbp below even when
- * LIST_REMOVE has just unlinked it; this presumably relies on
- * LIST_REMOVE leaving the element's own next pointer intact --
- * confirm against the queue macros in use.
- */
- sdbp = *sdbpp;
- pdbp = sdbp->s_primary;
- env = pdbp->env;
- closeme = NULL;
-
- MUTEX_LOCK(env, pdbp->mutex);
- DB_ASSERT(env, sdbp->s_refcnt != 0);
- if (--sdbp->s_refcnt == 0) {
- LIST_REMOVE(sdbp, s_links);
- closeme = sdbp;
- }
- sdbp = LIST_NEXT(sdbp, s_links);
- if (sdbp != NULL)
- sdbp->s_refcnt++;
- MUTEX_UNLOCK(env, pdbp->mutex);
-
- *sdbpp = sdbp;
-
- /*
- * closeme->close() is a wrapper; call __db_close explicitly.
- * The close is done after the primary's mutex has been released.
- */
- if (closeme == NULL)
- ret = 0;
- else
- ret = __db_close(closeme, txn, 0);
-
- return (ret);
-}
-
-/*
- * __db_s_done --
- *	Drop the reference we hold on a secondary database handle without
- *	advancing to the next one (i.e. without calling __db_s_next).  If
- *	ours was the last reference, the handle is unlinked from its
- *	primary's list and closed.  Returns 0, or the result of that close.
- *
- * PUBLIC: int __db_s_done __P((DB *, DB_TXN *));
- */
-int
-__db_s_done(sdbp, txn)
-	DB *sdbp;
-	DB_TXN *txn;
-{
-	DB *pdbp;
-	ENV *env;
-	int lastref;
-
-	pdbp = sdbp->s_primary;
-	env = pdbp->env;
-
-	MUTEX_LOCK(env, pdbp->mutex);
-	DB_ASSERT(env, sdbp->s_refcnt != 0);
-	lastref = (--sdbp->s_refcnt == 0);
-	if (lastref)
-		LIST_REMOVE(sdbp, s_links);
-	MUTEX_UNLOCK(env, pdbp->mutex);
-
-	/* The close itself happens outside the primary's mutex. */
-	if (!lastref)
-		return (0);
-	return (__db_close(sdbp, txn, 0));
-}
-
-/*
- * __db_s_count --
- *	Return how many secondaries are currently linked to the given
- *	primary.  The list is walked with the primary's mutex held.
- */
-static int
-__db_s_count(pdbp)
-	DB *pdbp;
-{
-	DB *cur;
-	ENV *env;
-	int nsecondaries;
-
-	env = pdbp->env;
-	nsecondaries = 0;
-
-	MUTEX_LOCK(env, pdbp->mutex);
-	cur = LIST_FIRST(&pdbp->s_secondaries);
-	while (cur != NULL) {
-		++nsecondaries;
-		cur = LIST_NEXT(cur, s_links);
-	}
-	MUTEX_UNLOCK(env, pdbp->mutex);
-
-	return (nsecondaries);
-}
-
-/*
- * __db_buildpartial --
- *	Construct, in freshly allocated memory, the record that results from
- *	applying the partial put described by "partial" to "oldrec".  On
- *	success newrec->data/newrec->size describe the new record and 0 is
- *	returned; otherwise the allocation error is returned.
- *
- *	This should probably be merged with __bam_build, but that requires
- *	a little trickery if we plan to keep the overflow-record optimization
- *	in that function.
- *
- * PUBLIC: int __db_buildpartial __P((DB *, DBT *, DBT *, DBT *));
- */
-int
-__db_buildpartial(dbp, oldrec, partial, newrec)
-	DB *dbp;
-	DBT *oldrec, *partial, *newrec;
-{
-	ENV *env;
-	u_int32_t head, newsize, tail_off;
-	u_int8_t *out;
-	int fill, ret;
-
-	env = dbp->env;
-
-	DB_ASSERT(env, F_ISSET(partial, DB_DBT_PARTIAL));
-
-	memset(newrec, 0, sizeof(DBT));
-
-	newsize = __db_partsize(oldrec->size, partial);
-	newrec->size = newsize;
-
-	if ((ret = __os_malloc(env, newsize, &out)) != 0)
-		return (ret);
-	newrec->data = out;
-
-	/*
-	 * Pre-fill the whole buffer so that any part neither record supplies
-	 * is well defined: the pad byte for fixed-length records, nul
-	 * otherwise.
-	 */
-	if (F_ISSET(dbp, DB_AM_FIXEDLEN))
-		fill = ((BTREE *)dbp->bt_internal)->re_pad;
-	else
-		fill = 0;
-	memset(out, fill, newsize);
-
-	/* Leading bytes of the old record, up to where the new data starts. */
-	head = partial->doff;
-	if (head > oldrec->size)
-		head = oldrec->size;
-	memcpy(out, oldrec->data, head);
-
-	/* The new data itself. */
-	memcpy(out + partial->doff, partial->data, partial->size);
-
-	/* Whatever of the old record lies beyond the replaced range. */
-	tail_off = partial->doff + partial->dlen;
-	if (oldrec->size > tail_off)
-		memcpy(out + partial->doff + partial->size,
-		    (u_int8_t *)oldrec->data + tail_off,
-		    oldrec->size - tail_off);
-
-	return (0);
-}
-
-/*
- * __db_partsize --
- *	Compute the length a record of "nbytes" bytes will have once the
- *	partial put described by "data" (doff, dlen, size) has been applied.
- *
- *	This code is called from __bam_partsize.
- *
- * PUBLIC: u_int32_t __db_partsize __P((u_int32_t, DBT *));
- */
-u_int32_t
-__db_partsize(nbytes, data)
-	u_int32_t nbytes;
-	DBT *data;
-{
-	u_int32_t replaced_end;
-
-	/* One past the last byte of the range being replaced. */
-	replaced_end = data->doff + data->dlen;
-
-	/*
-	 * If every byte being replaced exists, the result is the old size
-	 * (nbytes) minus the bytes replaced (dlen) plus the bytes added
-	 * (size).
-	 */
-	if (nbytes >= replaced_end)
-		return (nbytes + data->size - data->dlen);
-
-	/*
-	 * Otherwise the replaced range runs past the end of the record.  How
-	 * many bytes are nominally replaced no longer matters: the record
-	 * ends where the new bytes end, i.e. at doff plus size.
-	 */
-	return (data->doff + data->size);
-}
-
-#ifdef DIAGNOSTIC
-/*
- * __db_check_skeyset --
- *	Diagnostic check on a DB_DBT_MULTIPLE key set returned by the
- *	application's callback: assert that no two keys in the set compare
- *	equal under the secondary's comparison function.
- *
- * PUBLIC: #ifdef DIAGNOSTIC
- * PUBLIC: void __db_check_skeyset __P((DB *, DBT *));
- * PUBLIC: #endif
- */
-void
-__db_check_skeyset(sdbp, skeyp)
-	DB *sdbp;
-	DBT *skeyp;
-{
-	DBT *keys;
-	ENV *env;
-	u_int32_t i, j, nkeys;
-
-	env = sdbp->env;
-
-	/* skeyp->data is an array of skeyp->size key DBTs. */
-	keys = (DBT *)skeyp->data;
-	nkeys = skeyp->size;
-
-	/* Compare every unordered pair exactly once. */
-	for (i = 0; i < nkeys; i++)
-		for (j = i + 1; j < nkeys; j++)
-			DB_ASSERT(env,
-			    ((BTREE *)sdbp->bt_internal)->bt_compare(sdbp,
-			    &keys[i], &keys[j]) != 0);
-}
-#endif
diff --git a/db/db_cds.c b/db/db_cds.c
deleted file mode 100644
index 5efda31..0000000
--- a/db/db_cds.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 2000-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_am.h"
-#include "dbinc/lock.h"
-#include "dbinc/txn.h"
-
-static int __cdsgroup_abort __P((DB_TXN *txn));
-static int __cdsgroup_commit __P((DB_TXN *txn, u_int32_t flags));
-static int __cdsgroup_discard __P((DB_TXN *txn, u_int32_t flags));
-static u_int32_t __cdsgroup_id __P((DB_TXN *txn));
-static int __cdsgroup_notsup __P((ENV *env, const char *meth));
-static int __cdsgroup_prepare __P((DB_TXN *txn, u_int8_t *gid));
-static int __cdsgroup_set_name __P((DB_TXN *txn, const char *name));
-static int __cdsgroup_set_timeout
- __P((DB_TXN *txn, db_timeout_t timeout, u_int32_t flags));
-
-/*
- * __cdsgroup_notsup --
- * Error when CDS groups don't support a method.
- */
-static int
-__cdsgroup_notsup(env, meth)
- ENV *env;
- const char *meth;
-{
- __db_errx(env, "CDS groups do not support %s", meth);
- return (DB_OPNOTSUP);
-}
-
-static int
-__cdsgroup_abort(txn)
- DB_TXN *txn;
-{
- return (__cdsgroup_notsup(txn->mgrp->env, "abort"));
-}
-
-static int
-__cdsgroup_commit(txn, flags)
- DB_TXN *txn;
- u_int32_t flags;
-{
- DB_LOCKER *locker;
- DB_LOCKREQ lreq;
- ENV *env;
- int ret, t_ret;
-
- COMPQUIET(flags, 0);
- env = txn->mgrp->env;
-
- /* Check for live cursors. */
- if (txn->cursors != 0) {
- __db_errx(env, "CDS group has active cursors");
- return (EINVAL);
- }
-
- /* We may be holding handle locks; release them. */
- lreq.op = DB_LOCK_PUT_ALL;
- lreq.obj = NULL;
- ret = __lock_vec(env, txn->locker, 0, &lreq, 1, NULL);
-
- env = txn->mgrp->env;
- locker = txn->locker;
- __os_free(env, txn->mgrp);
- __os_free(env, txn);
- if ((t_ret = __lock_id_free(env, locker)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
-}
-
-static int __cdsgroup_discard(txn, flags)
- DB_TXN *txn;
- u_int32_t flags;
-{
- COMPQUIET(flags, 0);
- return (__cdsgroup_notsup(txn->mgrp->env, "discard"));
-}
-
-static u_int32_t __cdsgroup_id(txn)
- DB_TXN *txn;
-{
- return (txn->txnid);
-}
-
-static int __cdsgroup_prepare(txn, gid)
- DB_TXN *txn;
- u_int8_t *gid;
-{
- COMPQUIET(gid, NULL);
- return (__cdsgroup_notsup(txn->mgrp->env, "prepare"));
-}
-
-static int __cdsgroup_set_name(txn, name)
- DB_TXN *txn;
- const char *name;
-{
- COMPQUIET(name, NULL);
- return (__cdsgroup_notsup(txn->mgrp->env, "set_name"));
-}
-
-static int __cdsgroup_set_timeout(txn, timeout, flags)
- DB_TXN *txn;
- db_timeout_t timeout;
- u_int32_t flags;
-{
- COMPQUIET(timeout, 0);
- COMPQUIET(flags, 0);
- return (__cdsgroup_notsup(txn->mgrp->env, "set_timeout"));
-}
-
-/*
- * __cds_txn_begin --
- * ENV->cdsgroup_begin
- *
- * PUBLIC: int __cdsgroup_begin __P((DB_ENV *, DB_TXN **));
- */
-int
-__cdsgroup_begin(dbenv, txnpp)
- DB_ENV *dbenv;
- DB_TXN **txnpp;
-{
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- ENV *env;
- int ret;
-
- env = dbenv->env;
-
- ENV_ILLEGAL_BEFORE_OPEN(env, "cdsgroup_begin");
- if (!CDB_LOCKING(env))
- return (__env_not_config(env, "cdsgroup_begin", DB_INIT_CDB));
-
- ENV_ENTER(env, ip);
- *txnpp = txn = NULL;
- if ((ret = __os_calloc(env, 1, sizeof(DB_TXN), &txn)) != 0)
- goto err;
- /*
- * We need a dummy DB_TXNMGR -- it's the only way to get from a
- * transaction handle to the environment handle.
- */
- if ((ret = __os_calloc(env, 1, sizeof(DB_TXNMGR), &txn->mgrp)) != 0)
- goto err;
- txn->mgrp->env = env;
-
- if ((ret = __lock_id(env, &txn->txnid, &txn->locker)) != 0)
- goto err;
-
- txn->flags = TXN_CDSGROUP;
- txn->abort = __cdsgroup_abort;
- txn->commit = __cdsgroup_commit;
- txn->discard = __cdsgroup_discard;
- txn->id = __cdsgroup_id;
- txn->prepare = __cdsgroup_prepare;
- txn->set_name = __cdsgroup_set_name;
- txn->set_timeout = __cdsgroup_set_timeout;
-
- *txnpp = txn;
-
- if (0) {
-err: if (txn != NULL) {
- if (txn->mgrp != NULL)
- __os_free(env, txn->mgrp);
- __os_free(env, txn);
- }
- }
- ENV_LEAVE(env, ip);
- return (ret);
-}
diff --git a/db/db_conv.c b/db/db_conv.c
deleted file mode 100644
index 4572683..0000000
--- a/db/db_conv.c
+++ /dev/null
@@ -1,733 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/crypto.h"
-#include "dbinc/hmac.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_swap.h"
-#include "dbinc/btree.h"
-#include "dbinc/hash.h"
-#include "dbinc/log.h"
-#include "dbinc/qam.h"
-
-/*
- * __db_pgin --
- * Primary page-swap routine.
- *
- * PUBLIC: int __db_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *));
- */
-int
-__db_pgin(dbenv, pg, pp, cookie)
- DB_ENV *dbenv;
- db_pgno_t pg;
- void *pp;
- DBT *cookie;
-{
- DB dummydb, *dbp;
- DB_CIPHER *db_cipher;
- DB_LSN not_used;
- DB_PGINFO *pginfo;
- ENV *env;
- PAGE *pagep;
- size_t sum_len;
- int is_hmac, ret;
- u_int8_t *chksum;
-
- pginfo = (DB_PGINFO *)cookie->data;
- env = dbenv->env;
- pagep = (PAGE *)pp;
-
- ret = is_hmac = 0;
- chksum = NULL;
- memset(&dummydb, 0, sizeof(DB));
- dbp = &dummydb;
- dbp->dbenv = dbenv;
- dbp->env = env;
- dbp->flags = pginfo->flags;
- dbp->pgsize = pginfo->db_pagesize;
- db_cipher = env->crypto_handle;
- switch (pagep->type) {
- case P_HASHMETA:
- case P_BTREEMETA:
- case P_QAMMETA:
- /*
- * If checksumming is set on the meta-page, we must set
- * it in the dbp.
- */
- if (FLD_ISSET(((DBMETA *)pp)->metaflags, DBMETA_CHKSUM))
- F_SET(dbp, DB_AM_CHKSUM);
- else
- F_CLR(dbp, DB_AM_CHKSUM);
- if (((DBMETA *)pp)->encrypt_alg != 0 ||
- F_ISSET(dbp, DB_AM_ENCRYPT))
- is_hmac = 1;
- /*
- * !!!
- * For all meta pages it is required that the chksum
- * be at the same location. Use BTMETA to get to it
- * for any meta type.
- */
- chksum = ((BTMETA *)pp)->chksum;
- sum_len = DBMETASIZE;
- break;
- case P_INVALID:
- /*
- * We assume that we've read a file hole if we have
- * a zero LSN, zero page number and P_INVALID. Otherwise
- * we have an invalid page that might contain real data.
- */
- if (IS_ZERO_LSN(LSN(pagep)) && pagep->pgno == PGNO_INVALID) {
- sum_len = 0;
- break;
- }
- /* FALLTHROUGH */
- default:
- chksum = P_CHKSUM(dbp, pagep);
- sum_len = pginfo->db_pagesize;
- /*
- * If we are reading in a non-meta page, then if we have
- * a db_cipher then we are using hmac.
- */
- is_hmac = CRYPTO_ON(env) ? 1 : 0;
- break;
- }
-
- /*
- * We expect a checksum error if there was a configuration problem.
- * If there is no configuration problem and we don't get a match,
- * it's fatal: panic the system.
- */
- if (F_ISSET(dbp, DB_AM_CHKSUM) && sum_len != 0) {
- if (F_ISSET(dbp, DB_AM_SWAP) && is_hmac == 0)
- P_32_SWAP(chksum);
- switch (ret = __db_check_chksum(
- env, NULL, db_cipher, chksum, pp, sum_len, is_hmac)) {
- case 0:
- break;
- case -1:
- if (DBENV_LOGGING(env))
- (void)__db_cksum_log(
- env, NULL, &not_used, DB_FLUSH);
- __db_errx(env,
- "checksum error: page %lu: catastrophic recovery required",
- (u_long)pg);
- return (__env_panic(env, DB_RUNRECOVERY));
- default:
- return (ret);
- }
- }
- if ((ret = __db_decrypt_pg(env, dbp, pagep)) != 0)
- return (ret);
- switch (pagep->type) {
- case P_INVALID:
- if (pginfo->type == DB_QUEUE)
- return (__qam_pgin_out(env, pg, pp, cookie));
- else
- return (__ham_pgin(dbp, pg, pp, cookie));
- case P_HASH_UNSORTED:
- case P_HASH:
- case P_HASHMETA:
- return (__ham_pgin(dbp, pg, pp, cookie));
- case P_BTREEMETA:
- case P_IBTREE:
- case P_IRECNO:
- case P_LBTREE:
- case P_LDUP:
- case P_LRECNO:
- case P_OVERFLOW:
- return (__bam_pgin(dbp, pg, pp, cookie));
- case P_QAMMETA:
- case P_QAMDATA:
- return (__qam_pgin_out(env, pg, pp, cookie));
- default:
- break;
- }
- return (__db_pgfmt(env, pg));
-}
-
-/*
- * __db_pgout --
- * Primary page-swap routine.
- *
- * PUBLIC: int __db_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *));
- */
-int
-__db_pgout(dbenv, pg, pp, cookie)
- DB_ENV *dbenv;
- db_pgno_t pg;
- void *pp;
- DBT *cookie;
-{
- DB dummydb, *dbp;
- DB_PGINFO *pginfo;
- ENV *env;
- PAGE *pagep;
- int ret;
-
- pginfo = (DB_PGINFO *)cookie->data;
- env = dbenv->env;
- pagep = (PAGE *)pp;
-
- memset(&dummydb, 0, sizeof(DB));
- dbp = &dummydb;
- dbp->dbenv = dbenv;
- dbp->env = env;
- dbp->flags = pginfo->flags;
- dbp->pgsize = pginfo->db_pagesize;
- ret = 0;
- switch (pagep->type) {
- case P_INVALID:
- if (pginfo->type == DB_QUEUE)
- ret = __qam_pgin_out(env, pg, pp, cookie);
- else
- ret = __ham_pgout(dbp, pg, pp, cookie);
- break;
- case P_HASH:
- case P_HASH_UNSORTED:
- /*
- * Support pgout of unsorted hash pages - since online
- * replication upgrade can cause pages of this type to be
- * written out.
- *
- * FALLTHROUGH
- */
- case P_HASHMETA:
- ret = __ham_pgout(dbp, pg, pp, cookie);
- break;
- case P_BTREEMETA:
- case P_IBTREE:
- case P_IRECNO:
- case P_LBTREE:
- case P_LDUP:
- case P_LRECNO:
- case P_OVERFLOW:
- ret = __bam_pgout(dbp, pg, pp, cookie);
- break;
- case P_QAMMETA:
- case P_QAMDATA:
- ret = __qam_pgin_out(env, pg, pp, cookie);
- break;
- default:
- return (__db_pgfmt(env, pg));
- }
- if (ret)
- return (ret);
-
- return (__db_encrypt_and_checksum_pg(env, dbp, pagep));
-}
-
-/*
- * __db_decrypt_pg --
- * Utility function to decrypt a db page.
- *
- * PUBLIC: int __db_decrypt_pg __P((ENV *, DB *, PAGE *));
- */
-int
-__db_decrypt_pg (env, dbp, pagep)
- ENV *env;
- DB *dbp;
- PAGE *pagep;
-{
- DB_CIPHER *db_cipher;
- size_t pg_len, pg_off;
- u_int8_t *iv;
- int ret;
-
- db_cipher = env->crypto_handle;
- ret = 0;
- iv = NULL;
- if (F_ISSET(dbp, DB_AM_ENCRYPT)) {
- DB_ASSERT(env, db_cipher != NULL);
- DB_ASSERT(env, F_ISSET(dbp, DB_AM_CHKSUM));
-
- pg_off = P_OVERHEAD(dbp);
- DB_ASSERT(env, db_cipher->adj_size(pg_off) == 0);
-
- switch (pagep->type) {
- case P_HASHMETA:
- case P_BTREEMETA:
- case P_QAMMETA:
- /*
- * !!!
- * For all meta pages it is required that the iv
- * be at the same location. Use BTMETA to get to it
- * for any meta type.
- */
- iv = ((BTMETA *)pagep)->iv;
- pg_len = DBMETASIZE;
- break;
- case P_INVALID:
- if (IS_ZERO_LSN(LSN(pagep)) &&
- pagep->pgno == PGNO_INVALID) {
- pg_len = 0;
- break;
- }
- /* FALLTHROUGH */
- default:
- iv = P_IV(dbp, pagep);
- pg_len = dbp->pgsize;
- break;
- }
- if (pg_len != 0)
- ret = db_cipher->decrypt(env, db_cipher->data,
- iv, ((u_int8_t *)pagep) + pg_off,
- pg_len - pg_off);
- }
- return (ret);
-}
-
-/*
- * __db_encrypt_and_checksum_pg --
- * Utility function to encrypt and checksum a db page.
- *
- * PUBLIC: int __db_encrypt_and_checksum_pg
- * PUBLIC: __P((ENV *, DB *, PAGE *));
- */
-int
-__db_encrypt_and_checksum_pg (env, dbp, pagep)
- ENV *env;
- DB *dbp;
- PAGE *pagep;
-{
- DB_CIPHER *db_cipher;
- int ret;
- size_t pg_off, pg_len, sum_len;
- u_int8_t *chksum, *iv, *key;
-
- chksum = iv = key = NULL;
- db_cipher = env->crypto_handle;
-
- if (F_ISSET(dbp, DB_AM_ENCRYPT)) {
- DB_ASSERT(env, db_cipher != NULL);
- DB_ASSERT(env, F_ISSET(dbp, DB_AM_CHKSUM));
-
- pg_off = P_OVERHEAD(dbp);
- DB_ASSERT(env, db_cipher->adj_size(pg_off) == 0);
-
- key = db_cipher->mac_key;
-
- switch (pagep->type) {
- case P_HASHMETA:
- case P_BTREEMETA:
- case P_QAMMETA:
- /*
- * !!!
- * For all meta pages it is required that the iv
- * be at the same location. Use BTMETA to get to it
- * for any meta type.
- */
- iv = ((BTMETA *)pagep)->iv;
- pg_len = DBMETASIZE;
- break;
- default:
- iv = P_IV(dbp, pagep);
- pg_len = dbp->pgsize;
- break;
- }
- if ((ret = db_cipher->encrypt(env, db_cipher->data,
- iv, ((u_int8_t *)pagep) + pg_off, pg_len - pg_off)) != 0)
- return (ret);
- }
- if (F_ISSET(dbp, DB_AM_CHKSUM)) {
- switch (pagep->type) {
- case P_HASHMETA:
- case P_BTREEMETA:
- case P_QAMMETA:
- /*
- * !!!
- * For all meta pages it is required that the chksum
- * be at the same location. Use BTMETA to get to it
- * for any meta type.
- */
- chksum = ((BTMETA *)pagep)->chksum;
- sum_len = DBMETASIZE;
- break;
- default:
- chksum = P_CHKSUM(dbp, pagep);
- sum_len = dbp->pgsize;
- break;
- }
- __db_chksum(NULL, (u_int8_t *)pagep, sum_len, key, chksum);
- if (F_ISSET(dbp, DB_AM_SWAP) && !F_ISSET(dbp, DB_AM_ENCRYPT))
- P_32_SWAP(chksum);
- }
- return (0);
-}
-
-/*
- * __db_metaswap --
- * Byteswap the common part of the meta-data page.
- *
- * PUBLIC: void __db_metaswap __P((PAGE *));
- */
-void
-__db_metaswap(pg)
- PAGE *pg;
-{
- u_int8_t *p;
-
- p = (u_int8_t *)pg;
-
- /* Swap the meta-data information. */
- SWAP32(p); /* lsn.file */
- SWAP32(p); /* lsn.offset */
- SWAP32(p); /* pgno */
- SWAP32(p); /* magic */
- SWAP32(p); /* version */
- SWAP32(p); /* pagesize */
- p += 4; /* unused, page type, unused, unused */
- SWAP32(p); /* free */
- SWAP32(p); /* alloc_lsn part 1 */
- SWAP32(p); /* alloc_lsn part 2 */
- SWAP32(p); /* cached key count */
- SWAP32(p); /* cached record count */
- SWAP32(p); /* flags */
-}
-
-/*
- * __db_byteswap --
- * Byteswap an ordinary database page.
- *
- * PUBLIC: int __db_byteswap
- * PUBLIC: __P((DB *, db_pgno_t, PAGE *, size_t, int));
- */
-int
-__db_byteswap(dbp, pg, h, pagesize, pgin)
- DB *dbp;
- db_pgno_t pg;
- PAGE *h;
- size_t pagesize;
- int pgin;
-{
- ENV *env;
- BINTERNAL *bi;
- BKEYDATA *bk;
- BOVERFLOW *bo;
- RINTERNAL *ri;
- db_indx_t i, *inp, len, tmp;
- u_int8_t *end, *p, *pgend;
-
- if (pagesize == 0)
- return (0);
-
- env = dbp->env;
-
- if (pgin) {
- M_32_SWAP(h->lsn.file);
- M_32_SWAP(h->lsn.offset);
- M_32_SWAP(h->pgno);
- M_32_SWAP(h->prev_pgno);
- M_32_SWAP(h->next_pgno);
- M_16_SWAP(h->entries);
- M_16_SWAP(h->hf_offset);
- }
-
- pgend = (u_int8_t *)h + pagesize;
-
- inp = P_INP(dbp, h);
- if ((u_int8_t *)inp >= pgend)
- goto out;
-
- switch (TYPE(h)) {
- case P_HASH_UNSORTED:
- case P_HASH:
- for (i = 0; i < NUM_ENT(h); i++) {
- if (pgin)
- M_16_SWAP(inp[i]);
-
- if (P_ENTRY(dbp, h, i) >= pgend)
- continue;
-
- switch (HPAGE_TYPE(dbp, h, i)) {
- case H_KEYDATA:
- break;
- case H_DUPLICATE:
- len = LEN_HKEYDATA(dbp, h, pagesize, i);
- p = HKEYDATA_DATA(P_ENTRY(dbp, h, i));
- for (end = p + len; p < end;) {
- if (pgin) {
- P_16_SWAP(p);
- memcpy(&tmp,
- p, sizeof(db_indx_t));
- p += sizeof(db_indx_t);
- } else {
- memcpy(&tmp,
- p, sizeof(db_indx_t));
- SWAP16(p);
- }
- p += tmp;
- SWAP16(p);
- }
- break;
- case H_OFFDUP:
- p = HOFFPAGE_PGNO(P_ENTRY(dbp, h, i));
- SWAP32(p); /* pgno */
- break;
- case H_OFFPAGE:
- p = HOFFPAGE_PGNO(P_ENTRY(dbp, h, i));
- SWAP32(p); /* pgno */
- SWAP32(p); /* tlen */
- break;
- default:
- return (__db_pgfmt(env, pg));
- }
-
- }
-
- /*
- * The offsets in the inp array are used to determine
- * the size of entries on a page; therefore they
- * cannot be converted until we've done all the
- * entries.
- */
- if (!pgin)
- for (i = 0; i < NUM_ENT(h); i++)
- M_16_SWAP(inp[i]);
- break;
- case P_LBTREE:
- case P_LDUP:
- case P_LRECNO:
- for (i = 0; i < NUM_ENT(h); i++) {
- if (pgin)
- M_16_SWAP(inp[i]);
-
- /*
- * In the case of on-page duplicates, key information
- * should only be swapped once.
- */
- if (h->type == P_LBTREE && i > 1) {
- if (pgin) {
- if (inp[i] == inp[i - 2])
- continue;
- } else {
- M_16_SWAP(inp[i]);
- if (inp[i] == inp[i - 2])
- continue;
- M_16_SWAP(inp[i]);
- }
- }
-
- bk = GET_BKEYDATA(dbp, h, i);
- if ((u_int8_t *)bk >= pgend)
- continue;
- switch (B_TYPE(bk->type)) {
- case B_KEYDATA:
- M_16_SWAP(bk->len);
- break;
- case B_DUPLICATE:
- case B_OVERFLOW:
- bo = (BOVERFLOW *)bk;
- M_32_SWAP(bo->pgno);
- M_32_SWAP(bo->tlen);
- break;
- default:
- return (__db_pgfmt(env, pg));
- }
-
- if (!pgin)
- M_16_SWAP(inp[i]);
- }
- break;
- case P_IBTREE:
- for (i = 0; i < NUM_ENT(h); i++) {
- if (pgin)
- M_16_SWAP(inp[i]);
-
- bi = GET_BINTERNAL(dbp, h, i);
- if ((u_int8_t *)bi >= pgend)
- continue;
-
- M_16_SWAP(bi->len);
- M_32_SWAP(bi->pgno);
- M_32_SWAP(bi->nrecs);
-
- switch (B_TYPE(bi->type)) {
- case B_KEYDATA:
- break;
- case B_DUPLICATE:
- case B_OVERFLOW:
- bo = (BOVERFLOW *)bi->data;
- M_32_SWAP(bo->pgno);
- M_32_SWAP(bo->tlen);
- break;
- default:
- return (__db_pgfmt(env, pg));
- }
-
- if (!pgin)
- M_16_SWAP(inp[i]);
- }
- break;
- case P_IRECNO:
- for (i = 0; i < NUM_ENT(h); i++) {
- if (pgin)
- M_16_SWAP(inp[i]);
-
- ri = GET_RINTERNAL(dbp, h, i);
- if ((u_int8_t *)ri >= pgend)
- continue;
-
- M_32_SWAP(ri->pgno);
- M_32_SWAP(ri->nrecs);
-
- if (!pgin)
- M_16_SWAP(inp[i]);
- }
- break;
- case P_INVALID:
- case P_OVERFLOW:
- case P_QAMDATA:
- /* Nothing to do. */
- break;
- default:
- return (__db_pgfmt(env, pg));
- }
-
-out: if (!pgin) {
- /* Swap the header information. */
- M_32_SWAP(h->lsn.file);
- M_32_SWAP(h->lsn.offset);
- M_32_SWAP(h->pgno);
- M_32_SWAP(h->prev_pgno);
- M_32_SWAP(h->next_pgno);
- M_16_SWAP(h->entries);
- M_16_SWAP(h->hf_offset);
- }
- return (0);
-}
-
-/*
- * __db_pageswap --
- * Byteswap any database page. Normally, the page to be swapped will be
- * referenced by the "pp" argument and the pdata argument will be NULL.
- * This function is also called by automatically generated log functions,
- * where the page may be split into separate header and data parts. In
- * that case, pdata is not NULL we reconsitute
- *
- * PUBLIC: int __db_pageswap
- * PUBLIC: __P((DB *, void *, size_t, DBT *, int));
- */
-int
-__db_pageswap(dbp, pp, len, pdata, pgin)
- DB *dbp;
- void *pp;
- size_t len;
- DBT *pdata;
- int pgin;
-{
- ENV *env;
- db_pgno_t pg;
- size_t pgsize;
- void *pgcopy;
- int ret;
- u_int16_t hoffset;
-
- env = dbp->env;
-
- switch (TYPE(pp)) {
- case P_BTREEMETA:
- return (__bam_mswap(env, pp));
-
- case P_HASHMETA:
- return (__ham_mswap(env, pp));
-
- case P_QAMMETA:
- return (__qam_mswap(env, pp));
-
- case P_INVALID:
- case P_OVERFLOW:
- case P_QAMDATA:
- /*
- * We may have been passed an invalid page, or a queue data
- * page, or an overflow page where fields like hoffset have a
- * special meaning. In that case, no swapping of the page data
- * is required, just the fields in the page header.
- */
- pdata = NULL;
- break;
-
- default:
- break;
- }
-
- if (pgin) {
- P_32_COPYSWAP(&PGNO(pp), &pg);
- P_16_COPYSWAP(&HOFFSET(pp), &hoffset);
- } else {
- pg = PGNO(pp);
- hoffset = HOFFSET(pp);
- }
-
- if (pdata == NULL)
- ret = __db_byteswap(dbp, pg, (PAGE *)pp, len, pgin);
- else {
- pgsize = hoffset + pdata->size;
- if ((ret = __os_malloc(env, pgsize, &pgcopy)) != 0)
- return (ret);
- memset(pgcopy, 0, pgsize);
- memcpy(pgcopy, pp, len);
- memcpy((u_int8_t *)pgcopy + hoffset, pdata->data, pdata->size);
-
- ret = __db_byteswap(dbp, pg, (PAGE *)pgcopy, pgsize, pgin);
- memcpy(pp, pgcopy, len);
-
- /*
- * If we are swapping data to be written to the log, we can't
- * overwrite the buffer that was passed in: it may be a pointer
- * into a page in cache. We set DB_DBT_APPMALLOC here so that
- * the calling code can free the memory we allocate here.
- */
- if (!pgin) {
- if ((ret =
- __os_malloc(env, pdata->size, &pdata->data)) != 0) {
- __os_free(env, pgcopy);
- return (ret);
- }
- F_SET(pdata, DB_DBT_APPMALLOC);
- }
- memcpy(pdata->data, (u_int8_t *)pgcopy + hoffset, pdata->size);
- __os_free(env, pgcopy);
- }
-
- return (ret);
-}
diff --git a/db/db_dispatch.c b/db/db_dispatch.c
deleted file mode 100644
index 65dc260..0000000
--- a/db/db_dispatch.c
+++ /dev/null
@@ -1,953 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- */
-/*
- * Copyright (c) 1995, 1996
- * The President and Fellows of Harvard University. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Margo Seltzer.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/hash.h"
-#include "dbinc/fop.h"
-#include "dbinc/lock.h"
-#include "dbinc/log.h"
-#include "dbinc/mp.h"
-#include "dbinc/txn.h"
-
-static int __db_txnlist_find_internal __P((ENV *, DB_TXNHEAD *,
- db_txnlist_type, u_int32_t, DB_TXNLIST **,
- int, u_int32_t *));
-
-/*
- * __db_dispatch --
- *
- * This is the transaction dispatch function used by the db access methods.
- * It is designed to handle the record format used by all the access
- * methods (the one automatically generated by the db_{h,log,read}.sh
- * scripts in the tools directory). An application using a different
- * recovery paradigm will supply a different dispatch function to txn_open.
- *
- * PUBLIC: int __db_dispatch __P((ENV *,
- * PUBLIC: DB_DISTAB *, DBT *, DB_LSN *, db_recops, DB_TXNHEAD *));
- */
-int
-__db_dispatch(env, dtab, db, lsnp, redo, info)
- ENV *env; /* The environment. */
- DB_DISTAB *dtab;
- DBT *db; /* The log record upon which to dispatch. */
- DB_LSN *lsnp; /* The lsn of the record being dispatched. */
- db_recops redo; /* Redo this op (or undo it). */
- DB_TXNHEAD *info; /* Transaction list. */
-{
- DB_ENV *dbenv;
- DB_LSN prev_lsn;
- u_int32_t rectype, status, txnid, urectype;
- int make_call, ret;
-
- dbenv = env->dbenv;
- LOGCOPY_32(env, &rectype, db->data);
- LOGCOPY_32(env, &txnid, (u_int8_t *)db->data + sizeof(rectype));
-
- make_call = ret = 0;
-
- /* If we don't have a dispatch table, it's hard to dispatch. */
- DB_ASSERT(env, dtab != NULL);
-
- /*
- * If we find a record that is in the user's number space and they
- * have specified a recovery routine, let them handle it. If they
- * didn't specify a recovery routine, then we expect that they've
- * followed all our rules and registered new recovery functions.
- */
- switch (redo) {
- case DB_TXN_ABORT:
- case DB_TXN_APPLY:
- case DB_TXN_PRINT:
- make_call = 1;
- break;
- case DB_TXN_OPENFILES:
- /*
- * We collect all the transactions that have
- * "begin" records, those with no previous LSN,
- * so that we do not abort partial transactions.
- * These are known to be undone, otherwise the
- * log would not have been freeable.
- */
- LOGCOPY_TOLSN(env, &prev_lsn, (u_int8_t *)db->data +
- sizeof(rectype) + sizeof(txnid));
- if (txnid != 0 && prev_lsn.file == 0 && (ret =
- __db_txnlist_add(env, info, txnid, TXN_OK, NULL)) != 0)
- return (ret);
-
- /* FALLTHROUGH */
- case DB_TXN_POPENFILES:
- if (rectype == DB___dbreg_register ||
- rectype == DB___txn_child ||
- rectype == DB___txn_ckp || rectype == DB___txn_recycle)
- return ((dtab->int_dispatch[rectype])(env,
- db, lsnp, redo, info));
- break;
- case DB_TXN_BACKWARD_ROLL:
- /*
- * Running full recovery in the backward pass. In general,
- * we only process records during this pass that belong
- * to aborted transactions. Unfortunately, there are several
- * exceptions:
- * 1. If this is a meta-record, one not associated with
- * a transaction, then we must always process it.
- * 2. If this is a transaction commit/abort, we must
- * always process it, so that we know the status of
- * every transaction.
- * 3. If this is a child commit, we need to process it
- * because the outcome of the child transaction depends
- * on the outcome of the parent.
- * 4. If this is a dbreg_register record, we must always
- * process is because they contain non-transactional
- * closes that must be properly handled.
- * 5. If this is a noop, we must always undo it so that we
- * properly handle any aborts before a file was closed.
- * 6. If this a file remove, we need to process it to
- * determine if the on-disk file is the same as the
- * one being described.
- */
- switch (rectype) {
- /*
- * These either do not belong to a transaction or (regop)
- * must be processed regardless of the status of the
- * transaction.
- */
- case DB___txn_regop:
- case DB___txn_recycle:
- case DB___txn_ckp:
- make_call = 1;
- break;
- /*
- * These belong to a transaction whose status must be
- * checked.
- */
- case DB___txn_child:
- case DB___db_noop:
- case DB___fop_file_remove:
- case DB___dbreg_register:
- make_call = 1;
-
- /* FALLTHROUGH */
- default:
- if (txnid == 0)
- break;
-
- ret = __db_txnlist_find(env, info, txnid, &status);
-
- /* If not found, this is an incomplete abort. */
- if (ret == DB_NOTFOUND)
- return (__db_txnlist_add(env,
- info, txnid, TXN_IGNORE, lsnp));
- if (ret != 0)
- return (ret);
-
- /*
- * If we ignore the transaction, ignore the operation
- * UNLESS this is a child commit in which case we need
- * to make sure that the child also gets marked as
- * ignore.
- */
- if (status == TXN_IGNORE && rectype != DB___txn_child) {
- make_call = 0;
- break;
- }
- if (status == TXN_COMMIT)
- break;
-
- /* Set make_call in case we came through default */
- make_call = 1;
- if (status == TXN_OK &&
- (ret = __db_txnlist_update(env,
- info, txnid, rectype == DB___txn_prepare ?
- TXN_PREPARE : TXN_ABORT, NULL, &status, 0)) != 0)
- return (ret);
- }
- break;
- case DB_TXN_FORWARD_ROLL:
- /*
- * In the forward pass, if we haven't seen the transaction,
- * do nothing, else recover it.
- *
- * We need to always redo DB___db_noop records, so that we
- * properly handle any commits after the file was closed.
- */
- switch (rectype) {
- case DB___txn_recycle:
- case DB___txn_ckp:
- case DB___db_noop:
- case DB___dbreg_register:
- make_call = 1;
- break;
-
- default:
- if (txnid == 0)
- status = 0;
- else {
- ret = __db_txnlist_find(env,
- info, txnid, &status);
-
- if (ret == DB_NOTFOUND)
- /* Break out out of if clause. */
- ;
- else if (ret != 0)
- return (ret);
- else if (status == TXN_COMMIT) {
- make_call = 1;
- break;
- }
- }
-
- }
- break;
- default:
- return (__db_unknown_flag(
- env, "__db_dispatch", (u_int32_t)redo));
- }
-
- if (make_call) {
- /*
- * If the debug flag is set then we are logging
- * records for a non-durable update so that they
- * may be examined for diagnostic purposes.
- * So only make the call if we are printing,
- * otherwise we need to extract the previous
- * lsn so undo will work properly.
- */
- if (rectype & DB_debug_FLAG) {
- if (redo == DB_TXN_PRINT)
- rectype &= ~DB_debug_FLAG;
- else {
- LOGCOPY_TOLSN(env, lsnp,
- (u_int8_t *)db->data +
- sizeof(rectype) +
- sizeof(txnid));
- return (0);
- }
- }
- if (rectype >= DB_user_BEGIN) {
- if (dbenv->app_dispatch != NULL)
- return (dbenv->app_dispatch(dbenv,
- db, lsnp, redo));
-
- /* No application-specific dispatch */
- urectype = rectype - DB_user_BEGIN;
- if (urectype > dtab->ext_size ||
- dtab->ext_dispatch[urectype] == NULL) {
- __db_errx(env,
- "Illegal application-specific record type %lu in log",
- (u_long)rectype);
- return (EINVAL);
- }
- return ((dtab->ext_dispatch[urectype])(dbenv,
- db, lsnp, redo));
- } else {
- if (rectype > dtab->int_size ||
- dtab->int_dispatch[rectype] == NULL) {
- __db_errx(env,
- "Illegal record type %lu in log",
- (u_long)rectype);
- return (EINVAL);
- }
- return ((dtab->int_dispatch[rectype])(env,
- db, lsnp, redo, info));
- }
- }
-
- return (0);
-}
-
-/*
- * __db_add_recovery -- Add recovery functions to the dispatch table.
- *
- * We have two versions of this, an external one and an internal one,
- * because application-specific functions take different arguments
- * for dispatch (ENV versus DB_ENV).
- *
- * This is the external version.
- *
- * PUBLIC: int __db_add_recovery __P((DB_ENV *, DB_DISTAB *,
- * PUBLIC: int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops), u_int32_t));
- */
-int
-__db_add_recovery(dbenv, dtab, func, ndx)
- DB_ENV *dbenv;
- DB_DISTAB *dtab;
- int (*func) __P((DB_ENV *, DBT *, DB_LSN *, db_recops));
- u_int32_t ndx;
-{
- size_t i, nsize;
- int ret;
-
- /* Make sure this is an application-specific record. */
- if (ndx < DB_user_BEGIN) {
- __db_errx(dbenv->env,
- "Attempting to add application-specific record with invalid type %lu",
- (u_long)ndx);
- return (EINVAL);
- }
- ndx -= DB_user_BEGIN;
-
- /* Check if we have to grow the table. */
- if (ndx >= dtab->ext_size) {
- nsize = ndx + 40;
- if ((ret =
- __os_realloc(dbenv->env, nsize *
- sizeof((dtab->ext_dispatch)[0]), &dtab->ext_dispatch))
- != 0)
- return (ret);
- for (i = dtab->ext_size; i < nsize; ++i)
- (dtab->ext_dispatch)[i] = NULL;
- dtab->ext_size = nsize;
- }
-
- (dtab->ext_dispatch)[ndx] = func;
- return (0);
-}
-
-/*
- * __db_add_recovery_int --
- *
- * Internal version of dispatch addition function.
- *
- *
- * PUBLIC: int __db_add_recovery_int __P((ENV *, DB_DISTAB *,
- * PUBLIC: int (*)(ENV *, DBT *, DB_LSN *, db_recops, void *), u_int32_t));
- */
-int
-__db_add_recovery_int(env, dtab, func, ndx)
- ENV *env;
- DB_DISTAB *dtab;
- int (*func) __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- u_int32_t ndx;
-{
- size_t i, nsize;
- int ret;
-
- if (ndx >= DB_user_BEGIN) {
- __db_errx(env,
- "Attempting to add internal record with invalid type %lu",
- (u_long)ndx);
- return (EINVAL);
- }
-
- /* Check if we have to grow the table. */
- if (ndx >= dtab->int_size) {
- nsize = ndx + 40;
- if ((ret =
- __os_realloc(env, nsize * sizeof((dtab->int_dispatch)[0]),
- &dtab->int_dispatch)) != 0)
- return (ret);
- for (i = dtab->int_size; i < nsize; ++i)
- (dtab->int_dispatch)[i] = NULL;
- dtab->int_size = nsize;
- }
-
- (dtab->int_dispatch)[ndx] = func;
- return (0);
-}
-
-/*
- * __db_txnlist_init --
- * Initialize transaction linked list.
- *
- * Allocates a zero-filled DB_TXNHEAD whose hash table is sized from the
- * low_txn/hi_txn range, allocates an 8-entry generation array whose
- * entry 0 covers TXN_MINIMUM..TXN_MAXIMUM, and returns the header
- * through retp. When trunc_lsn is non-NULL it seeds both trunc_lsn and
- * maxlsn; otherwise both are zeroed. ckplsn always starts zeroed.
- *
- * Returns 0 on success or the error from __os_malloc; on failure
- * nothing is left allocated and *retp is not written.
- *
- * PUBLIC: int __db_txnlist_init __P((ENV *, DB_THREAD_INFO *,
- * PUBLIC: u_int32_t, u_int32_t, DB_LSN *, DB_TXNHEAD **));
- */
-int
-__db_txnlist_init(env, ip, low_txn, hi_txn, trunc_lsn, retp)
- ENV *env;
- DB_THREAD_INFO *ip;
- u_int32_t low_txn, hi_txn;
- DB_LSN *trunc_lsn;
- DB_TXNHEAD **retp;
-{
- DB_TXNHEAD *headp;
- u_int32_t size, tmp;
- int ret;
-
- /*
- * Size a hash table.
- * If low_txn is zero then we are being called during rollback
- * and we need only one slot.
- * hi_txn may be lower than low_txn if txnids have been recycled;
- * in that case the two are swapped before the span is measured.
- * The numbers here are guesses about txn density, we can afford
- * to look at a few entries in each slot.
- */
- if (low_txn == 0)
- size = 1;
- else {
- if (hi_txn < low_txn) {
- tmp = hi_txn;
- hi_txn = low_txn;
- low_txn = tmp;
- }
- tmp = hi_txn - low_txn;
- /* See if we wrapped around. */
- if (tmp > (TXN_MAXIMUM - TXN_MINIMUM) / 2)
- tmp = (low_txn - TXN_MINIMUM) + (TXN_MAXIMUM - hi_txn);
- size = tmp / 5;
- if (size < 100)
- size = 100;
- }
- if ((ret = __os_malloc(env,
- sizeof(DB_TXNHEAD) + size * sizeof(headp->head), &headp)) != 0)
- return (ret);
-
- memset(headp, 0, sizeof(DB_TXNHEAD) + size * sizeof(headp->head));
- headp->maxid = hi_txn;
- headp->generation = 0;
- headp->nslots = size;
- headp->gen_alloc = 8;
- headp->thread_info = ip;
- if ((ret = __os_malloc(env, headp->gen_alloc *
- sizeof(headp->gen_array[0]), &headp->gen_array)) != 0) {
- __os_free(env, headp);
- return (ret);
- }
- headp->gen_array[0].generation = 0;
- headp->gen_array[0].txn_min = TXN_MINIMUM;
- headp->gen_array[0].txn_max = TXN_MAXIMUM;
- if (trunc_lsn != NULL) {
- headp->trunc_lsn = *trunc_lsn;
- headp->maxlsn = *trunc_lsn;
- } else {
- ZERO_LSN(headp->trunc_lsn);
- ZERO_LSN(headp->maxlsn);
- }
- ZERO_LSN(headp->ckplsn);
-
- *retp = headp;
- return (0);
-}
-
-/*
- * FIND_GENERATION --
- * Scan hp->gen_array from index 0 through hp->generation and store in
- * "gen" the generation number of the first range that contains txnid.
- * A range whose txn_min is not below its txn_max is treated as wrapping
- * around the end of the ID space.
- *
- * The expansion references a variable named "env" that is not a macro
- * argument, so it must be in scope at every use.
- */
-#define FIND_GENERATION(hp, txnid, gen) do { \
- u_int32_t __i; \
- for (__i = 0; __i <= (hp)->generation; __i++) \
- /* The range may wrap around the end. */ \
- if ((hp)->gen_array[__i].txn_min < \
- (hp)->gen_array[__i].txn_max ? \
- ((txnid) >= (hp)->gen_array[__i].txn_min && \
- (txnid) <= (hp)->gen_array[__i].txn_max) : \
- ((txnid) >= (hp)->gen_array[__i].txn_min || \
- (txnid) <= (hp)->gen_array[__i].txn_max)) \
- break; \
- DB_ASSERT(env, __i <= (hp)->generation); \
- gen = (hp)->gen_array[__i].generation; \
-} while (0)
-
-/*
- * __db_txnlist_add --
- * Add an element to our transaction linked list.
- *
- * Allocates a TXNLIST_TXNID entry for txnid, links it at the head of
- * the hash bucket selected by DB_TXNLIST_MASK, and records the given
- * status together with the generation found by FIND_GENERATION.
- * hp->maxid is raised if txnid exceeds it. hp->maxlsn is set to *lsn
- * only when lsn is non-NULL, maxlsn is still zero and status is
- * TXN_COMMIT.
- *
- * Returns 0 on success or the error from __os_malloc.
- *
- * PUBLIC: int __db_txnlist_add __P((ENV *,
- * PUBLIC: DB_TXNHEAD *, u_int32_t, u_int32_t, DB_LSN *));
- */
-int
-__db_txnlist_add(env, hp, txnid, status, lsn)
- ENV *env;
- DB_TXNHEAD *hp;
- u_int32_t txnid, status;
- DB_LSN *lsn;
-{
- DB_TXNLIST *elp;
- int ret;
-
- if ((ret = __os_malloc(env, sizeof(DB_TXNLIST), &elp)) != 0)
- return (ret);
-
- /* The entry is linked first; its fields are filled in just below. */
- LIST_INSERT_HEAD(&hp->head[DB_TXNLIST_MASK(hp, txnid)], elp, links);
-
- /* Find the first generation range in the stack containing this ID. */
- FIND_GENERATION(hp, txnid, elp->u.t.generation);
- elp->type = TXNLIST_TXNID;
- elp->u.t.txnid = txnid;
- elp->u.t.status = status;
- if (txnid > hp->maxid)
- hp->maxid = txnid;
- if (lsn != NULL && IS_ZERO_LSN(hp->maxlsn) && status == TXN_COMMIT)
- hp->maxlsn = *lsn;
-
- /* A committed transaction's LSN must not exceed the recorded maxlsn. */
- DB_ASSERT(env, lsn == NULL ||
- status != TXN_COMMIT || LOG_COMPARE(&hp->maxlsn, lsn) >= 0);
-
- return (0);
-}
-
-/*
- * __db_txnlist_remove --
- *	Remove the entry for txnid from the transaction list.
- *
- *	Thin wrapper around __db_txnlist_find_internal with its delete
- *	argument set to 1; the status and entry pointer it reports are
- *	discarded. Returns whatever the lookup returns.
- *
- * PUBLIC: int __db_txnlist_remove __P((ENV *, DB_TXNHEAD *, u_int32_t));
- */
-int
-__db_txnlist_remove(env, hp, txnid)
-	ENV *env;
-	DB_TXNHEAD *hp;
-	u_int32_t txnid;
-{
-	DB_TXNLIST *unused_elp;
-	u_int32_t unused_status;
-	int ret;
-
-	ret = __db_txnlist_find_internal(env,
-	    hp, TXNLIST_TXNID, txnid, &unused_elp, 1, &unused_status);
-	return (ret);
-}
-
-/*
- * __db_txnlist_ckp --
- *	Record the maximum checkpoint that will be retained after
- *	recovery. Typically this is simply the max checkpoint, but for
- *	client replication recovery or timestamp-based recovery the log
- *	is virtually truncated and the last checkpoint before the
- *	truncation point must be retained instead.
- *
- *	The checkpoint LSN is stored only once: when ckplsn is still
- *	zero, maxlsn has been set, and ckp_lsn does not exceed maxlsn.
- *
- * PUBLIC: void __db_txnlist_ckp __P((ENV *, DB_TXNHEAD *, DB_LSN *));
- */
-void
-__db_txnlist_ckp(env, hp, ckp_lsn)
-	ENV *env;
-	DB_TXNHEAD *hp;
-	DB_LSN *ckp_lsn;
-{
-	COMPQUIET(env, NULL);
-
-	/* A checkpoint has already been recorded. */
-	if (!IS_ZERO_LSN(hp->ckplsn))
-		return;
-
-	/* No maximum LSN is known yet. */
-	if (IS_ZERO_LSN(hp->maxlsn))
-		return;
-
-	/* This checkpoint lies beyond the maximum LSN. */
-	if (LOG_COMPARE(&hp->maxlsn, ckp_lsn) < 0)
-		return;
-
-	hp->ckplsn = *ckp_lsn;
-}
-
-/*
- * __db_txnlist_end --
- *	Discard a transaction list: every entry in every hash bucket, the
- *	generation array, and the header itself. A NULL hp is a no-op.
- *
- * PUBLIC: void __db_txnlist_end __P((ENV *, DB_TXNHEAD *));
- */
-void
-__db_txnlist_end(env, hp)
-	ENV *env;
-	DB_TXNHEAD *hp;
-{
-	DB_TXNLIST *elp;
-	u_int32_t slot;
-
-	if (hp == NULL)
-		return;
-
-	for (slot = 0; slot < hp->nslots; slot++) {
-		while ((elp = LIST_FIRST(&hp->head[slot])) != NULL) {
-			/*
-			 * Only LSN entries own extra memory. Any other
-			 * type, including a possibly incomplete entry, is
-			 * simply unlinked and freed.
-			 */
-			if (elp->type == TXNLIST_LSN)
-				__os_free(env, elp->u.l.lsn_stack);
-			LIST_REMOVE(elp, links);
-			__os_free(env, elp);
-		}
-	}
-
-	if (hp->gen_array != NULL)
-		__os_free(env, hp->gen_array);
-	__os_free(env, hp);
-}
-
-/*
- * __db_txnlist_find --
- *	Look up a txnid of the current generation in the txnid list.
- *
- *	Returns DB_NOTFOUND when the ID is not in the list; otherwise
- *	behaves like __db_txnlist_find_internal and reports the status of
- *	the transaction through statusp. A txnid of 0 means the record
- *	was generated outside any transaction and is never found.
- *
- * PUBLIC: int __db_txnlist_find __P((ENV *,
- * PUBLIC: DB_TXNHEAD *, u_int32_t, u_int32_t *));
- */
-int
-__db_txnlist_find(env, hp, txnid, statusp)
-	ENV *env;
-	DB_TXNHEAD *hp;
-	u_int32_t txnid, *statusp;
-{
-	DB_TXNLIST *unused_elp;
-	int ret;
-
-	if (txnid != 0)
-		ret = __db_txnlist_find_internal(env, hp,
-		    TXNLIST_TXNID, txnid, &unused_elp, 0, statusp);
-	else
-		ret = DB_NOTFOUND;
-
-	return (ret);
-}
-
-/*
- * __db_txnlist_update --
- * Change the status of an existing transaction entry.
- * Returns DB_NOTFOUND if no such entry exists.
- *
- * A txnid of 0 always yields DB_NOTFOUND. When the entry is missing
- * and add_ok is non-zero, a new entry is added with the given status
- * and *ret_status is set to that status. When the entry exists,
- * *ret_status receives its previous status; an entry whose previous
- * status is TXN_IGNORE is left untouched. hp->maxlsn is set to *lsn
- * only when lsn is non-NULL, maxlsn is still zero and status is
- * TXN_COMMIT.
- *
- * PUBLIC: int __db_txnlist_update __P((ENV *, DB_TXNHEAD *,
- * PUBLIC: u_int32_t, u_int32_t, DB_LSN *, u_int32_t *, int));
- */
-int
-__db_txnlist_update(env, hp, txnid, status, lsn, ret_status, add_ok)
- ENV *env;
- DB_TXNHEAD *hp;
- u_int32_t txnid, status;
- DB_LSN *lsn;
- u_int32_t *ret_status;
- int add_ok;
-{
- DB_TXNLIST *elp;
- int ret;
-
- if (txnid == 0)
- return (DB_NOTFOUND);
-
- ret = __db_txnlist_find_internal(env,
- hp, TXNLIST_TXNID, txnid, &elp, 0, ret_status);
-
- /* Not present: optionally create the entry instead of failing. */
- if (ret == DB_NOTFOUND && add_ok) {
- *ret_status = status;
- return (__db_txnlist_add(env, hp, txnid, status, lsn));
- }
- if (ret != 0)
- return (ret);
-
- /* Ignored transactions keep their status. */
- if (*ret_status == TXN_IGNORE)
- return (0);
-
- elp->u.t.status = status;
-
- if (lsn != NULL && IS_ZERO_LSN(hp->maxlsn) && status == TXN_COMMIT)
- hp->maxlsn = *lsn;
-
- return (ret);
-}
-
-/*
- * __db_txnlist_find_internal --
- * Find an entry on the transaction list. If the entry is not there or
- * the list pointer is not initialized we return DB_NOTFOUND. If the
- * item is found, we return the status. Currently we always call this
- * with an initialized list pointer but checking for NULL keeps it general.
- *
- * Only TXNLIST_TXNID lookups are supported; any other type panics the
- * environment with EINVAL. An entry matches when both its txnid and
- * its generation (as computed by FIND_GENERATION) match.
- *
- * On a match *statusp receives the entry's status and 0 is returned.
- * If delete is 1 the entry is unlinked and freed and *txnlistp is set
- * to NULL; otherwise the entry is moved to the head of its bucket (if
- * not already there) and returned through *txnlistp. When nothing
- * matches, *txnlistp and *statusp are not written.
- */
-static int
-__db_txnlist_find_internal(env,
- hp, type, txnid, txnlistp, delete, statusp)
- ENV *env;
- DB_TXNHEAD *hp;
- db_txnlist_type type;
- u_int32_t txnid;
- DB_TXNLIST **txnlistp;
- int delete;
- u_int32_t *statusp;
-{
- struct __db_headlink *head;
- DB_TXNLIST *p;
- u_int32_t generation, hash;
- int ret;
-
- ret = 0;
-
- if (hp == NULL)
- return (DB_NOTFOUND);
-
- /* Derive the hash key and the generation to match against. */
- switch (type) {
- case TXNLIST_TXNID:
- hash = txnid;
- FIND_GENERATION(hp, txnid, generation);
- break;
- case TXNLIST_DELETE:
- case TXNLIST_LSN:
- default:
- return (__env_panic(env, EINVAL));
- }
-
- head = &hp->head[DB_TXNLIST_MASK(hp, hash)];
- LIST_FOREACH(p, head, links) {
- if (p->type != type)
- continue;
- switch (type) {
- case TXNLIST_TXNID:
- if (p->u.t.txnid != txnid ||
- generation != p->u.t.generation)
- continue;
- *statusp = p->u.t.status;
- break;
-
- case TXNLIST_DELETE:
- case TXNLIST_LSN:
- default:
- return (__env_panic(env, EINVAL));
- }
- /* Every path below returns, so mutating the list here is safe. */
- if (delete == 1) {
- LIST_REMOVE(p, links);
- __os_free(env, p);
- *txnlistp = NULL;
- } else if (p != LIST_FIRST(head)) {
- /* Move it to head of list. */
- LIST_REMOVE(p, links);
- LIST_INSERT_HEAD(head, p, links);
- *txnlistp = p;
- } else
- *txnlistp = p;
- return (ret);
- }
-
- return (DB_NOTFOUND);
-}
-
-/*
- * __db_txnlist_gen --
- *	Change the current generation number.
- *
- *	A negative incr pops the newest range off the generation stack.
- *	Any other value pushes a new range [min, max] at index 0 and tags
- *	it with the new generation number, growing the array if needed.
- *
- *	Returns 0 on success or the error from __os_realloc. On failure
- *	the generation count and allocation size are left unchanged.
- *
- * PUBLIC: int __db_txnlist_gen __P((ENV *,
- * PUBLIC: DB_TXNHEAD *, int, u_int32_t, u_int32_t));
- */
-int
-__db_txnlist_gen(env, hp, incr, min, max)
-	ENV *env;
-	DB_TXNHEAD *hp;
-	int incr;
-	u_int32_t min, max;
-{
-	int ret;
-
-	/*
-	 * During recovery generation numbers keep track of "restart"
-	 * checkpoints and recycle records. Restart checkpoints occur
-	 * whenever we take a checkpoint and there are no outstanding
-	 * transactions. When that happens, we can reset transaction IDs
-	 * back to TXN_MINIMUM. Currently we only do the reset
-	 * at the end of recovery. Recycle records occur when txnids
-	 * are exhausted during runtime. A free range of ids is identified
-	 * and logged. This code maintains a stack of ranges. A txnid
-	 * is given the generation number of the first range it falls into
-	 * in the stack.
-	 */
-	if (incr < 0) {
-		/*
-		 * NOTE(review): nothing here guards against popping when
-		 * hp->generation is already 0 -- confirm callers never do.
-		 */
-		--hp->generation;
-		memmove(hp->gen_array, &hp->gen_array[1],
-		    (hp->generation + 1) * sizeof(hp->gen_array[0]));
-	} else {
-		/*
-		 * Grow the array before touching any bookkeeping, so that a
-		 * failed allocation does not leave hp->generation and
-		 * hp->gen_alloc describing more entries than gen_array
-		 * holds. This assumes __os_realloc leaves the stored
-		 * pointer untouched on failure -- TODO confirm.
-		 */
-		if (hp->generation + 1 >= hp->gen_alloc) {
-			if ((ret = __os_realloc(env, hp->gen_alloc * 2 *
-			    sizeof(hp->gen_array[0]), &hp->gen_array)) != 0)
-				return (ret);
-			hp->gen_alloc *= 2;
-		}
-		++hp->generation;
-
-		/* Shift the existing ranges down and push the new one. */
-		memmove(&hp->gen_array[1], &hp->gen_array[0],
-		    hp->generation * sizeof(hp->gen_array[0]));
-		hp->gen_array[0].generation = hp->generation;
-		hp->gen_array[0].txn_min = min;
-		hp->gen_array[0].txn_max = max;
-	}
-	return (0);
-}
-
-/*
- * __db_txnlist_lsnadd --
- * Save the prev_lsn from a txn_child record.
- *
- * A zero LSN is ignored. The LSN is pushed onto the stack held by the
- * TXNLIST_LSN entry in hash bucket 0, doubling the stack when full.
- * If no such entry exists yet, one is created with this LSN as its
- * first element and DB_SURPRISE_KID is returned.
- *
- * If growing the stack fails, the entire transaction list (including
- * hp itself) is freed through __db_txnlist_end before the error is
- * returned.
- *
- * PUBLIC: int __db_txnlist_lsnadd __P((ENV *, DB_TXNHEAD *, DB_LSN *));
- */
-int
-__db_txnlist_lsnadd(env, hp, lsnp)
- ENV *env;
- DB_TXNHEAD *hp;
- DB_LSN *lsnp;
-{
- DB_TXNLIST *elp;
- int ret;
-
- if (IS_ZERO_LSN(*lsnp))
- return (0);
-
- /* Locate the LSN-stack entry; elp is NULL if the loop runs out. */
- LIST_FOREACH(elp, &hp->head[0], links)
- if (elp->type == TXNLIST_LSN)
- break;
-
- if (elp == NULL) {
- if ((ret = __db_txnlist_lsninit(env, hp, lsnp)) != 0)
- return (ret);
- return (DB_SURPRISE_KID);
- }
-
- if (elp->u.l.stack_indx == elp->u.l.stack_size) {
- elp->u.l.stack_size <<= 1;
- if ((ret = __os_realloc(env, sizeof(DB_LSN) *
- elp->u.l.stack_size, &elp->u.l.lsn_stack)) != 0) {
- __db_txnlist_end(env, hp);
- return (ret);
- }
- }
- elp->u.l.lsn_stack[elp->u.l.stack_indx++] = *lsnp;
-
- return (0);
-}
-
-/*
- * __db_txnlist_lsnget --
- *	Get the lsn saved from a txn_child record.
- *
- *	Pops the most recently saved LSN from the TXNLIST_LSN entry in
- *	hash bucket 0 into *lsnp. When there is no such entry, or its
- *	stack is empty, *lsnp is zeroed instead. Always returns 0; the
- *	flags argument is unused.
- *
- * PUBLIC: int __db_txnlist_lsnget __P((ENV *,
- * PUBLIC: DB_TXNHEAD *, DB_LSN *, u_int32_t));
- */
-int
-__db_txnlist_lsnget(env, hp, lsnp, flags)
-	ENV *env;
-	DB_TXNHEAD *hp;
-	DB_LSN *lsnp;
-	u_int32_t flags;
-{
-	DB_TXNLIST *lsn_elp;
-
-	COMPQUIET(env, NULL);
-	COMPQUIET(flags, 0);
-
-	/* Locate the LSN-stack entry; NULL if the bucket has none. */
-	LIST_FOREACH(lsn_elp, &hp->head[0], links)
-		if (lsn_elp->type == TXNLIST_LSN)
-			break;
-
-	if (lsn_elp != NULL && lsn_elp->u.l.stack_indx != 0) {
-		lsn_elp->u.l.stack_indx--;
-		*lsnp = lsn_elp->u.l.lsn_stack[lsn_elp->u.l.stack_indx];
-	} else
-		ZERO_LSN(*lsnp);
-
-	return (0);
-}
-
-/*
- * __db_txnlist_lsninit --
- * Initialize a transaction list with an lsn array entry.
- *
- * Creates a TXNLIST_LSN entry in hash bucket 0 with a stack of
- * DB_LSN_STACK_SIZE elements whose first element is *lsnp.
- *
- * On any allocation failure the entire transaction list (including hp
- * itself) is freed through __db_txnlist_end before the error is
- * returned.
- *
- * PUBLIC: int __db_txnlist_lsninit __P((ENV *, DB_TXNHEAD *, DB_LSN *));
- */
-int
-__db_txnlist_lsninit(env, hp, lsnp)
- ENV *env;
- DB_TXNHEAD *hp;
- DB_LSN *lsnp;
-{
- DB_TXNLIST *elp;
- int ret;
-
- elp = NULL;
-
- if ((ret = __os_malloc(env, sizeof(DB_TXNLIST), &elp)) != 0)
- goto err;
- LIST_INSERT_HEAD(&hp->head[0], elp, links);
-
- /*
- * NOTE(review): if the stack allocation below fails, the entry is
- * already linked with type TXNLIST_LSN, so __db_txnlist_end will pass
- * its lsn_stack to __os_free. Confirm __os_malloc clears the pointer
- * on failure and that __os_free accepts it.
- */
- elp->type = TXNLIST_LSN;
- if ((ret = __os_malloc(env,
- sizeof(DB_LSN) * DB_LSN_STACK_SIZE, &elp->u.l.lsn_stack)) != 0)
- goto err;
- elp->u.l.stack_indx = 1;
- elp->u.l.stack_size = DB_LSN_STACK_SIZE;
- elp->u.l.lsn_stack[0] = *lsnp;
-
- return (0);
-
-err: __db_txnlist_end(env, hp);
- return (ret);
-}
-
-#ifdef DEBUG
-/*
- * __db_txnlist_print --
- *	Debugging aid: write the transaction list to stdout.
- *
- *	Prints the header's maxid and generation, then one line per list
- *	entry giving the txnid (hex), its generation, and a label for its
- *	status. Entries that are not TXNLIST_TXNID are reported as
- *	unrecognized.
- *
- * PUBLIC: void __db_txnlist_print __P((DB_TXNHEAD *));
- */
-void
-__db_txnlist_print(hp)
-	DB_TXNHEAD *hp;
-{
-	DB_TXNLIST *elp;
-	u_int32_t slot;
-	char *label;
-
-	printf("Maxid: %lu Generation: %lu\n",
-	    (u_long)hp->maxid, (u_long)hp->generation);
-
-	for (slot = 0; slot < hp->nslots; slot++) {
-		LIST_FOREACH(elp, &hp->head[slot], links) {
-			if (elp->type != TXNLIST_TXNID) {
-				printf("Unrecognized type: %d\n", elp->type);
-				continue;
-			}
-
-			/* Translate the status code into a printable label. */
-			if (elp->u.t.status == TXN_OK)
-				label = "OK";
-			else if (elp->u.t.status == TXN_COMMIT)
-				label = "commit";
-			else if (elp->u.t.status == TXN_PREPARE)
-				label = "prepare";
-			else if (elp->u.t.status == TXN_ABORT)
-				label = "abort";
-			else if (elp->u.t.status == TXN_IGNORE)
-				label = "ignore";
-			else if (elp->u.t.status == TXN_EXPECTED)
-				label = "expected";
-			else if (elp->u.t.status == TXN_UNEXPECTED)
-				label = "unexpected";
-			else
-				label = "UNKNOWN";
-
-			printf("TXNID: %lx(%lu): %s\n",
-			    (u_long)elp->u.t.txnid,
-			    (u_long)elp->u.t.generation, label);
-		}
-	}
-}
-#endif
diff --git a/db/db_dup.c b/db/db_dup.c
deleted file mode 100644
index b789e03..0000000
--- a/db/db_dup.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/mp.h"
-#include "dbinc/db_am.h"
-
-/*
- * __db_ditem_nolog --
- * Remove an item from a page without affecting its recoverability.
- *
- * Removes the item at index indx, which occupies nbytes on the page:
- * the data between the page's high offset and the item is slid up by
- * nbytes, every index entry that pointed below the removed item is
- * adjusted by nbytes, and the index array is closed up over the hole.
- * The page must already be marked dirty. Always returns 0.
- *
- * PUBLIC: int __db_ditem_nolog __P((DBC *, PAGE *, u_int32_t, u_int32_t));
- */
-int
-__db_ditem_nolog(dbc, pagep, indx, nbytes)
- DBC *dbc;
- PAGE *pagep;
- u_int32_t indx, nbytes;
-{
- DB *dbp;
- db_indx_t cnt, *inp, offset;
- u_int8_t *from;
-
- dbp = dbc->dbp;
- DB_ASSERT(dbp->env, IS_DIRTY(pagep));
- DB_ASSERT(dbp->env, indx < NUM_ENT(pagep));
-
- /*
- * If there's only a single item on the page, we don't have to
- * work hard: reset the page to empty.
- */
- if (NUM_ENT(pagep) == 1) {
- NUM_ENT(pagep) = 0;
- HOFFSET(pagep) = dbp->pgsize;
- return (0);
- }
-
- inp = P_INP(dbp, pagep);
- /*
- * Pack the remaining key/data items at the end of the page. Use
- * memmove(3), the regions may overlap.
- */
- from = (u_int8_t *)pagep + HOFFSET(pagep);
- DB_ASSERT(dbp->env, inp[indx] >= HOFFSET(pagep));
- memmove(from + nbytes, from, inp[indx] - HOFFSET(pagep));
- HOFFSET(pagep) += nbytes;
-
- /* Adjust the offsets of the items that were just moved. */
- offset = inp[indx];
- for (cnt = 0; cnt < NUM_ENT(pagep); ++cnt)
- if (inp[cnt] < offset)
- inp[cnt] += nbytes;
-
- /* Shift the indices down. */
- --NUM_ENT(pagep);
- if (indx != NUM_ENT(pagep))
- memmove(&inp[indx], &inp[indx + 1],
- sizeof(db_indx_t) * (NUM_ENT(pagep) - indx));
-
- return (0);
-}
-
-/*
- * __db_ditem --
- *	Remove an item from a page, logging it if enabled.
- *
- *	When the cursor is logging, a DB_REM_DUP addrem record describing
- *	the nbytes at index indx is written first and the page LSN is
- *	updated by that call; otherwise the page LSN is marked as not
- *	logged. The removal itself is done by __db_ditem_nolog.
- *
- *	Returns the logging error, if any, else the result of the removal.
- *
- * PUBLIC: int __db_ditem __P((DBC *, PAGE *, u_int32_t, u_int32_t));
- */
-int
-__db_ditem(dbc, pagep, indx, nbytes)
-	DBC *dbc;
-	PAGE *pagep;
-	u_int32_t indx, nbytes;
-{
-	DB *dbp;
-	DBT removed;
-	int ret;
-
-	dbp = dbc->dbp;
-
-	if (DBC_LOGGING(dbc)) {
-		/* Describe the bytes being removed for the log record. */
-		removed.data = P_ENTRY(dbp, pagep, indx);
-		removed.size = nbytes;
-
-		ret = __db_addrem_log(dbp, dbc->txn,
-		    &LSN(pagep), 0, DB_REM_DUP, PGNO(pagep),
-		    (u_int32_t)indx, nbytes, &removed, NULL, &LSN(pagep));
-		if (ret != 0)
-			return (ret);
-	} else
-		LSN_NOT_LOGGED(LSN(pagep));
-
-	return (__db_ditem_nolog(dbc, pagep, indx, nbytes));
-}
-
-/*
- * __db_pitem_nolog --
- * Put an item on a page without logging.
- *
- * Inserts an item of nbytes at index indx: the index array is opened
- * up at indx, the page's high offset is lowered by nbytes, and hdr
- * followed by data (if any) is copied to the new location. When hdr
- * is NULL a B_KEYDATA BKEYDATA header is built locally, with its
- * length taken from data (0 if data is NULL).
- *
- * The page must already be marked dirty. Returns EINVAL if nbytes
- * exceeds the page's free space, otherwise 0.
- *
- * PUBLIC: int __db_pitem_nolog
- * PUBLIC: __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *));
- */
-int
-__db_pitem_nolog(dbc, pagep, indx, nbytes, hdr, data)
- DBC *dbc;
- PAGE *pagep;
- u_int32_t indx;
- u_int32_t nbytes;
- DBT *hdr, *data;
-{
- BKEYDATA bk;
- DB *dbp;
- DBT thdr;
- db_indx_t *inp;
- u_int8_t *p;
-
- dbp = dbc->dbp;
-
- DB_ASSERT(dbp->env, IS_DIRTY(pagep));
-
- /* The assertion below always fails here; the return covers builds
- * in which DB_ASSERT does not stop execution. */
- if (nbytes > P_FREESPACE(dbp, pagep)) {
- DB_ASSERT(dbp->env, nbytes <= P_FREESPACE(dbp, pagep));
- return (EINVAL);
- }
-
- if (hdr == NULL) {
- B_TSET(bk.type, B_KEYDATA);
- bk.len = data == NULL ? 0 : data->size;
-
- thdr.data = &bk;
- thdr.size = SSZA(BKEYDATA, data);
- hdr = &thdr;
- }
- inp = P_INP(dbp, pagep);
-
- /* Adjust the index table, then put the item on the page. */
- if (indx != NUM_ENT(pagep))
- memmove(&inp[indx + 1], &inp[indx],
- sizeof(db_indx_t) * (NUM_ENT(pagep) - indx));
- HOFFSET(pagep) -= nbytes;
- inp[indx] = HOFFSET(pagep);
- ++NUM_ENT(pagep);
-
- p = P_ENTRY(dbp, pagep, indx);
- memcpy(p, hdr->data, hdr->size);
- if (data != NULL)
- memcpy(p + hdr->size, data->data, data->size);
-
- return (0);
-}
-
-/*
- * __db_pitem --
- *	Put a single item onto a page, logging it if enabled.
- *
- *	The logic figuring out where to insert and whether the item fits
- *	is handled in the caller; all that happens here is logging and
- *	the page shuffling done by __db_pitem_nolog. To avoid copying the
- *	DBT twice on a normal put, a NULL hdr means a BKEYDATA structure
- *	is created on the page; otherwise the caller's information is
- *	copied onto the page as is.
- *
- *	This routine is also used to put pre-built entries onto the page,
- *	e.g., during recovery. In that case hdr points to the entry and
- *	data is NULL.
- *
- *	!!!
- *	There's a tremendous potential for off-by-one errors here, since
- *	the passed in header sizes must be adjusted for the structure's
- *	placeholder for the trailing variable-length data field.
- *
- * PUBLIC: int __db_pitem
- * PUBLIC: __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *));
- */
-int
-__db_pitem(dbc, pagep, indx, nbytes, hdr, data)
-	DBC *dbc;
-	PAGE *pagep;
-	u_int32_t indx;
-	u_int32_t nbytes;
-	DBT *hdr, *data;
-{
-	DB *dbp;
-	int ret;
-
-	dbp = dbc->dbp;
-
-	if (DBC_LOGGING(dbc)) {
-		/* Log the addition; the call also updates the page LSN. */
-		ret = __db_addrem_log(dbp, dbc->txn,
-		    &LSN(pagep), 0, DB_ADD_DUP, PGNO(pagep),
-		    (u_int32_t)indx, nbytes, hdr, data, &LSN(pagep));
-		if (ret != 0)
-			return (ret);
-	} else
-		LSN_NOT_LOGGED(LSN(pagep));
-
-	return (__db_pitem_nolog(dbc, pagep, indx, nbytes, hdr, data));
-}
diff --git a/db/db_iface.c b/db/db_iface.c
deleted file mode 100644
index 55f3e2a..0000000
--- a/db/db_iface.c
+++ /dev/null
@@ -1,2817 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/btree.h"
-#include "dbinc/hash.h"
-#ifndef HAVE_QUEUE
-#include "dbinc/qam.h" /* For __db_no_queue_am(). */
-#endif
-#include "dbinc/lock.h"
-#include "dbinc/log.h"
-#include "dbinc/mp.h"
-#include "dbinc/partition.h"
-#include "dbinc/txn.h"
-
-static int __db_associate_arg __P((DB *, DB *,
- int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t));
-static int __dbc_del_arg __P((DBC *, u_int32_t));
-static int __dbc_pget_arg __P((DBC *, DBT *, u_int32_t));
-static int __dbc_put_arg __P((DBC *, DBT *, DBT *, u_int32_t));
-static int __db_curinval __P((const ENV *));
-static int __db_cursor_arg __P((DB *, u_int32_t));
-static int __db_del_arg __P((DB *, DBT *, u_int32_t));
-static int __db_get_arg __P((const DB *, DBT *, DBT *, u_int32_t));
-static int __db_join_arg __P((DB *, DBC **, u_int32_t));
-static int __db_open_arg __P((DB *,
- DB_TXN *, const char *, const char *, DBTYPE, u_int32_t));
-static int __db_pget_arg __P((DB *, DBT *, u_int32_t));
-static int __db_put_arg __P((DB *, DBT *, DBT *, u_int32_t));
-static int __dbt_ferr __P((const DB *, const char *, const DBT *, int));
-static int __db_associate_foreign_arg __P((DB *, DB *,
- int (*)(DB *, const DBT *, DBT *, const DBT *, int *),
- u_int32_t));
-
-/*
- * These functions implement the Berkeley DB API. They are organized in a
- * layered fashion. The interface functions (XXX_pp) perform all generic
- * error checks (for example, PANIC'd region, replication state change
- * in progress, inconsistent transaction usage), call function-specific
- * check routines (_arg) to check for proper flag usage, etc., do pre-amble
- * processing (incrementing handle counts, handling local transactions),
- * call the function and then do post-amble processing (local transactions,
- * decrement handle counts).
- *
- * The basic structure is:
- * Check for simple/generic errors (PANIC'd region)
- * Check if replication is changing state (increment handle count).
- * Call function-specific argument checking routine
- * Create internal transaction if necessary
- * Call underlying worker function
- * Commit/abort internal transaction if necessary
- * Decrement handle count
- */
-
-/*
- * __db_associate_pp --
- * DB->associate pre/post processing.
- *
- * Enters the environment and (if replicated) the replication block,
- * refuses to proceed while the secondary has active or join cursors,
- * validates the arguments, creates a local transaction when the
- * handle is auto-commit, destroys the secondary's free cursors, and
- * then calls __db_associate. On the way out the local transaction is
- * resolved and the replication block released. The first error
- * encountered is the one returned.
- *
- * PUBLIC: int __db_associate_pp __P((DB *, DB_TXN *, DB *,
- * PUBLIC: int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t));
- */
-int
-__db_associate_pp(dbp, txn, sdbp, callback, flags)
- DB *dbp, *sdbp;
- DB_TXN *txn;
- int (*callback) __P((DB *, const DBT *, const DBT *, DBT *));
- u_int32_t flags;
-{
- DBC *sdbc;
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret, txn_local;
-
- env = dbp->env;
- txn_local = 0;
-
- STRIP_AUTO_COMMIT(flags);
-
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check &&
- (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
- /* Entry failed, so there is no block to release at err. */
- handle_check = 0;
- goto err;
- }
-
- /*
- * Secondary cursors may have the primary's lock file ID, so we need
- * to make sure that no older cursors are lying around when we make
- * the transition.
- */
- if (TAILQ_FIRST(&sdbp->active_queue) != NULL ||
- TAILQ_FIRST(&sdbp->join_queue) != NULL) {
- __db_errx(env,
- "Databases may not become secondary indices while cursors are open");
- ret = EINVAL;
- goto err;
- }
-
- if ((ret = __db_associate_arg(dbp, sdbp, callback, flags)) != 0)
- goto err;
-
- /*
- * Create a local transaction as necessary, check for consistent
- * transaction usage, and, if we have no transaction but do have
- * locking on, acquire a locker id for the handle lock acquisition.
- */
- if (IS_DB_AUTO_COMMIT(dbp, txn)) {
- if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
- goto err;
- txn_local = 1;
- }
-
- /* Check for consistent transaction usage. */
- if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0)
- goto err;
-
- /* Discard the secondary's cached (free) cursors. */
- while ((sdbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL)
- if ((ret = __dbc_destroy(sdbc)) != 0)
- goto err;
-
- ret = __db_associate(dbp, ip, txn, sdbp, callback, flags);
-
- /* Resolve the local transaction, passing along the result so far. */
-err: if (txn_local &&
- (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
- ret = t_ret;
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_associate_arg --
- *	Check DB->associate arguments.
- *
- *	Rejects, with EINVAL and an error message, a secondary that is
- *	already associated, a primary that is itself a secondary, allows
- *	duplicates or is a renumbering recno, mismatched environments or
- *	DB_THREAD settings, and a NULL callback on handles that are not
- *	both read-only. Finally validates flags against DB_CREATE and
- *	DB_IMMUTABLE_KEY. Returns 0 if everything is acceptable.
- */
-static int
-__db_associate_arg(dbp, sdbp, callback, flags)
-	DB *dbp, *sdbp;
-	int (*callback) __P((DB *, const DBT *, const DBT *, DBT *));
-	u_int32_t flags;
-{
-	ENV *env;
-
-	env = dbp->env;
-
-	if (F_ISSET(sdbp, DB_AM_SECONDARY)) {
-		__db_errx(env,
-		    "Secondary index handles may not be re-associated");
-		return (EINVAL);
-	}
-
-	if (F_ISSET(dbp, DB_AM_SECONDARY)) {
-		__db_errx(env,
-		    "Secondary indices may not be used as primary databases");
-		return (EINVAL);
-	}
-
-	if (F_ISSET(dbp, DB_AM_DUP)) {
-		__db_errx(env,
-		    "Primary databases may not be configured with duplicates");
-		return (EINVAL);
-	}
-
-	if (F_ISSET(dbp, DB_AM_RENUMBER)) {
-		__db_errx(env,
-	    "Renumbering recno databases may not be used as primary databases");
-		return (EINVAL);
-	}
-
-	/*
-	 * It's OK for the primary and secondary to not share an environment
-	 * IFF the environments are local to the DB handle. (Specifically,
-	 * cursor adjustment will work correctly in this case.) The
-	 * environment being local implies the environment is not configured
-	 * for either locking or transactions, as neither of those could
-	 * work correctly.
-	 */
-	if (dbp->env != sdbp->env &&
-	    (!F_ISSET(dbp->env, ENV_DBLOCAL) ||
-	    !F_ISSET(sdbp->env, ENV_DBLOCAL))) {
-		__db_errx(env,
-	    "The primary and secondary must be opened in the same environment");
-		return (EINVAL);
-	}
-
-	/* Exactly one of the two handles being threaded is an error. */
-	if (!DB_IS_THREADED(dbp) != !DB_IS_THREADED(sdbp)) {
-		__db_errx(env,
-	    "The DB_THREAD setting must be the same for primary and secondary");
-		return (EINVAL);
-	}
-
-	if (callback == NULL &&
-	    (!F_ISSET(dbp, DB_AM_RDONLY) || !F_ISSET(sdbp, DB_AM_RDONLY))) {
-		__db_errx(env,
-    "Callback function may be NULL only when database handles are read-only");
-		return (EINVAL);
-	}
-
-	return (__db_fchk(env, "DB->associate", flags, DB_CREATE |
-	    DB_IMMUTABLE_KEY));
-}
-
-/*
- * __db_close_pp --
- * DB->close pre/post processing.
- *
- * The only flag values accepted are 0 and DB_NOSYNC. Because close
- * acts as the handle destructor, __db_close is called even when the
- * flag check or the replication entry fails; the first error seen is
- * the one returned.
- *
- * PUBLIC: int __db_close_pp __P((DB *, u_int32_t));
- */
-int
-__db_close_pp(dbp, flags)
- DB *dbp;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- env = dbp->env;
- ret = 0;
-
- /*
- * Close a DB handle -- as a handle destructor, we can't fail.
- *
- * !!!
- * The actual argument checking is simple, do it inline, outside of
- * the replication block.
- */
- if (flags != 0 && flags != DB_NOSYNC)
- ret = __db_ferr(env, "DB->close", 0);
-
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check && (t_ret = __db_rep_enter(dbp, 0, 0, 0)) != 0) {
- /* Entry failed, so there is no block to release below. */
- handle_check = 0;
- if (ret == 0)
- ret = t_ret;
- }
-
- if ((t_ret = __db_close(dbp, NULL, flags)) != 0 && ret == 0)
- ret = t_ret;
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_cursor_pp --
- * DB->cursor pre/post processing.
- *
- * For a non-transactional cursor in a replicated environment, enters
- * the replication operation block and fails with DB_REP_HANDLE_DEAD
- * if the handle's timestamp no longer matches the region's
- * rep_timestamp. Then validates the flags and transaction usage and
- * creates the cursor with __db_cursor.
- *
- * The replication operation block is released here only on error; on
- * success it is still held when this function returns.
- *
- * PUBLIC: int __db_cursor_pp __P((DB *, DB_TXN *, DBC **, u_int32_t));
- */
-int
-__db_cursor_pp(dbp, txn, dbcp, flags)
- DB *dbp;
- DB_TXN *txn;
- DBC **dbcp;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- REGENV *renv;
- int rep_blocked, ret;
-
- env = dbp->env;
-
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor");
-
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- rep_blocked = 0;
- if (txn == NULL && IS_ENV_REPLICATED(env)) {
- if ((ret = __op_rep_enter(env)) != 0)
- goto err;
- rep_blocked = 1;
- renv = env->reginfo->primary;
- if (dbp->timestamp != renv->rep_timestamp) {
- __db_errx(env, "%s %s",
- "replication recovery unrolled committed transactions;",
- "open DB and DBcursor handles must be closed");
- ret = DB_REP_HANDLE_DEAD;
- goto err;
- }
- }
- if ((ret = __db_cursor_arg(dbp, flags)) != 0)
- goto err;
-
- /*
- * Check for consistent transaction usage. For now, assume this
- * cursor might be used for read operations only (in which case
- * it may not require a txn). We'll check more stringently in
- * c_del and c_put. (Note this means the read-op txn tests have
- * to be a subset of the write-op ones.)
- */
- if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0)
- goto err;
-
- ret = __db_cursor(dbp, ip, txn, dbcp, flags);
-
-err: /* Release replication block on error. */
- if (ret != 0 && rep_blocked)
- (void)__op_rep_exit(env);
-
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_cursor --
- *	DB->cursor.
- *
- *	Creates a cursor on dbp and returns it through dbcp. For a
- *	multiversion database opened without a transaction, when snapshot
- *	isolation is requested by flag or by the environment, a private
- *	snapshot transaction is begun for the cursor. Under CDB locking
- *	the cursor's lock is acquired here, and the read-isolation flags
- *	are copied onto the cursor from the flags or the transaction.
- *
- *	Returns 0 on success. On failure no cursor is returned, and a
- *	private transaction begun here is aborted if the cursor could not
- *	be created.
- *
- * PUBLIC: int __db_cursor __P((DB *,
- * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBC **, u_int32_t));
- */
-int
-__db_cursor(dbp, ip, txn, dbcp, flags)
-	DB *dbp;
-	DB_THREAD_INFO *ip;
-	DB_TXN *txn;
-	DBC **dbcp;
-	u_int32_t flags;
-{
-	DBC *dbc;
-	ENV *env;
-	db_lockmode_t mode;
-	int ret, txn_private;
-
-	env = dbp->env;
-	txn_private = 0;
-
-	if (MULTIVERSION(dbp) && txn == NULL && (LF_ISSET(DB_TXN_SNAPSHOT) ||
-	    F_ISSET(env->dbenv, DB_ENV_TXN_SNAPSHOT))) {
-		if ((ret =
-		    __txn_begin(env, ip, NULL, &txn, DB_TXN_SNAPSHOT)) != 0)
-			return (ret);
-		F_SET(txn, TXN_PRIVATE);
-		txn_private = 1;
-	}
-
-	if ((ret = __db_cursor_int(dbp, ip, txn, dbp->type, PGNO_INVALID,
-	    LF_ISSET(DB_CURSOR_BULK | DB_CURSOR_TRANSIENT), NULL, &dbc)) != 0) {
-		/*
-		 * The private transaction is reachable only through the
-		 * local txn pointer, so returning without resolving it
-		 * would leak it. No cursor exists to carry it; abort it.
-		 */
-		if (txn_private)
-			(void)__txn_abort(txn);
-		return (ret);
-	}
-
-	/*
-	 * If this is CDB, do all the locking in the interface, which is
-	 * right here.
-	 */
-	if (CDB_LOCKING(env)) {
-		mode = (LF_ISSET(DB_WRITELOCK)) ? DB_LOCK_WRITE :
-		    ((LF_ISSET(DB_WRITECURSOR) || txn != NULL) ?
-		    DB_LOCK_IWRITE : DB_LOCK_READ);
-		if ((ret = __lock_get(env, dbc->locker, 0,
-		    &dbc->lock_dbt, mode, &dbc->mylock)) != 0)
-			goto err;
-		if (LF_ISSET(DB_WRITECURSOR))
-			F_SET(dbc, DBC_WRITECURSOR);
-		if (LF_ISSET(DB_WRITELOCK))
-			F_SET(dbc, DBC_WRITER);
-	}
-
-	if (LF_ISSET(DB_READ_UNCOMMITTED) ||
-	    (txn != NULL && F_ISSET(txn, TXN_READ_UNCOMMITTED)))
-		F_SET(dbc, DBC_READ_UNCOMMITTED);
-
-	if (LF_ISSET(DB_READ_COMMITTED) ||
-	    (txn != NULL && F_ISSET(txn, TXN_READ_COMMITTED)))
-		F_SET(dbc, DBC_READ_COMMITTED);
-
-	*dbcp = dbc;
-	return (0);
-
-	/*
-	 * NOTE(review): presumably __dbc_close resolves a TXN_PRIVATE
-	 * transaction attached to the cursor -- confirm, otherwise the
-	 * private transaction leaks on this path as well.
-	 */
-err:	(void)__dbc_close(dbc);
-	return (ret);
-}
-
-/*
- * __db_cursor_arg --
- * Check DB->cursor arguments.
- *
- * Works on the local copy of flags: each recognized flag is cleared
- * once validated, and anything left over at the end is an error.
- * DB_WRITECURSOR and DB_WRITELOCK are rejected on read-only handles,
- * and DB_WRITECURSOR additionally requires CDB locking. Returns 0 if
- * the flags are acceptable.
- */
-static int
-__db_cursor_arg(dbp, flags)
- DB *dbp;
- u_int32_t flags;
-{
- ENV *env;
-
- env = dbp->env;
-
- /*
- * DB_READ_COMMITTED and DB_READ_UNCOMMITTED require locking.
- */
- if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED)) {
- if (!LOCKING_ON(env))
- return (__db_fnl(env, "DB->cursor"));
- }
-
- /* These flags need no further checking here. */
- LF_CLR(DB_CURSOR_BULK |
- DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT);
-
- /* Check for invalid function flags. */
- if (LF_ISSET(DB_WRITECURSOR)) {
- if (DB_IS_READONLY(dbp))
- return (__db_rdonly(env, "DB->cursor"));
- if (!CDB_LOCKING(env))
- return (__db_ferr(env, "DB->cursor", 0));
- LF_CLR(DB_WRITECURSOR);
- } else if (LF_ISSET(DB_WRITELOCK)) {
- if (DB_IS_READONLY(dbp))
- return (__db_rdonly(env, "DB->cursor"));
- LF_CLR(DB_WRITELOCK);
- }
-
- /* Any flag still set was not recognized above. */
- if (flags != 0)
- return (__db_ferr(env, "DB->cursor", 0));
-
- return (0);
-}
-
-/*
- * __db_del_pp --
- * DB->del pre/post processing.
- *
- * Enters the environment and (if replicated) the replication block,
- * validates the arguments, creates a local transaction when the
- * handle is auto-commit, checks transaction usage, and calls
- * __db_del. On the way out the local transaction is resolved, the
- * replication block released, and __dbt_userfree is called on the
- * key. The first error encountered is the one returned.
- *
- * PUBLIC: int __db_del_pp __P((DB *, DB_TXN *, DBT *, u_int32_t));
- */
-int
-__db_del_pp(dbp, txn, key, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret, txn_local;
-
- env = dbp->env;
- txn_local = 0;
-
- STRIP_AUTO_COMMIT(flags);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del");
-
-#ifdef CONFIG_TEST
- if (IS_REP_MASTER(env))
- DB_TEST_WAIT(env, env->test_check);
-#endif
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check &&
- (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
- /* Entry failed, so there is no block to release at err. */
- handle_check = 0;
- goto err;
- }
-
- if ((ret = __db_del_arg(dbp, key, flags)) != 0)
- goto err;
-
- /* Create local transaction as necessary. */
- if (IS_DB_AUTO_COMMIT(dbp, txn)) {
- if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
- goto err;
- txn_local = 1;
- }
-
- /* Check for consistent transaction usage. */
- if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0)
- goto err;
-
- ret = __db_del(dbp, ip, txn, key, flags);
-
- /* Resolve the local transaction, passing along the result so far. */
-err: if (txn_local &&
- (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
- ret = t_ret;
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
- ENV_LEAVE(env, ip);
- /* Runs on every path, including those that failed before the copy. */
- __dbt_userfree(env, key, NULL, NULL);
- return (ret);
-}
-
-/*
- * __db_del_arg --
- * Check DB->del arguments.
- *
- * Rejects deletes on read-only handles. The flags value must be
- * exactly one of 0, DB_CONSUME (DB_QUEUE databases only), DB_MULTIPLE
- * or DB_MULTIPLE_KEY (both require a key with DB_DBT_BULK set). For
- * every accepted value the key is passed through __dbt_usercopy.
- * Returns 0 if the arguments are acceptable.
- */
-static int
-__db_del_arg(dbp, key, flags)
- DB *dbp;
- DBT *key;
- u_int32_t flags;
-{
- ENV *env;
- int ret;
-
- env = dbp->env;
-
- /* Check for changes to a read-only tree. */
- if (DB_IS_READONLY(dbp))
- return (__db_rdonly(env, "DB->del"));
-
- /* Check for invalid function flags. */
- switch (flags) {
- case DB_CONSUME:
- if (dbp->type != DB_QUEUE)
- return (__db_ferr(env, "DB->del", 0));
- /* Jump to the shared key copy under "case 0". */
- goto copy;
- case DB_MULTIPLE:
- case DB_MULTIPLE_KEY:
- if (!F_ISSET(key, DB_DBT_BULK)) {
- __db_errx(env,
- "DB->del with DB_MULTIPLE(_KEY) requires multiple key records");
- return (EINVAL);
- }
- /* FALL THROUGH */
- case 0:
-copy: if ((ret = __dbt_usercopy(env, key)) != 0)
- return (ret);
- break;
- default:
- return (__db_ferr(env, "DB->del", 0));
- }
-
- return (0);
-}
-
-/*
- * __db_exists --
- * DB->exists implementation: look the key up through DB->get while
- * asking for none of the record's data to be copied back.
- *
- * PUBLIC: int __db_exists __P((DB *, DB_TXN *, DBT *, u_int32_t));
- */
-int
-__db_exists(dbp, txn, key, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key;
- u_int32_t flags;
-{
- DBT probe;
- int ret;
-
- STRIP_AUTO_COMMIT(flags);
-
- /*
- * Only the flags particular to DB->exists are vetted here; all
- * remaining flag checking is left to the DB->get call, so that the
- * get argument checker need not know about this method.
- */
- ret = __db_fchk(dbp->env, "DB->exists", flags,
- DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
- if (ret != 0)
- return (ret);
-
- /*
- * A zero-length partial get into user memory, so that no data bytes
- * are copied.
- */
- memset(&probe, 0, sizeof(probe));
- probe.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM;
- probe.dlen = 0;
-
- ret = dbp->get(dbp, txn, key, &probe, flags);
- return (ret);
-}
-
-/*
- * __db_fd_pp --
- * DB->fd pre/post processing.
- *
- * Fetches the file handle for dbp->mpf through __mp_xxx_fh. When a
- * handle is returned its fd member is stored in *fdp; when the call
- * succeeds but yields a NULL handle, *fdp is set to -1, an error is
- * logged and ENOENT is returned.
- *
- * A failure of __db_rep_enter jumps straight to the err label, which
- * is placed after the __env_db_rep_exit call so that call is skipped.
- *
- * PUBLIC: int __db_fd_pp __P((DB *, int *));
- */
-int
-__db_fd_pp(dbp, fdp)
- DB *dbp;
- int *fdp;
-{
- DB_FH *fhp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- env = dbp->env;
-
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->fd");
-
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0)
- goto err;
-
- /*
- * !!!
- * There's no argument checking to be done.
- *
- * !!!
- * The actual method call is simple, do it inline.
- *
- * XXX
- * Truly spectacular layering violation.
- */
- if ((ret = __mp_xxx_fh(dbp->mpf, &fhp)) == 0) {
- if (fhp == NULL) {
- *fdp = -1;
- __db_errx(env,
- "Database does not have a valid file handle");
- ret = ENOENT;
- } else
- *fdp = fhp->fd;
- }
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
-err: ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_get_pp --
- * DB->get pre/post processing.
- *
- * Checks the arguments, enters the environment and the replication
- * block, begins a local transaction for an auto-commit DB_CONSUME or
- * DB_CONSUME_WAIT get, and calls __db_get. On a replication master
- * using leases the lease is then checked, unless the caller passed
- * DB_IGNORE_LEASE. Any user-copied key/data memory is released before
- * returning, including when argument checking fails.
- *
- * Returns 0 on success, otherwise the first error encountered.
- *
- * PUBLIC: int __db_get_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
- */
-int
-__db_get_pp(dbp, txn, key, data, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key, *data;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- u_int32_t mode;
- int handle_check, ignore_lease, ret, t_ret, txn_local;
-
- env = dbp->env;
- mode = 0;
- txn_local = 0;
-
- STRIP_AUTO_COMMIT(flags);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get");
-
- /* Remember, then strip, the lease override before flag checking. */
- ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
- LF_CLR(DB_IGNORE_LEASE);
-
- if ((ret = __db_get_arg(dbp, key, data, flags)) != 0) {
- /*
- * __db_get_arg may have made user copies of the key or data
- * before a later check failed. Release them here, as
- * __db_pget_pp does on the same path, so they do not leak.
- */
- __dbt_userfree(env, key, NULL, data);
- return (ret);
- }
-
- ENV_ENTER(env, ip);
-
- /*
- * Check for replication block. On failure handle_check is cleared
- * so the cleanup code does not call __env_db_rep_exit.
- */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check &&
- (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- /*
- * Consuming gets modify the database: they take a write lock and,
- * for auto-commit handles, run inside a transaction begun here.
- */
- if (LF_ISSET(DB_READ_UNCOMMITTED))
- mode = DB_READ_UNCOMMITTED;
- else if ((flags & DB_OPFLAGS_MASK) == DB_CONSUME ||
- (flags & DB_OPFLAGS_MASK) == DB_CONSUME_WAIT) {
- mode = DB_WRITELOCK;
- if (IS_DB_AUTO_COMMIT(dbp, txn)) {
- if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
- goto err;
- txn_local = 1;
- }
- }
-
- /* Check for consistent transaction usage. */
- if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID,
- mode == DB_WRITELOCK || LF_ISSET(DB_RMW) ? 0 : 1)) != 0)
- goto err;
-
- ret = __db_get(dbp, ip, txn, key, data, flags);
- /*
- * Check for master leases.
- */
- if (ret == 0 &&
- IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
- ret = __rep_lease_check(env, 1);
-
-err: if (txn_local &&
- (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
- ret = t_ret;
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
- ENV_LEAVE(env, ip);
- __dbt_userfree(env, key, NULL, data);
- return (ret);
-}
-
-/*
- * __db_get --
- * DB->get.
- *
- * Opens a cursor on dbp with __db_cursor, performs the get through
- * __dbc_get (or __partc_get when the cursor has DBC_PARTITIONED set
- * and HAVE_PARTITION is defined), then closes the cursor. When the
- * flags contain nothing other than DB_RMW and/or DB_MULTIPLE, DB_SET
- * is added so the cursor positions on the given key.
- *
- * Returns the cursor-open error if the cursor cannot be created;
- * otherwise the get result, or the cursor-close error when the get
- * itself returned 0.
- *
- * PUBLIC: int __db_get __P((DB *,
- * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, u_int32_t));
- */
-int
-__db_get(dbp, ip, txn, key, data, flags)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- DBT *key, *data;
- u_int32_t flags;
-{
- DBC *dbc;
- u_int32_t mode;
- int ret, t_ret;
-
- /*
- * The DB_CURSOR_TRANSIENT flag indicates that we're just doing a single
- * operation with this cursor, and that in case of error we don't need
- * to restore it to its old position. Thus, we can perform the get
- * without duplicating the cursor, saving some cycles in this common
- * case.
- *
- * The isolation flags are moved out of the operation flags and into
- * the cursor mode; consuming gets additionally request a write lock.
- */
- mode = DB_CURSOR_TRANSIENT;
- if (LF_ISSET(DB_READ_UNCOMMITTED)) {
- mode |= DB_READ_UNCOMMITTED;
- LF_CLR(DB_READ_UNCOMMITTED);
- } else if (LF_ISSET(DB_READ_COMMITTED)) {
- mode |= DB_READ_COMMITTED;
- LF_CLR(DB_READ_COMMITTED);
- } else if ((flags & DB_OPFLAGS_MASK) == DB_CONSUME ||
- (flags & DB_OPFLAGS_MASK) == DB_CONSUME_WAIT)
- mode |= DB_WRITELOCK;
-
- if ((ret = __db_cursor(dbp, ip, txn, &dbc, mode)) != 0)
- return (ret);
-
- DEBUG_LREAD(dbc, txn, "DB->get", key, NULL, flags);
-
- /*
- * The semantics of bulk gets are different for DB->get vs DBC->get.
- * Mark the cursor so the low-level bulk get routines know which
- * behavior we want.
- */
- F_SET(dbc, DBC_FROM_DB_GET);
-
- /*
- * SET_RET_MEM indicates that if key and/or data have no DBT
- * flags set and DB manages the returned-data memory, that memory
- * will belong to this handle, not to the underlying cursor.
- */
- SET_RET_MEM(dbc, dbp);
-
- if (LF_ISSET(~(DB_RMW | DB_MULTIPLE)) == 0)
- LF_SET(DB_SET);
-
-#ifdef HAVE_PARTITION
- if (F_ISSET(dbc, DBC_PARTITIONED))
- ret = __partc_get(dbc, key, data, flags);
- else
-#endif
- ret = __dbc_get(dbc, key, data, flags);
-
- if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __db_get_arg --
- * DB->get argument checking, used by both DB->get and DB->pget.
- *
- * Validates the isolation/RMW flags, the bulk flags and the operation
- * flag, makes user copies of the key (and of the data for DB_GET_BOTH)
- * through __dbt_usercopy, and then checks the key and data DBT flags.
- * Because the copies are made before the later checks, a caller that
- * gets an error back should still call __dbt_userfree.
- *
- * Returns 0 if the arguments are acceptable, an error value otherwise.
- */
-static int
-__db_get_arg(dbp, key, data, flags)
- const DB *dbp;
- DBT *key, *data;
- u_int32_t flags;
-{
- ENV *env;
- const char *isolation;
- int multi, ret;
-
- env = dbp->env;
-
- /*
- * Check for read-modify-write validity. DB_RMW doesn't make sense
- * with CDB cursors since if you're going to write the cursor, you
- * had to create it with DB_WRITECURSOR. Regardless, we check for
- * LOCKING_ON and not STD_LOCKING, as we don't want to disallow it.
- * If this changes, confirm that DB does not itself set the DB_RMW
- * flag in a path where CDB may have been configured.
- *
- * The name of the requested isolation flag is saved before the bits
- * are cleared from the local flags, so that the DB_CONSUME error
- * below can name the flag the caller actually passed.
- */
- isolation = NULL;
- if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) {
- if (!LOCKING_ON(env))
- return (__db_fnl(env, "DB->get"));
- if ((ret = __db_fcchk(env, "DB->get",
- flags, DB_READ_UNCOMMITTED, DB_READ_COMMITTED)) != 0)
- return (ret);
- if (LF_ISSET(DB_READ_UNCOMMITTED))
- isolation = "DB_READ_UNCOMMITTED";
- else if (LF_ISSET(DB_READ_COMMITTED))
- isolation = "DB_READ_COMMITTED";
- LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
- }
-
- /* DB_MULTIPLE_KEY is never valid here; DB_MULTIPLE is remembered. */
- multi = 0;
- if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
- if (LF_ISSET(DB_MULTIPLE_KEY))
- goto multi_err;
- multi = LF_ISSET(DB_MULTIPLE) ? 1 : 0;
- LF_CLR(DB_MULTIPLE);
- }
-
- /* Check for invalid function flags. */
- switch (flags) {
- case DB_GET_BOTH:
- if ((ret = __dbt_usercopy(env, data)) != 0)
- return (ret);
- /* FALLTHROUGH */
- case 0:
- if ((ret = __dbt_usercopy(env, key)) != 0) {
- __dbt_userfree(env, key, NULL, data);
- return (ret);
- }
- break;
- case DB_SET_RECNO:
- if (!F_ISSET(dbp, DB_AM_RECNUM))
- goto err;
- if ((ret = __dbt_usercopy(env, key)) != 0)
- return (ret);
- break;
- case DB_CONSUME:
- case DB_CONSUME_WAIT:
- if (isolation != NULL) {
- __db_errx(env,
- "%s is not supported with DB_CONSUME or DB_CONSUME_WAIT",
- isolation);
- return (EINVAL);
- }
- if (multi)
-multi_err: return (__db_ferr(env, "DB->get", 1));
- if (dbp->type == DB_QUEUE)
- break;
- /* FALLTHROUGH */
- default:
-err: return (__db_ferr(env, "DB->get", 0));
- }
-
- /*
- * Check for invalid key/data flags.
- */
- if ((ret =
- __dbt_ferr(dbp, "key", key, DB_RETURNS_A_KEY(dbp, flags))) != 0)
- return (ret);
- if ((ret = __dbt_ferr(dbp, "data", data, 1)) != 0)
- return (ret);
-
- /* Bulk gets place extra requirements on the data buffer. */
- if (multi) {
- if (!F_ISSET(data, DB_DBT_USERMEM)) {
- __db_errx(env,
- "DB_MULTIPLE requires DB_DBT_USERMEM be set");
- return (EINVAL);
- }
- if (F_ISSET(key, DB_DBT_PARTIAL) ||
- F_ISSET(data, DB_DBT_PARTIAL)) {
- __db_errx(env,
- "DB_MULTIPLE does not support DB_DBT_PARTIAL");
- return (EINVAL);
- }
- if (data->ulen < 1024 ||
- data->ulen < dbp->pgsize || data->ulen % 1024 != 0) {
- __db_errx(env, "%s%s",
- "DB_MULTIPLE buffers must be ",
- "aligned, at least page size and multiples of 1KB");
- return (EINVAL);
- }
- }
-
- return (0);
-}
-
-/*
- * __db_join_pp --
- * DB->join pre/post processing.
- *
- * Enters the environment and the replication block, validates the
- * arguments with __db_join_arg and, if they are acceptable, calls
- * __db_join.
- *
- * Returns 0 on success, otherwise the first error encountered.
- *
- * PUBLIC: int __db_join_pp __P((DB *, DBC **, DBC **, u_int32_t));
- */
-int
-__db_join_pp(primary, curslist, dbcp, flags)
- DB *primary;
- DBC **curslist, **dbcp;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, have_txn, ret, t_ret;
-
- env = primary->env;
-
- ENV_ENTER(env, ip);
-
- /*
- * The cursor list is not validated until __db_join_arg runs below,
- * so it may still be NULL or empty here. Only look at the first
- * cursor's transaction when there is a first cursor; an invalid
- * list is then rejected with EINVAL by __db_join_arg instead of
- * being dereferenced.
- */
- have_txn = curslist != NULL &&
- curslist[0] != NULL && curslist[0]->txn != NULL;
-
- /*
- * Check for replication block. On failure handle_check is cleared
- * and the err label, placed after the release call, is used.
- */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check &&
- (ret = __db_rep_enter(primary, 1, 0, have_txn)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- if ((ret = __db_join_arg(primary, curslist, flags)) == 0)
- ret = __db_join(primary, curslist, dbcp, flags);
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
-err: ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_join_arg --
- * Check DB->join arguments: the flags must be 0 or DB_JOIN_NOSORT,
- * the NULL-terminated cursor list must hold at least one cursor, and
- * every cursor in it must carry the same transaction as the first.
- */
-static int
-__db_join_arg(primary, curslist, flags)
- DB *primary;
- DBC **curslist;
- u_int32_t flags;
-{
- DBC **cursp;
- DB_TXN *first_txn;
- ENV *env;
-
- env = primary->env;
-
- if (flags != 0 && flags != DB_JOIN_NOSORT)
- return (__db_ferr(env, "DB->join", 0));
-
- if (curslist == NULL || curslist[0] == NULL) {
- __db_errx(env,
- "At least one secondary cursor must be specified to DB->join");
- return (EINVAL);
- }
-
- /* Walk the remaining cursors, stopping at the NULL terminator. */
- first_txn = curslist[0]->txn;
- for (cursp = curslist + 1; *cursp != NULL; ++cursp) {
- if ((*cursp)->txn == first_txn)
- continue;
- __db_errx(env,
- "All secondary cursors must share the same transaction");
- return (EINVAL);
- }
-
- return (0);
-}
-
-/*
- * __db_key_range_pp --
- * DB->key_range pre/post processing.
- *
- * Rejects any non-zero flags, enters the environment and the
- * replication block, checks transaction usage and then, for btree
- * databases built without HAVE_BREW, computes the range through
- * __bam_key_range (or __part_key_range for partitioned databases).
- * Other access methods are reported through __dbh_am_chk, and an
- * unrecognized type through __db_unknown_type.
- *
- * Returns 0 on success, otherwise the first error encountered.
- *
- * PUBLIC: int __db_key_range_pp
- * PUBLIC: __P((DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t));
- */
-int
-__db_key_range_pp(dbp, txn, key, kr, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key;
- DB_KEY_RANGE *kr;
- u_int32_t flags;
-{
- DBC *dbc;
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- env = dbp->env;
-
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->key_range");
-
- /*
- * !!!
- * The actual argument checking is simple, do it inline, outside of
- * the replication block.
- */
- if (flags != 0)
- return (__db_ferr(env, "DB->key_range", 0));
-
- ENV_ENTER(env, ip);
-
- /*
- * Check for replication block. On failure handle_check is cleared
- * so the cleanup code does not call __env_db_rep_exit.
- */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check &&
- (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- /* Check for consistent transaction usage. */
- if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0)
- goto err;
-
- /*
- * !!!
- * The actual method call is simple, do it inline.
- */
- switch (dbp->type) {
- case DB_BTREE:
-#ifndef HAVE_BREW
- if ((ret = __dbt_usercopy(env, key)) != 0)
- goto err;
-
- /* Acquire a cursor. */
- if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) {
- /* Don't leak the key copy made just above. */
- __dbt_userfree(env, key, NULL, NULL);
- break;
- }
-
- DEBUG_LWRITE(dbc, NULL, "bam_key_range", NULL, NULL, 0);
-#ifdef HAVE_PARTITION
- if (DB_IS_PARTITIONED(dbp))
- ret = __part_key_range(dbc, key, kr, flags);
- else
-#endif
- ret = __bam_key_range(dbc, key, kr, flags);
-
- if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
- __dbt_userfree(env, key, NULL, NULL);
- break;
-#else
- COMPQUIET(dbc, NULL);
- COMPQUIET(key, NULL);
- COMPQUIET(kr, NULL);
- /* FALLTHROUGH */
-#endif
- case DB_HASH:
- case DB_QUEUE:
- case DB_RECNO:
- ret = __dbh_am_chk(dbp, DB_OK_BTREE);
- break;
- case DB_UNKNOWN:
- default:
- ret = __db_unknown_type(env, "DB->key_range", dbp->type);
- break;
- }
-
-err: /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_open_pp --
- * DB->open pre/post processing.
- *
- * Saves copies of the file and database names and the open flags on
- * the handle, enters the replication block, creates a local
- * transaction when the open is auto-commit, validates the arguments
- * and calls __db_open. If the open fails outside a real transaction,
- * a file or subdatabase that this call created is removed again.
- *
- * Two cleanup labels are used: txnerr performs the removal and
- * resolves a local transaction, err only releases the replication
- * block and leaves the environment.
- *
- * Returns 0 on success, otherwise the first error encountered.
- *
- * PUBLIC: int __db_open_pp __P((DB *, DB_TXN *,
- * PUBLIC: const char *, const char *, DBTYPE, u_int32_t, int));
- */
-int
-__db_open_pp(dbp, txn, fname, dname, type, flags, mode)
- DB *dbp;
- DB_TXN *txn;
- const char *fname, *dname;
- DBTYPE type;
- u_int32_t flags;
- int mode;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, nosync, remove_me, ret, t_ret, txn_local;
-
- env = dbp->env;
- nosync = 1;
- handle_check = remove_me = txn_local = 0;
-
- ENV_ENTER(env, ip);
-
- /*
- * Save the file and database names and flags. We do this here
- * because we don't pass all of the flags down into the actual
- * DB->open method call, we strip DB_AUTO_COMMIT at this layer.
- */
- if ((fname != NULL &&
- (ret = __os_strdup(env, fname, &dbp->fname)) != 0))
- goto err;
- if ((dname != NULL &&
- (ret = __os_strdup(env, dname, &dbp->dname)) != 0))
- goto err;
- dbp->open_flags = flags;
-
- /* Save the current DB handle flags for refresh. */
- dbp->orig_flags = dbp->flags;
-
- /*
- * Check for replication block. On failure handle_check is cleared
- * so the err path does not call __env_db_rep_exit.
- */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check &&
- (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- /*
- * Create local transaction as necessary, check for consistent
- * transaction usage.
- */
- if (IS_ENV_AUTO_COMMIT(env, txn, flags)) {
- if ((ret = __db_txn_auto_init(env, ip, &txn)) != 0)
- goto err;
- txn_local = 1;
- } else if (txn != NULL && !TXN_ON(env) &&
- (!CDB_LOCKING(env) || !F_ISSET(txn, TXN_CDSGROUP))) {
- ret = __db_not_txn_env(env);
- goto err;
- }
- LF_CLR(DB_AUTO_COMMIT);
-
- /*
- * We check arguments after possibly creating a local transaction,
- * which is unusual -- the reason is some flags are illegal if any
- * kind of transaction is in effect.
- *
- * Note that when __db_open_arg itself fails, nothing jumps to
- * txnerr: control continues into the code below with ret already
- * non-zero.
- */
- if ((ret = __db_open_arg(dbp, txn, fname, dname, type, flags)) == 0)
- if ((ret = __db_open(dbp, ip, txn, fname, dname, type,
- flags, mode, PGNO_BASE_MD)) != 0)
- goto txnerr;
-
- /*
- * You can open the database that describes the subdatabases in the
- * rest of the file read-only. The content of each key's data is
- * unspecified and applications should never be adding new records
- * or updating existing records. However, during recovery, we need
- * to open these databases R/W so we can redo/undo changes in them.
- * Likewise, we need to open master databases read/write during
- * rename and remove so we can be sure they're fully sync'ed, so
- * we provide an override flag for the purpose.
- */
- if (dname == NULL && !IS_RECOVERING(env) && !LF_ISSET(DB_RDONLY) &&
- !LF_ISSET(DB_RDWRMASTER) && F_ISSET(dbp, DB_AM_SUBDB)) {
- __db_errx(env,
- "files containing multiple databases may only be opened read-only");
- ret = EINVAL;
- goto txnerr;
- }
-
- /*
- * Success: file creations have to be synchronous, otherwise we don't
- * care.
- */
- if (F_ISSET(dbp, DB_AM_CREATED | DB_AM_CREATED_MSTR))
- nosync = 0;
-
- /* Success: don't discard the file on close. */
- F_CLR(dbp, DB_AM_DISCARD | DB_AM_CREATED | DB_AM_CREATED_MSTR);
-
- /*
- * If not transactional, remove the databases/subdatabases if it is
- * persistent. If we're transactional, the child transaction abort
- * cleans up.
- */
-txnerr: if (ret != 0 && !IS_REAL_TXN(txn)) {
- remove_me = (F_ISSET(dbp, DB_AM_CREATED) &&
- (fname != NULL || dname != NULL)) ? 1 : 0;
- if (F_ISSET(dbp, DB_AM_CREATED_MSTR) ||
- (dname == NULL && remove_me))
- /* Remove file. */
- (void)__db_remove_int(dbp,
- ip, txn, fname, NULL, DB_FORCE);
- else if (remove_me)
- /* Remove subdatabase. */
- (void)__db_remove_int(dbp,
- ip, txn, fname, dname, DB_FORCE);
- }
-
- if (txn_local && (t_ret =
- __db_txn_auto_resolve(env, txn, nosync, ret)) && ret == 0)
- ret = t_ret;
-
-err: /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_open_arg --
- * Check DB->open arguments.
- *
- * Validates, in order: the flags against OKFLAGS and two flag
- * combinations (DB_EXCL requires DB_CREATE; DB_RDONLY excludes
- * DB_CREATE), the requested access method type, the state of the
- * environment, DB_THREAD, DB_MULTIVERSION, DB_TRUNCATE and the
- * subdatabase restrictions.
- *
- * Side effect: for a named in-memory database (dname set and fname
- * NULL), DB_AM_CHKSUM and DB_AM_ENCRYPT are cleared on dbp.
- *
- * Returns 0 if the arguments are acceptable, an error value otherwise.
- */
-static int
-__db_open_arg(dbp, txn, fname, dname, type, flags)
- DB *dbp;
- DB_TXN *txn;
- const char *fname, *dname;
- DBTYPE type;
- u_int32_t flags;
-{
- ENV *env;
- u_int32_t ok_flags;
- int ret;
-
- env = dbp->env;
-
- /* Validate arguments. */
-#undef OKFLAGS
-#define OKFLAGS \
- (DB_AUTO_COMMIT | DB_CREATE | DB_EXCL | DB_FCNTL_LOCKING | \
- DB_MULTIVERSION | DB_NOMMAP | DB_NO_AUTO_COMMIT | DB_RDONLY | \
- DB_RDWRMASTER | DB_READ_UNCOMMITTED | DB_THREAD | DB_TRUNCATE)
- if ((ret = __db_fchk(env, "DB->open", flags, OKFLAGS)) != 0)
- return (ret);
- if (LF_ISSET(DB_EXCL) && !LF_ISSET(DB_CREATE))
- return (__db_ferr(env, "DB->open", 1));
- if (LF_ISSET(DB_RDONLY) && LF_ISSET(DB_CREATE))
- return (__db_ferr(env, "DB->open", 1));
-
-#ifdef HAVE_VXWORKS
- if (LF_ISSET(DB_TRUNCATE)) {
- __db_errx(env, "DB_TRUNCATE not supported on VxWorks");
- return (DB_OPNOTSUP);
- }
-#endif
- switch (type) {
- case DB_UNKNOWN:
- if (LF_ISSET(DB_CREATE|DB_TRUNCATE)) {
- __db_errx(env,
- "DB_UNKNOWN type specified with DB_CREATE or DB_TRUNCATE");
- return (EINVAL);
- }
- ok_flags = 0;
- break;
- case DB_BTREE:
- ok_flags = DB_OK_BTREE;
- break;
- case DB_HASH:
-#ifndef HAVE_HASH
- return (__db_no_hash_am(env));
-#endif
- ok_flags = DB_OK_HASH;
- break;
- case DB_QUEUE:
-#ifndef HAVE_QUEUE
- return (__db_no_queue_am(env));
-#endif
- ok_flags = DB_OK_QUEUE;
- break;
- case DB_RECNO:
- ok_flags = DB_OK_RECNO;
- break;
- default:
- __db_errx(env, "unknown type: %lu", (u_long)type);
- return (EINVAL);
- }
- if (ok_flags)
- DB_ILLEGAL_METHOD(dbp, ok_flags);
-
- /* The environment may have been created, but never opened. */
- if (!F_ISSET(env, ENV_DBLOCAL | ENV_OPEN_CALLED)) {
- __db_errx(env, "database environment not yet opened");
- return (EINVAL);
- }
-
- /*
- * Historically, you could pass in an environment that didn't have a
- * mpool, and DB would create a private one behind the scenes. This
- * no longer works.
- */
- if (!F_ISSET(env, ENV_DBLOCAL) && !MPOOL_ON(env)) {
- __db_errx(env, "environment did not include a memory pool");
- return (EINVAL);
- }
-
- /*
- * You can't specify threads during DB->open if subsystems in the
- * environment weren't configured with them.
- */
- if (LF_ISSET(DB_THREAD) && !F_ISSET(env, ENV_DBLOCAL | ENV_THREAD)) {
- __db_errx(env, "environment not created using DB_THREAD");
- return (EINVAL);
- }
-
- /* DB_MULTIVERSION requires a database configured for transactions. */
- if (LF_ISSET(DB_MULTIVERSION) && !IS_REAL_TXN(txn)) {
- __db_errx(env,
- "DB_MULTIVERSION illegal without a transaction specified");
- return (EINVAL);
- }
-
- if (LF_ISSET(DB_MULTIVERSION) && type == DB_QUEUE) {
- __db_errx(env,
- "DB_MULTIVERSION illegal with queue databases");
- return (EINVAL);
- }
-
- /* DB_TRUNCATE is neither transaction recoverable nor lockable. */
- if (LF_ISSET(DB_TRUNCATE) && (LOCKING_ON(env) || txn != NULL)) {
- __db_errx(env,
- "DB_TRUNCATE illegal with %s specified",
- LOCKING_ON(env) ? "locking" : "transactions");
- return (EINVAL);
- }
-
- /* Subdatabase checks. */
- if (dname != NULL) {
- /* QAM can only be done on in-memory subdatabases. */
- if (type == DB_QUEUE && fname != NULL) {
- __db_errx(
- env, "Queue databases must be one-per-file");
- return (EINVAL);
- }
-
- /*
- * Named in-memory databases can't support certain flags,
- * so check here.
- */
- if (fname == NULL)
- F_CLR(dbp, DB_AM_CHKSUM | DB_AM_ENCRYPT);
- }
-
- return (0);
-}
-
-/*
- * __db_pget_pp --
- * DB->pget pre/post processing.
- *
- * Runs the pget-specific checks (__db_pget_arg) followed by the
- * checks shared with DB->get (__db_get_arg), enters the environment
- * and the replication block, and calls __db_pget. On a replication
- * master using leases the lease is then checked, unless the caller
- * passed DB_IGNORE_LEASE. User-copied memory for all three DBTs is
- * released on every return path.
- *
- * Returns 0 on success, otherwise the first error encountered.
- *
- * PUBLIC: int __db_pget_pp
- * PUBLIC: __P((DB *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t));
- */
-int
-__db_pget_pp(dbp, txn, skey, pkey, data, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *skey, *pkey, *data;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ignore_lease, ret, t_ret;
-
- env = dbp->env;
-
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->pget");
-
- ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
- LF_CLR(DB_IGNORE_LEASE);
-
- if ((ret = __db_pget_arg(dbp, pkey, flags)) != 0 ||
- (ret = __db_get_arg(dbp, skey, data, flags)) != 0) {
- __dbt_userfree(env, skey, pkey, data);
- return (ret);
- }
-
- ENV_ENTER(env, ip);
-
- /*
- * Check for replication block. On failure handle_check is cleared
- * so the cleanup code does not call __env_db_rep_exit.
- */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check &&
- (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- ret = __db_pget(dbp, ip, txn, skey, pkey, data, flags);
- /*
- * Check for master leases.
- */
- if (ret == 0 &&
- IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
- ret = __rep_lease_check(env, 1);
-
-err: /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
- ENV_LEAVE(env, ip);
- __dbt_userfree(env, skey, pkey, data);
- return (ret);
-}
-
-/*
- * __db_pget --
- * DB->pget.
- *
- * Opens a cursor on dbp with __db_cursor, moving any isolation flag
- * out of the operation flags and into the cursor mode, performs the
- * lookup through __dbc_pget, and closes the cursor. When flags is 0
- * or exactly DB_RMW, DB_SET is added before the call.
- *
- * Returns the cursor-open error if the cursor cannot be created;
- * otherwise the pget result, or the cursor-close error when the pget
- * itself returned 0.
- *
- * PUBLIC: int __db_pget __P((DB *,
- * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t));
- */
-int
-__db_pget(dbp, ip, txn, skey, pkey, data, flags)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- DBT *skey, *pkey, *data;
- u_int32_t flags;
-{
- DBC *dbc;
- u_int32_t mode;
- int ret, t_ret;
-
- mode = DB_CURSOR_TRANSIENT;
- if (LF_ISSET(DB_READ_UNCOMMITTED)) {
- mode |= DB_READ_UNCOMMITTED;
- LF_CLR(DB_READ_UNCOMMITTED);
- } else if (LF_ISSET(DB_READ_COMMITTED)) {
- mode |= DB_READ_COMMITTED;
- LF_CLR(DB_READ_COMMITTED);
- }
-
- if ((ret = __db_cursor(dbp, ip, txn, &dbc, mode)) != 0)
- return (ret);
-
- SET_RET_MEM(dbc, dbp);
-
- DEBUG_LREAD(dbc, txn, "__db_pget", skey, NULL, flags);
-
- /*
- * !!!
- * The actual method call is simple, do it inline.
- *
- * The underlying cursor pget will fill in a default DBT for null
- * pkeys, and use the cursor's returned-key memory internally to
- * store any intermediate primary keys. However, we've just set
- * the returned-key memory to the DB handle's key memory, which
- * is unsafe to use if the DB handle is threaded. If the pkey
- * argument is NULL, use the DBC-owned returned-key memory
- * instead; it'll go away when we close the cursor before we
- * return, but in this case that's just fine, as we're not
- * returning the primary key.
- */
- if (pkey == NULL)
- dbc->rkey = &dbc->my_rkey;
-
- /*
- * The cursor is just a perfectly ordinary secondary database cursor.
- * Call its c_pget() method to do the dirty work.
- */
- if (flags == 0 || flags == DB_RMW)
- flags |= DB_SET;
-
- ret = __dbc_pget(dbc, skey, pkey, data, flags);
-
- if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __db_pget_arg --
- * Check DB->pget arguments.
- *
- * Covers only the checks specific to pget: dbp must be a secondary
- * index, DB_MULTIPLE and DB_MULTIPLE_KEY are rejected, DB_CONSUME and
- * DB_CONSUME_WAIT are rejected, a non-NULL pkey has its DBT flags
- * checked, and DB_GET_BOTH requires a non-NULL pkey, which is then
- * passed to __dbt_usercopy. All remaining flag checking is left to
- * __db_get_arg.
- *
- * Returns 0 if the arguments are acceptable, an error value otherwise.
- */
-static int
-__db_pget_arg(dbp, pkey, flags)
- DB *dbp;
- DBT *pkey;
- u_int32_t flags;
-{
- ENV *env;
- int ret;
-
- env = dbp->env;
-
- if (!F_ISSET(dbp, DB_AM_SECONDARY)) {
- __db_errx(env,
- "DB->pget may only be used on secondary indices");
- return (EINVAL);
- }
-
- if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
- __db_errx(env,
- "DB_MULTIPLE and DB_MULTIPLE_KEY may not be used on secondary indices");
- return (EINVAL);
- }
-
- /*
- * DB_CONSUME makes no sense on a secondary index. The isolation
- * and DB_RMW bits are cleared from the local copy of flags first,
- * so the switch and the DB_GET_BOTH test below compare only the
- * remaining value.
- */
- LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
- switch (flags) {
- case DB_CONSUME:
- case DB_CONSUME_WAIT:
- return (__db_ferr(env, "DB->pget", 0));
- default:
- /* __db_get_arg will catch the rest. */
- break;
- }
-
- /*
- * We allow the pkey field to be NULL, so that we can make the
- * two-DBT get calls into wrappers for the three-DBT ones.
- */
- if (pkey != NULL &&
- (ret = __dbt_ferr(dbp, "primary key", pkey, 1)) != 0)
- return (ret);
-
- if (flags == DB_GET_BOTH) {
- /* The pkey field can't be NULL if we're doing a DB_GET_BOTH. */
- if (pkey == NULL) {
- __db_errx(env,
- "DB_GET_BOTH on a secondary index requires a primary key");
- return (EINVAL);
- }
- if ((ret = __dbt_usercopy(env, pkey)) != 0)
- return (ret);
- }
-
- return (0);
-}
-
-/*
- * __db_put_pp --
- * DB->put pre/post processing.
- *
- * Checks the arguments, enters the environment and the replication
- * block, begins a local transaction for auto-commit handles, checks
- * transaction usage and calls __db_put. A transaction begun here is
- * resolved on the way out, and any user-copied key/data memory is
- * released before returning, including when argument checking fails.
- *
- * Returns 0 on success, otherwise the first error encountered.
- *
- * PUBLIC: int __db_put_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
- */
-int
-__db_put_pp(dbp, txn, key, data, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key, *data;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, txn_local, t_ret;
-
- env = dbp->env;
- txn_local = 0;
-
- STRIP_AUTO_COMMIT(flags);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->put");
-
- if ((ret = __db_put_arg(dbp, key, data, flags)) != 0) {
- /*
- * __db_put_arg copies the key before the data, so a failure
- * copying the data would otherwise leak the key copy.
- * Release whatever was copied, as __db_pget_pp does when its
- * argument checks fail.
- */
- __dbt_userfree(env, key, NULL, data);
- return (ret);
- }
-
- ENV_ENTER(env, ip);
-
- /*
- * Check for replication block. On failure handle_check is cleared
- * so the cleanup code does not call __env_db_rep_exit.
- */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check &&
- (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- /* Create local transaction as necessary. */
- if (IS_DB_AUTO_COMMIT(dbp, txn)) {
- if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
- goto err;
- txn_local = 1;
- }
-
- /* Check for consistent transaction usage. */
- if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0)
- goto err;
-
- ret = __db_put(dbp, ip, txn, key, data, flags);
-
-err: if (txn_local &&
- (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
- ret = t_ret;
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
- ENV_LEAVE(env, ip);
- __dbt_userfree(env, key, NULL, data);
- return (ret);
-}
-
-/*
- * __db_put_arg --
- * Check DB->put arguments.
- *
- * Rejects read-only handles and secondary indices, validates the bulk
- * flags and the operation flag, checks the key and data DBT flags,
- * and finally makes user copies of the key (except for DB_APPEND) and
- * of the data (except for DB_MULTIPLE_KEY) through __dbt_usercopy.
- * The key is copied before the data, so a caller that gets an error
- * back should still call __dbt_userfree.
- *
- * Returns 0 if the arguments are acceptable, an error value otherwise.
- */
-static int
-__db_put_arg(dbp, key, data, flags)
- DB *dbp;
- DBT *key, *data;
- u_int32_t flags;
-{
- ENV *env;
- int ret, returnkey;
-
- env = dbp->env;
- returnkey = 0;
-
- /* Check for changes to a read-only tree. */
- if (DB_IS_READONLY(dbp))
- return (__db_rdonly(env, "DB->put"));
-
- /* Check for puts on a secondary. */
- if (F_ISSET(dbp, DB_AM_SECONDARY)) {
- __db_errx(env, "DB->put forbidden on secondary indices");
- return (EINVAL);
- }
-
- /* The two bulk flags exclude each other and need bulk buffers. */
- if (LF_ISSET(DB_MULTIPLE_KEY | DB_MULTIPLE)) {
- if (LF_ISSET(DB_MULTIPLE) && LF_ISSET(DB_MULTIPLE_KEY))
- goto err;
-
- switch (LF_ISSET(DB_OPFLAGS_MASK)) {
- case 0:
- case DB_OVERWRITE_DUP:
- break;
- default:
- __db_errx(env,
- "DB->put: DB_MULTIPLE(_KEY) can only be combined with DB_OVERWRITE_DUP");
- return (EINVAL);
- }
-
- if (!F_ISSET(key, DB_DBT_BULK)) {
- __db_errx(env,
- "DB->put with DB_MULTIPLE(_KEY) requires a bulk key buffer");
- return (EINVAL);
- }
- }
- if (LF_ISSET(DB_MULTIPLE)) {
- if (!F_ISSET(data, DB_DBT_BULK)) {
- __db_errx(env,
- "DB->put with DB_MULTIPLE requires a bulk data buffer");
- return (EINVAL);
- }
- }
-
- /* Check for invalid function flags. */
- switch (LF_ISSET(DB_OPFLAGS_MASK)) {
- case 0:
- case DB_NOOVERWRITE:
- case DB_OVERWRITE_DUP:
- break;
- case DB_APPEND:
- if (dbp->type != DB_RECNO && dbp->type != DB_QUEUE)
- goto err;
- returnkey = 1;
- break;
- case DB_NODUPDATA:
- if (F_ISSET(dbp, DB_AM_DUPSORT))
- break;
- /* FALLTHROUGH */
- default:
-err: return (__db_ferr(env, "DB->put", 0));
- }
-
- /*
- * Check for invalid key/data flags. The key may reasonably be NULL
- * if DB_APPEND is set and the application doesn't care about the
- * returned key.
- */
- if (((returnkey && key != NULL) || !returnkey) &&
- (ret = __dbt_ferr(dbp, "key", key, returnkey)) != 0)
- return (ret);
- if (!LF_ISSET(DB_MULTIPLE_KEY) &&
- (ret = __dbt_ferr(dbp, "data", data, 0)) != 0)
- return (ret);
-
- /*
- * The key parameter should not be NULL or have the "partial" flag set
- * in a put call unless the user doesn't care about a key value we'd
- * return. The user tells us they don't care about the returned key by
- * setting the key parameter to NULL or configuring the key DBT to not
- * return any information. (Returned keys from a put are always record
- * numbers, and returning part of a record number doesn't make sense:
- * only accept a partial return if the length returned is 0.)
- */
- if ((returnkey &&
- key != NULL && F_ISSET(key, DB_DBT_PARTIAL) && key->dlen != 0) ||
- (!returnkey && F_ISSET(key, DB_DBT_PARTIAL)))
- return (__db_ferr(env, "key DBT", 0));
-
- /*
- * Check for partial puts in the presence of duplicates. The key
- * may be NULL here for DB_APPEND (see above), so its flags are only
- * examined when a key was actually supplied.
- */
- if (data != NULL && F_ISSET(data, DB_DBT_PARTIAL) &&
- (F_ISSET(dbp, DB_AM_DUP) ||
- (key != NULL && F_ISSET(key, DB_DBT_DUPOK)))) {
- __db_errx(env,
-"a partial put in the presence of duplicates requires a cursor operation");
- return (EINVAL);
- }
-
- if ((flags != DB_APPEND && (ret = __dbt_usercopy(env, key)) != 0) ||
- (!LF_ISSET(DB_MULTIPLE_KEY) &&
- (ret = __dbt_usercopy(env, data)) != 0))
- return (ret);
-
- return (0);
-}
-
-/*
- * __db_compact_pp --
- * DB->compact pre/post processing.
- *
- * Validates the flags, rejects read-only handles, makes user copies
- * of the start and stop keys, enters the environment and the
- * replication block, and dispatches to __part_compact for partitioned
- * databases or to __bam_compact for btree and recno databases (and
- * for hash databases when DB_FREELIST_ONLY is set). Other cases are
- * reported through __dbh_am_chk. When c_data is NULL a zeroed local
- * DB_COMPACT structure is passed down instead.
- *
- * Returns 0 on success, otherwise the first error encountered.
- *
- * PUBLIC: int __db_compact_pp __P((DB *, DB_TXN *,
- * PUBLIC: DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *));
- */
-int
-__db_compact_pp(dbp, txn, start, stop, c_data, flags, end)
- DB *dbp;
- DB_TXN *txn;
- DBT *start, *stop;
- DB_COMPACT *c_data;
- u_int32_t flags;
- DBT *end;
-{
- DB_COMPACT *dp, l_data;
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- env = dbp->env;
-
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->compact");
-
- /*
- * !!!
- * The actual argument checking is simple, do it inline, outside of
- * the replication block.
- */
- if ((ret = __db_fchk(
- env, "DB->compact", flags, DB_FREELIST_ONLY | DB_FREE_SPACE)) != 0)
- return (ret);
-
- /* Check for changes to a read-only database. */
- if (DB_IS_READONLY(dbp))
- return (__db_rdonly(env, "DB->compact"));
-
- if (start != NULL && (ret = __dbt_usercopy(env, start)) != 0)
- return (ret);
- if (stop != NULL && (ret = __dbt_usercopy(env, stop)) != 0) {
- /* Don't leak the copy of start made just above. */
- __dbt_userfree(env, start, NULL, NULL);
- return (ret);
- }
-
- ENV_ENTER(env, ip);
-
- /*
- * Check for replication block. A failure must skip the dispatch
- * below entirely and keep its own error code, so it jumps to the
- * cleanup label rather than into the switch.
- */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check && (ret = __db_rep_enter(dbp, 1, 0,
- txn != NULL)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- if (c_data == NULL) {
- dp = &l_data;
- memset(dp, 0, sizeof(*dp));
- } else
- dp = c_data;
-#ifdef HAVE_PARTITION
- if (DB_IS_PARTITIONED(dbp))
- ret = __part_compact(dbp, ip, txn, start, stop, dp, flags, end);
- else
-#endif
- switch (dbp->type) {
- case DB_HASH:
- if (!LF_ISSET(DB_FREELIST_ONLY))
- goto notsup;
- /* FALLTHROUGH */
- case DB_BTREE:
- case DB_RECNO:
- ret = __bam_compact(dbp, ip, txn, start, stop, dp, flags, end);
- break;
-
- default:
-notsup: ret = __dbh_am_chk(dbp, DB_OK_BTREE);
- break;
- }
-
-err: /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
- ENV_LEAVE(env, ip);
- __dbt_userfree(env, start, stop, NULL);
- return (ret);
-}
-
-/*
- * __db_associate_foreign_pp --
- * DB->associate_foreign pre/post processing.
- *
- * Enters the environment (taken from dbp) and the replication block,
- * validates the arguments with __db_associate_foreign_arg and, if
- * they are acceptable, calls __db_associate_foreign.
- *
- * Returns 0 on success, otherwise the first error encountered.
- *
- * PUBLIC: int __db_associate_foreign_pp __P((DB *, DB *,
- * PUBLIC: int (*)(DB *, const DBT *, DBT *, const DBT *, int *),
- * PUBLIC: u_int32_t));
- */
-int
-__db_associate_foreign_pp(fdbp, dbp, callback, flags)
- DB *dbp, *fdbp;
- int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *));
- u_int32_t flags;
-{
- /* Most of this is based on the implementation of associate */
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- env = dbp->env;
-
- PANIC_CHECK(env);
- STRIP_AUTO_COMMIT(flags);
-
- ENV_ENTER(env, ip);
-
- /*
- * Check for replication block. On failure handle_check is cleared
- * so the err path does not call __env_db_rep_exit.
- */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check &&
- (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- if ((ret = __db_associate_foreign_arg(fdbp, dbp, callback, flags)) != 0)
- goto err;
-
- ret = __db_associate_foreign(fdbp, dbp, callback, flags);
-
-err: /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_associate_foreign_arg --
- * DB->associate_foreign argument checking.
- *
- * The foreign database may not be a secondary index, may not be
- * configured with duplicates and may not be a renumbering recno
- * database; the associating database must be a secondary index; and
- * a callback must be supplied exactly when DB_FOREIGN_NULLIFY is set.
- * Errors are reported against the foreign database's environment.
- */
-static int
-__db_associate_foreign_arg(fdbp, dbp, callback, flags)
- DB *dbp, *fdbp;
- int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *));
- u_int32_t flags;
-{
- ENV *env;
- int nullify;
-
- env = fdbp->env;
-
- /* Restrictions on the foreign database. */
- if (F_ISSET(fdbp, DB_AM_SECONDARY)) {
- __db_errx(env,
- "Secondary indices may not be used as foreign databases");
- return (EINVAL);
- }
- if (F_ISSET(fdbp, DB_AM_DUP)) {
- __db_errx(env,
- "Foreign databases may not be configured with duplicates");
- return (EINVAL);
- }
- if (F_ISSET(fdbp, DB_AM_RENUMBER)) {
- __db_errx(env,
- "Renumbering recno databases may not be used as foreign databases");
- return (EINVAL);
- }
-
- /* Restriction on the associating database. */
- if (!F_ISSET(dbp, DB_AM_SECONDARY)) {
- __db_errx(env,
- "The associating database must be a secondary index.");
- return (EINVAL);
- }
-
- /* The callback and the nullify action go together. */
- nullify = LF_ISSET(DB_FOREIGN_NULLIFY) ? 1 : 0;
- if (nullify && callback == NULL) {
- __db_errx(env,
- "When specifying a delete action of nullify, a callback%s",
- " function needs to be configured");
- return (EINVAL);
- }
- if (!nullify && callback != NULL) {
- __db_errx(env,
- "When not specifying a delete action of nullify, a%s",
- " callback function cannot be configured");
- return (EINVAL);
- }
-
- return (0);
-}
-
-/*
- * __db_sync_pp --
- * DB->sync pre/post processing.
- *
- * PUBLIC: int __db_sync_pp __P((DB *, u_int32_t));
- */
-int
-__db_sync_pp(dbp, flags)
- DB *dbp;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- env = dbp->env;
-
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync");
-
- /*
- * !!!
- * The actual argument checking is simple, do it inline, outside of
- * the replication block.
- */
- if (flags != 0)
- return (__db_ferr(env, "DB->sync", 0));
-
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- ret = __db_sync(dbp);
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
-err: ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __dbc_close_pp --
- * DBC->close pre/post processing.
- *
- * PUBLIC: int __dbc_close_pp __P((DBC *));
- */
-int
-__dbc_close_pp(dbc)
- DBC *dbc;
-{
- DB *dbp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- /*
- * If the cursor is already closed we have a serious problem, and we
- * assume that the cursor isn't on the active queue. Don't do any of
- * the remaining cursor close processing.
- */
- if (!F_ISSET(dbc, DBC_ACTIVE)) {
- __db_errx(env, "Closing already-closed cursor");
- return (EINVAL);
- }
-
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- handle_check = dbc->txn == NULL && IS_ENV_REPLICATED(env);
- ret = __dbc_close(dbc);
-
- /* Release replication block. */
- if (handle_check &&
- (t_ret = __op_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __dbc_cmp_pp --
- * DBC->cmp pre/post processing.
- *
- * PUBLIC: int __dbc_cmp_pp __P((DBC *, DBC *, int*, u_int32_t));
- */
-int
-__dbc_cmp_pp(dbc, other_cursor, result, flags)
- DBC *dbc, *other_cursor;
- int *result;
- u_int32_t flags;
-{
- DB *dbp, *odbp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int ret;
-
- dbp = dbc->dbp;
- odbp = other_cursor->dbp;
- env = dbp->env;
-
- if (flags != 0)
- return (__db_ferr(env, "DBcursor->cmp", 0));
-
- if (other_cursor == NULL) {
- __db_errx(env, "DBcursor->cmp dbc pointer must not be null");
- return (EINVAL);
- }
-
- if (dbp != odbp) {
- __db_errx(env,
-"DBcursor->cmp both cursors must refer to the same database.");
- return (EINVAL);
- }
-
- ENV_ENTER(env, ip);
- ret = __dbc_cmp(dbc, other_cursor, result);
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __dbc_count_pp --
- * DBC->count pre/post processing.
- *
- * PUBLIC: int __dbc_count_pp __P((DBC *, db_recno_t *, u_int32_t));
- */
-int
-__dbc_count_pp(dbc, recnop, flags)
- DBC *dbc;
- db_recno_t *recnop;
- u_int32_t flags;
-{
- DB *dbp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- /*
- * !!!
- * The actual argument checking is simple, do it inline, outside of
- * the replication block.
- *
- * The cursor must be initialized, return EINVAL for an invalid cursor.
- */
- if (flags != 0)
- return (__db_ferr(env, "DBcursor->count", 0));
-
- if (!IS_INITIALIZED(dbc))
- return (__db_curinval(env));
-
- ENV_ENTER(env, ip);
- ret = __dbc_count(dbc, recnop);
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __dbc_del_pp --
- * DBC->del pre/post processing.
- *
- * PUBLIC: int __dbc_del_pp __P((DBC *, u_int32_t));
- */
-int
-__dbc_del_pp(dbc, flags)
- DBC *dbc;
- u_int32_t flags;
-{
- DB *dbp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- if ((ret = __dbc_del_arg(dbc, flags)) != 0)
- return (ret);
-
- ENV_ENTER(env, ip);
-
- /* Check for consistent transaction usage. */
- if ((ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0)
- goto err;
-
- DEBUG_LWRITE(dbc, dbc->txn, "DBcursor->del", NULL, NULL, flags);
- ret = __dbc_del(dbc, flags);
-
-err: ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __dbc_del_arg --
- * Check DBC->del arguments.
- */
-static int
-__dbc_del_arg(dbc, flags)
- DBC *dbc;
- u_int32_t flags;
-{
- DB *dbp;
- ENV *env;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- /* Check for changes to a read-only tree. */
- if (DB_IS_READONLY(dbp))
- return (__db_rdonly(env, "DBcursor->del"));
-
- /* Check for invalid function flags. */
- switch (flags) {
- case 0:
- break;
- case DB_CONSUME:
- if (dbp->type != DB_QUEUE)
- return (__db_ferr(env, "DBC->del", 0));
- break;
- case DB_UPDATE_SECONDARY:
- DB_ASSERT(env, F_ISSET(dbp, DB_AM_SECONDARY));
- break;
- default:
- return (__db_ferr(env, "DBcursor->del", 0));
- }
-
- /*
- * The cursor must be initialized, return EINVAL for an invalid cursor,
- * otherwise 0.
- */
- if (!IS_INITIALIZED(dbc))
- return (__db_curinval(env));
-
- return (0);
-}
-
-/*
- * __dbc_dup_pp --
- * DBC->dup pre/post processing.
- *
- * PUBLIC: int __dbc_dup_pp __P((DBC *, DBC **, u_int32_t));
- */
-int
-__dbc_dup_pp(dbc, dbcp, flags)
- DBC *dbc, **dbcp;
- u_int32_t flags;
-{
- DB *dbp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- /*
- * !!!
- * The actual argument checking is simple, do it inline, outside of
- * the replication block.
- */
- if (flags != 0 && flags != DB_POSITION)
- return (__db_ferr(env, "DBcursor->dup", 0));
-
- ENV_ENTER(env, ip);
- ret = __dbc_dup(dbc, dbcp, flags);
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __dbc_get_pp --
- * DBC->get pre/post processing.
- *
- * PUBLIC: int __dbc_get_pp __P((DBC *, DBT *, DBT *, u_int32_t));
- */
-int
-__dbc_get_pp(dbc, key, data, flags)
- DBC *dbc;
- DBT *key, *data;
- u_int32_t flags;
-{
- DB *dbp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int ignore_lease, ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
- LF_CLR(DB_IGNORE_LEASE);
- if ((ret = __dbc_get_arg(dbc, key, data, flags)) != 0)
- return (ret);
-
- ENV_ENTER(env, ip);
-
- DEBUG_LREAD(dbc, dbc->txn, "DBcursor->get",
- flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags);
- ret = __dbc_get(dbc, key, data, flags);
-
- /*
- * Check for master leases.
- */
- if (ret == 0 &&
- IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
- ret = __rep_lease_check(env, 1);
-
- ENV_LEAVE(env, ip);
- __dbt_userfree(env, key, NULL, data);
- return (ret);
-}
-
-/*
- * __dbc_get_arg --
- * Common DBC->get argument checking, used by both DBC->get and DBC->pget.
- * PUBLIC: int __dbc_get_arg __P((DBC *, DBT *, DBT *, u_int32_t));
- */
-int
-__dbc_get_arg(dbc, key, data, flags)
- DBC *dbc;
- DBT *key, *data;
- u_int32_t flags;
-{
- DB *dbp;
- ENV *env;
- int dirty, multi, ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- /*
- * Typically in checking routines that modify the flags, we have
- * to save them and restore them, because the checking routine
- * calls the work routine. However, this is a pure-checking
- * routine which returns to a function that calls the work routine,
- * so it's OK that we do not save and restore the flags, even though
- * we modify them.
- *
- * Check for read-modify-write validity. DB_RMW doesn't make sense
- * with CDB cursors since if you're going to write the cursor, you
- * had to create it with DB_WRITECURSOR. Regardless, we check for
- * LOCKING_ON and not STD_LOCKING, as we don't want to disallow it.
- * If this changes, confirm that DB does not itself set the DB_RMW
- * flag in a path where CDB may have been configured.
- */
- dirty = 0;
- if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) {
- if (!LOCKING_ON(env))
- return (__db_fnl(env, "DBcursor->get"));
- if (LF_ISSET(DB_READ_UNCOMMITTED))
- dirty = 1;
- LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
- }
-
- multi = 0;
- if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
- multi = 1;
- if (LF_ISSET(DB_MULTIPLE) && LF_ISSET(DB_MULTIPLE_KEY))
- goto multi_err;
- LF_CLR(DB_MULTIPLE | DB_MULTIPLE_KEY);
- }
-
- /* Check for invalid function flags. */
- switch (flags) {
- case DB_CONSUME:
- case DB_CONSUME_WAIT:
- if (dirty) {
- __db_errx(env,
- "DB_READ_UNCOMMITTED is not supported with DB_CONSUME or DB_CONSUME_WAIT");
- return (EINVAL);
- }
- if (dbp->type != DB_QUEUE)
- goto err;
- break;
- case DB_CURRENT:
- case DB_FIRST:
- case DB_NEXT:
- case DB_NEXT_DUP:
- case DB_NEXT_NODUP:
- break;
- case DB_LAST:
- case DB_PREV:
- case DB_PREV_DUP:
- case DB_PREV_NODUP:
- if (multi)
-multi_err: return (__db_ferr(env, "DBcursor->get", 1));
- break;
- case DB_GET_BOTHC:
- if (dbp->type == DB_QUEUE)
- goto err;
- /* FALLTHROUGH */
- case DB_GET_BOTH:
- case DB_GET_BOTH_RANGE:
- if ((ret = __dbt_usercopy(env, data)) != 0)
- goto err;
- /* FALLTHROUGH */
- case DB_SET:
- case DB_SET_RANGE:
- if ((ret = __dbt_usercopy(env, key)) != 0)
- goto err;
- break;
- case DB_GET_RECNO:
- /*
- * The one situation in which this might be legal with a
- * non-RECNUM dbp is if dbp is a secondary and its primary is
- * DB_AM_RECNUM.
- */
- if (!F_ISSET(dbp, DB_AM_RECNUM) &&
- (!F_ISSET(dbp, DB_AM_SECONDARY) ||
- !F_ISSET(dbp->s_primary, DB_AM_RECNUM)))
- goto err;
- break;
- case DB_SET_RECNO:
- if (!F_ISSET(dbp, DB_AM_RECNUM))
- goto err;
- if ((ret = __dbt_usercopy(env, key)) != 0)
- goto err;
- break;
- default:
-err: __dbt_userfree(env, key, NULL, data);
- return (__db_ferr(env, "DBcursor->get", 0));
- }
-
- /* Check for invalid key/data flags. */
- if ((ret = __dbt_ferr(dbp, "key", key, 0)) != 0)
- return (ret);
- if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0)
- return (ret);
-
- if (multi) {
- if (!F_ISSET(data, DB_DBT_USERMEM)) {
- __db_errx(env,
- "DB_MULTIPLE/DB_MULTIPLE_KEY require DB_DBT_USERMEM be set");
- return (EINVAL);
- }
- if (F_ISSET(key, DB_DBT_PARTIAL) ||
- F_ISSET(data, DB_DBT_PARTIAL)) {
- __db_errx(env,
- "DB_MULTIPLE/DB_MULTIPLE_KEY do not support DB_DBT_PARTIAL");
- return (EINVAL);
- }
- if (data->ulen < 1024 ||
- data->ulen < dbp->pgsize || data->ulen % 1024 != 0) {
- __db_errx(env, "%s%s",
- "DB_MULTIPLE/DB_MULTIPLE_KEY buffers must be ",
- "aligned, at least page size and multiples of 1KB");
- return (EINVAL);
- }
- }
-
- /*
- * The cursor must be initialized for DB_CURRENT, DB_GET_RECNO,
- * DB_PREV_DUP and DB_NEXT_DUP. Return EINVAL for an invalid
- * cursor, otherwise 0.
- */
- if (!IS_INITIALIZED(dbc) && (flags == DB_CURRENT ||
- flags == DB_GET_RECNO ||
- flags == DB_NEXT_DUP || flags == DB_PREV_DUP))
- return (__db_curinval(env));
-
- /* Check for consistent transaction usage. */
- if (LF_ISSET(DB_RMW) &&
- (ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0)
- return (ret);
-
- return (0);
-}
-
-/*
- * __db_secondary_close_pp --
- * DB->close for secondaries
- *
- * PUBLIC: int __db_secondary_close_pp __P((DB *, u_int32_t));
- */
-int
-__db_secondary_close_pp(dbp, flags)
- DB *dbp;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- env = dbp->env;
- ret = 0;
-
- /*
- * As a DB handle destructor, we can't fail.
- *
- * !!!
- * The actual argument checking is simple, do it inline, outside of
- * the replication block.
- */
- if (flags != 0 && flags != DB_NOSYNC)
- ret = __db_ferr(env, "DB->close", 0);
-
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check && (t_ret = __db_rep_enter(dbp, 0, 0, 0)) != 0) {
- handle_check = 0;
- if (ret == 0)
- ret = t_ret;
- }
-
- if ((t_ret = __db_secondary_close(dbp, flags)) != 0 && ret == 0)
- ret = t_ret;
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __dbc_pget_pp --
- * DBC->pget pre/post processing.
- *
- * PUBLIC: int __dbc_pget_pp __P((DBC *, DBT *, DBT *, DBT *, u_int32_t));
- */
-int
-__dbc_pget_pp(dbc, skey, pkey, data, flags)
- DBC *dbc;
- DBT *skey, *pkey, *data;
- u_int32_t flags;
-{
- DB *dbp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int ignore_lease, ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0;
- LF_CLR(DB_IGNORE_LEASE);
- if ((ret = __dbc_pget_arg(dbc, pkey, flags)) != 0 ||
- (ret = __dbc_get_arg(dbc, skey, data, flags)) != 0)
- return (ret);
-
- ENV_ENTER(env, ip);
- ret = __dbc_pget(dbc, skey, pkey, data, flags);
- /*
- * Check for master leases.
- */
- if (ret == 0 &&
- IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
- ret = __rep_lease_check(env, 1);
-
- ENV_LEAVE(env, ip);
-
- __dbt_userfree(env, skey, pkey, data);
- return (ret);
-}
-
-/*
- * __dbc_pget_arg --
- * Check DBC->pget arguments.
- */
-static int
-__dbc_pget_arg(dbc, pkey, flags)
- DBC *dbc;
- DBT *pkey;
- u_int32_t flags;
-{
- DB *dbp;
- ENV *env;
- int ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- if (!F_ISSET(dbp, DB_AM_SECONDARY)) {
- __db_errx(env,
- "DBcursor->pget may only be used on secondary indices");
- return (EINVAL);
- }
-
- if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) {
- __db_errx(env,
- "DB_MULTIPLE and DB_MULTIPLE_KEY may not be used on secondary indices");
- return (EINVAL);
- }
-
- switch (LF_ISSET(DB_OPFLAGS_MASK)) {
- case DB_CONSUME:
- case DB_CONSUME_WAIT:
- /* These flags make no sense on a secondary index. */
- return (__db_ferr(env, "DBcursor->pget", 0));
- case DB_GET_BOTH:
- case DB_GET_BOTH_RANGE:
- /* BOTH is "get both the primary and the secondary". */
- if (pkey == NULL) {
- __db_errx(env,
- "%s requires both a secondary and a primary key",
- LF_ISSET(DB_GET_BOTH) ?
- "DB_GET_BOTH" : "DB_GET_BOTH_RANGE");
- return (EINVAL);
- }
- if ((ret = __dbt_usercopy(env, pkey)) != 0)
- return (ret);
- break;
- default:
- /* __dbc_get_arg will catch the rest. */
- break;
- }
-
- /*
- * We allow the pkey field to be NULL, so that we can make the
- * two-DBT get calls into wrappers for the three-DBT ones.
- */
- if (pkey != NULL &&
- (ret = __dbt_ferr(dbp, "primary key", pkey, 0)) != 0)
- return (ret);
-
- /* But the pkey field can't be NULL if we're doing a DB_GET_BOTH. */
- if (pkey == NULL && (flags & DB_OPFLAGS_MASK) == DB_GET_BOTH) {
- __db_errx(env,
- "DB_GET_BOTH on a secondary index requires a primary key");
- return (EINVAL);
- }
- return (0);
-}
-
-/*
- * __dbc_put_pp --
- * DBC->put pre/post processing.
- *
- * PUBLIC: int __dbc_put_pp __P((DBC *, DBT *, DBT *, u_int32_t));
- */
-int
-__dbc_put_pp(dbc, key, data, flags)
- DBC *dbc;
- DBT *key, *data;
- u_int32_t flags;
-{
- DB *dbp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- if ((ret = __dbc_put_arg(dbc, key, data, flags)) != 0)
- return (ret);
-
- ENV_ENTER(env, ip);
-
- /* Check for consistent transaction usage. */
- if ((ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0)
- goto err;
-
- DEBUG_LWRITE(dbc, dbc->txn, "DBcursor->put",
- flags == DB_KEYFIRST || flags == DB_KEYLAST ||
- flags == DB_NODUPDATA || flags == DB_UPDATE_SECONDARY ?
- key : NULL, data, flags);
- ret = __dbc_put(dbc, key, data, flags);
-
-err: ENV_LEAVE(env, ip);
- __dbt_userfree(env, key, NULL, data);
- return (ret);
-}
-
-/*
- * __dbc_put_arg --
- * Check DBC->put arguments.
- */
-static int
-__dbc_put_arg(dbc, key, data, flags)
- DBC *dbc;
- DBT *key, *data;
- u_int32_t flags;
-{
- DB *dbp;
- ENV *env;
- int key_flags, ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
- key_flags = 0;
-
- /* Check for changes to a read-only tree. */
- if (DB_IS_READONLY(dbp))
- return (__db_rdonly(env, "DBcursor->put"));
-
- /* Check for puts on a secondary. */
- if (F_ISSET(dbp, DB_AM_SECONDARY)) {
- if (flags == DB_UPDATE_SECONDARY)
- flags = 0;
- else {
- __db_errx(env,
- "DBcursor->put forbidden on secondary indices");
- return (EINVAL);
- }
- }
-
- if ((ret = __dbt_usercopy(env, data)) != 0)
- return (ret);
-
- /* Check for invalid function flags. */
- switch (flags) {
- case DB_AFTER:
- case DB_BEFORE:
- switch (dbp->type) {
- case DB_BTREE:
- case DB_HASH: /* Only with unsorted duplicates. */
- if (!F_ISSET(dbp, DB_AM_DUP))
- goto err;
- if (dbp->dup_compare != NULL)
- goto err;
- break;
- case DB_QUEUE: /* Not permitted. */
- goto err;
- case DB_RECNO: /* Only with mutable record numbers. */
- if (!F_ISSET(dbp, DB_AM_RENUMBER))
- goto err;
- key_flags = key == NULL ? 0 : 1;
- break;
- case DB_UNKNOWN:
- default:
- goto err;
- }
- break;
- case DB_CURRENT:
- /*
- * If there is a comparison function, doing a DB_CURRENT
- * must not change the part of the data item that is used
- * for the comparison.
- */
- break;
- case DB_NODUPDATA:
- if (!F_ISSET(dbp, DB_AM_DUPSORT))
- goto err;
- /* FALLTHROUGH */
- case DB_KEYFIRST:
- case DB_KEYLAST:
- case DB_OVERWRITE_DUP:
- key_flags = 1;
- if ((ret = __dbt_usercopy(env, key)) != 0)
- return (ret);
- break;
- default:
-err: return (__db_ferr(env, "DBcursor->put", 0));
- }
-
- /*
- * Check for invalid key/data flags. The key may reasonably be NULL
- * if DB_AFTER or DB_BEFORE is set and the application doesn't care
- * about the returned key, or if the DB_CURRENT flag is set.
- */
- if (key_flags && (ret = __dbt_ferr(dbp, "key", key, 0)) != 0)
- return (ret);
- if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0)
- return (ret);
-
- /*
- * The key parameter should not be NULL or have the "partial" flag set
- * in a put call unless the user doesn't care about a key value we'd
- * return. The user tells us they don't care about the returned key by
- * setting the key parameter to NULL or configuring the key DBT to not
- * return any information. (Returned keys from a put are always record
- * numbers, and returning part of a record number doesn't make sense:
- * only accept a partial return if the length returned is 0.)
- */
- if (key_flags && F_ISSET(key, DB_DBT_PARTIAL) && key->dlen != 0)
- return (__db_ferr(env, "key DBT", 0));
-
- /*
- * The cursor must be initialized for anything other than DB_KEYFIRST,
- * DB_KEYLAST or zero: return EINVAL for an invalid cursor, otherwise 0.
- */
- if (!IS_INITIALIZED(dbc) && flags != 0 && flags != DB_KEYFIRST &&
- flags != DB_KEYLAST && flags != DB_NODUPDATA &&
- flags != DB_OVERWRITE_DUP)
- return (__db_curinval(env));
-
- return (0);
-}
-
-/*
- * __dbt_ferr --
- * Check a DBT for flag errors.
- */
-static int
-__dbt_ferr(dbp, name, dbt, check_thread)
- const DB *dbp;
- const char *name;
- const DBT *dbt;
- int check_thread;
-{
- ENV *env;
- int ret;
-
- env = dbp->env;
-
- /*
- * Check for invalid DBT flags. We allow any of the flags to be
- * specified to any DB or DBcursor call so that applications can
- * set DB_DBT_MALLOC when retrieving a data item from a secondary
- * database and then specify that same DBT as a key to a primary
- * database, without having to clear flags.
- */
- if ((ret = __db_fchk(env, name, dbt->flags, DB_DBT_APPMALLOC |
- DB_DBT_BULK | DB_DBT_DUPOK | DB_DBT_MALLOC | DB_DBT_REALLOC |
- DB_DBT_USERCOPY | DB_DBT_USERMEM | DB_DBT_PARTIAL)) != 0)
- return (ret);
- switch (F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC |
- DB_DBT_USERCOPY | DB_DBT_USERMEM)) {
- case 0:
- case DB_DBT_MALLOC:
- case DB_DBT_REALLOC:
- case DB_DBT_USERCOPY:
- case DB_DBT_USERMEM:
- break;
- default:
- return (__db_ferr(env, name, 1));
- }
-
- if (F_ISSET(dbt, DB_DBT_BULK) && F_ISSET(dbt, DB_DBT_PARTIAL)) {
- __db_errx(env,
- "Bulk and partial operations cannot be combined on %s DBT", name);
- return (EINVAL);
- }
-
- if (check_thread && DB_IS_THREADED(dbp) &&
- !F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC |
- DB_DBT_USERCOPY | DB_DBT_USERMEM)) {
- __db_errx(env,
- "DB_THREAD mandates memory allocation flag on %s DBT",
- name);
- return (EINVAL);
- }
- return (0);
-}
-
-/*
- * __db_curinval
- * Report that a cursor is in an invalid state.
- */
-static int
-__db_curinval(env)
- const ENV *env;
-{
- __db_errx(env,
- "Cursor position must be set before performing this operation");
- return (EINVAL);
-}
-
-/*
- * __db_txn_auto_init --
- * Handle DB_AUTO_COMMIT initialization.
- *
- * PUBLIC: int __db_txn_auto_init __P((ENV *, DB_THREAD_INFO *, DB_TXN **));
- */
-int
-__db_txn_auto_init(env, ip, txnidp)
- ENV *env;
- DB_THREAD_INFO *ip;
- DB_TXN **txnidp;
-{
- /*
- * Method calls where applications explicitly specify DB_AUTO_COMMIT
- * require additional validation: the DB_AUTO_COMMIT flag cannot be
- * specified if a transaction cookie is also specified, nor can the
- * flag be specified in a non-transactional environment.
- */
- if (*txnidp != NULL) {
- __db_errx(env,
- "DB_AUTO_COMMIT may not be specified along with a transaction handle");
- return (EINVAL);
- }
-
- if (!TXN_ON(env)) {
- __db_errx(env,
- "DB_AUTO_COMMIT may not be specified in non-transactional environment");
- return (EINVAL);
- }
-
- /*
- * Our caller checked to see if replication is making a state change.
- * Don't call the user-level API (which would repeat that check).
- */
- return (__txn_begin(env, ip, NULL, txnidp, 0));
-}
-
-/*
- * __db_txn_auto_resolve --
- * Resolve local transactions.
- *
- * PUBLIC: int __db_txn_auto_resolve __P((ENV *, DB_TXN *, int, int));
- */
-int
-__db_txn_auto_resolve(env, txn, nosync, ret)
- ENV *env;
- DB_TXN *txn;
- int nosync, ret;
-{
- int t_ret;
-
- /*
- * We're resolving a transaction for the user, and must decrement the
- * replication handle count. Call the user-level API.
- */
- if (ret == 0)
- return (__txn_commit(txn, nosync ? DB_TXN_NOSYNC : 0));
-
- if ((t_ret = __txn_abort(txn)) != 0)
- return (__env_panic(env, t_ret));
-
- return (ret);
-}
diff --git a/db/db_join.c b/db/db_join.c
deleted file mode 100644
index 05c11a4..0000000
--- a/db/db_join.c
+++ /dev/null
@@ -1,940 +0,0 @@
-/*
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1998-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_join.h"
-#include "dbinc/btree.h"
-#include "dbinc/lock.h"
-
-static int __db_join_close_pp __P((DBC *));
-static int __db_join_cmp __P((const void *, const void *));
-static int __db_join_del __P((DBC *, u_int32_t));
-static int __db_join_get __P((DBC *, DBT *, DBT *, u_int32_t));
-static int __db_join_get_pp __P((DBC *, DBT *, DBT *, u_int32_t));
-static int __db_join_getnext __P((DBC *, DBT *, DBT *, u_int32_t, u_int32_t));
-static int __db_join_primget __P((DB *, DB_THREAD_INFO *,
- DB_TXN *, DB_LOCKER *, DBT *, DBT *, u_int32_t));
-static int __db_join_put __P((DBC *, DBT *, DBT *, u_int32_t));
-
-/*
- * Check to see if the Nth secondary cursor of join cursor jc is pointing
- * to a sorted duplicate set.
- */
-#define SORTED_SET(jc, n) ((jc)->j_curslist[(n)]->dbp->dup_compare != NULL)
-
-/*
- * This is the duplicate-assisted join functionality. Right now we're
- * going to write it such that we return one item at a time, although
- * I think we may need to optimize it to return them all at once.
- * It should be easier to get it working this way, and I believe that
- * changing it should be fairly straightforward.
- *
- * We optimize the join by sorting cursors from smallest to largest
- * cardinality. In most cases, this is indeed optimal. However, if
- * a cursor with large cardinality has very few data in common with the
- * first cursor, it is possible that the join will be made faster by
- * putting it earlier in the cursor list. Since we have no way to detect
- * cases like this, we simply provide a flag, DB_JOIN_NOSORT, which retains
- * the sort order specified by the caller, who may know more about the
- * structure of the data.
- *
- * The first cursor moves sequentially through the duplicate set while
- * the others search explicitly for the duplicate in question.
- *
- */
-
-/*
- * __db_join --
- * This is the interface to the duplicate-assisted join functionality.
- * In the same way that cursors mark a position in a database, a cursor
- * can mark a position in a join. While most cursors are created by the
- * cursor method of a DB, join cursors are created through an explicit
- * call to DB->join.
- *
- * The curslist is an array of existing, initialized cursors and primary
- * is the DB of the primary file. The data item that joins all the
- * cursors in the curslist is used as the key into the primary and that
- * key and data are returned. When no more items are left in the join
- * set, the c_next operation off the join cursor will return DB_NOTFOUND.
- *
- * PUBLIC: int __db_join __P((DB *, DBC **, DBC **, u_int32_t));
- */
-int
-__db_join(primary, curslist, dbcp, flags)
- DB *primary;
- DBC **curslist, **dbcp;
- u_int32_t flags;
-{
- DBC *dbc;
- ENV *env;
- JOIN_CURSOR *jc;
- size_t ncurs, nslots;
- u_int32_t i;
- int ret;
-
- env = primary->env;
- dbc = NULL;
- jc = NULL;
-
- if ((ret = __os_calloc(env, 1, sizeof(DBC), &dbc)) != 0)
- goto err;
-
- if ((ret = __os_calloc(env, 1, sizeof(JOIN_CURSOR), &jc)) != 0)
- goto err;
-
- if ((ret = __os_malloc(env, 256, &jc->j_key.data)) != 0)
- goto err;
- jc->j_key.ulen = 256;
- F_SET(&jc->j_key, DB_DBT_USERMEM);
-
- F_SET(&jc->j_rdata, DB_DBT_REALLOC);
-
- for (jc->j_curslist = curslist;
- *jc->j_curslist != NULL; jc->j_curslist++)
- ;
-
- /*
- * The number of cursor slots we allocate is one greater than
- * the number of cursors involved in the join, because the
- * list is NULL-terminated.
- */
- ncurs = (size_t)(jc->j_curslist - curslist);
- nslots = ncurs + 1;
-
- /*
- * !!! -- A note on the various lists hanging off jc.
- *
- * j_curslist is the initial NULL-terminated list of cursors passed
- * into __db_join. The original cursors are not modified; pristine
- * copies are required because, in databases with unsorted dups, we
- * must reset all of the secondary cursors after the first each
- * time the first one is incremented, or else we will lose data
- * which happen to be sorted differently in two different cursors.
- *
- * j_workcurs is where we put those copies that we're planning to
- * work with. They're lazily c_dup'ed from j_curslist as we need
- * them, and closed when the join cursor is closed or when we need
- * to reset them to their original values (in which case we just
- * c_dup afresh).
- *
- * j_fdupcurs is an array of cursors which point to the first
- * duplicate in the duplicate set that contains the data value
- * we're currently interested in. We need this to make
- * __db_join_get correctly return duplicate duplicates; i.e., if a
- * given data value occurs twice in the set belonging to cursor #2,
- * and thrice in the set belonging to cursor #3, and once in all
- * the other cursors, successive calls to __db_join_get need to
- * return that data item six times. To make this happen, each time
- * cursor N is allowed to advance to a new datum, all cursors M
- * such that M > N have to be reset to the first duplicate with
- * that datum, so __db_join_get will return all the dup-dups again.
- * We could just reset them to the original cursor from j_curslist,
- * but that would be a bit slower in the unsorted case and a LOT
- * slower in the sorted one.
- *
- * j_exhausted is a list of boolean values which represent
- * whether or not their corresponding cursors are "exhausted",
- * i.e. whether the datum under the corresponding cursor has
- * been found not to exist in any unreturned combinations of
- * later secondary cursors, in which case they are ready to be
- * incremented.
- */
-
- /* We don't want to free regions whose callocs have failed. */
- jc->j_curslist = NULL;
- jc->j_workcurs = NULL;
- jc->j_fdupcurs = NULL;
- jc->j_exhausted = NULL;
-
- if ((ret = __os_calloc(env, nslots, sizeof(DBC *),
- &jc->j_curslist)) != 0)
- goto err;
- if ((ret = __os_calloc(env, nslots, sizeof(DBC *),
- &jc->j_workcurs)) != 0)
- goto err;
- if ((ret = __os_calloc(env, nslots, sizeof(DBC *),
- &jc->j_fdupcurs)) != 0)
- goto err;
- if ((ret = __os_calloc(env, nslots, sizeof(u_int8_t),
- &jc->j_exhausted)) != 0)
- goto err;
- for (i = 0; curslist[i] != NULL; i++) {
- jc->j_curslist[i] = curslist[i];
- jc->j_workcurs[i] = NULL;
- jc->j_fdupcurs[i] = NULL;
- jc->j_exhausted[i] = 0;
- }
- jc->j_ncurs = (u_int32_t)ncurs;
-
- /*
- * If DB_JOIN_NOSORT is not set, optimize secondary cursors by
- * sorting in order of increasing cardinality.
- */
- if (!LF_ISSET(DB_JOIN_NOSORT))
- qsort(jc->j_curslist, ncurs, sizeof(DBC *), __db_join_cmp);
-
- /*
- * We never need to reset the 0th cursor, so there's no
- * solid reason to use workcurs[0] rather than curslist[0] in
- * join_get. Nonetheless, it feels cleaner to do it for symmetry,
- * and this is the most logical place to copy it.
- *
- * !!!
- * There's no need to close the new cursor if we goto err only
- * because this is the last thing that can fail. Modifier of this
- * function beware!
- */
- if ((ret =
- __dbc_dup(jc->j_curslist[0], jc->j_workcurs, DB_POSITION)) != 0)
- goto err;
-
- dbc->close = dbc->c_close = __db_join_close_pp;
- dbc->del = dbc->c_del = __db_join_del;
- dbc->get = dbc->c_get = __db_join_get_pp;
- dbc->put = dbc->c_put = __db_join_put;
- dbc->internal = (DBC_INTERNAL *)jc;
- dbc->dbp = primary;
- jc->j_primary = primary;
-
- /* Stash the first cursor's transaction here for easy access. */
- dbc->txn = curslist[0]->txn;
-
- *dbcp = dbc;
-
- MUTEX_LOCK(env, primary->mutex);
- TAILQ_INSERT_TAIL(&primary->join_queue, dbc, links);
- MUTEX_UNLOCK(env, primary->mutex);
-
- return (0);
-
-err: if (jc != NULL) {
- if (jc->j_curslist != NULL)
- __os_free(env, jc->j_curslist);
- if (jc->j_workcurs != NULL) {
- if (jc->j_workcurs[0] != NULL)
- (void)__dbc_close(jc->j_workcurs[0]);
- __os_free(env, jc->j_workcurs);
- }
- if (jc->j_fdupcurs != NULL)
- __os_free(env, jc->j_fdupcurs);
- if (jc->j_exhausted != NULL)
- __os_free(env, jc->j_exhausted);
- __os_free(env, jc);
- }
- if (dbc != NULL)
- __os_free(env, dbc);
- return (ret);
-}
-
-/*
- * __db_join_close_pp --
- * DBC->close pre/post processing for join cursors.
- */
-static int
-__db_join_close_pp(dbc)
- DBC *dbc;
-{
- DB *dbp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- ENV_ENTER(env, ip);
-
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check &&
- (ret = __db_rep_enter(dbp, 1, 0, dbc->txn != NULL)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- ret = __db_join_close(dbc);
-
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
-err: ENV_LEAVE(env, ip);
- return (ret);
-}
-
-static int
-__db_join_put(dbc, key, data, flags)
- DBC *dbc;
- DBT *key;
- DBT *data;
- u_int32_t flags;
-{
- COMPQUIET(dbc, NULL);
- COMPQUIET(key, NULL);
- COMPQUIET(data, NULL);
- COMPQUIET(flags, 0);
- return (EINVAL);
-}
-
-static int
-__db_join_del(dbc, flags)
- DBC *dbc;
- u_int32_t flags;
-{
- COMPQUIET(dbc, NULL);
- COMPQUIET(flags, 0);
- return (EINVAL);
-}
-
-/*
- * __db_join_get_pp --
- *	DBjoin->get pre/post processing.
- *
- *	Validates the arguments, then calls __db_join_get with the caller's
- *	original flags, bracketed by the environment enter/leave macros and
- *	(for replicated environments) the replication enter/exit calls.
- *
- *	The isolation/RMW modifiers are only legal when locking is
- *	configured; apart from them the operation must be 0 or DB_JOIN_ITEM.
- */
-static int
-__db_join_get_pp(dbc, key, data, flags)
-	DBC *dbc;
-	DBT *key, *data;
-	u_int32_t flags;
-{
-	DB *dbp;
-	DB_THREAD_INFO *ip;
-	ENV *env;
-	u_int32_t save_flags;
-	int ret, t_ret;
-
-	dbp = dbc->dbp;
-	env = dbp->env;
-
-	/*
-	 * Remember what the caller passed: "flags" is stripped of its
-	 * modifiers below purely so the operation can be validated, while
-	 * __db_join_get needs the full, unmodified value.
-	 */
-	save_flags = flags;
-
-	if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) {
-		if (!LOCKING_ON(env))
-			return (__db_fnl(env, "DBC->get"));
-		LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
-	}
-
-	/* Only the plain get and DB_JOIN_ITEM operations are supported. */
-	if (flags != 0 && flags != DB_JOIN_ITEM)
-		return (__db_ferr(env, "DBC->get", 0));
-
-	/*
-	 * A partial get of the key of a join cursor does not make much
-	 * sense: the entire key is needed to query the primary database and
-	 * find the datum, so regardless of the key's size it would not be a
-	 * performance improvement.  It would also need special handling, so
-	 * it is simply disallowed.
-	 *
-	 * A partial get of the data, on the other hand, can be useful (for
-	 * instance if all possible data are a predictably large structure)
-	 * and causes no trouble, so it is permitted.
-	 */
-	if (F_ISSET(key, DB_DBT_PARTIAL)) {
-		__db_errx(env,
-		    "DB_DBT_PARTIAL may not be set on key during join_get");
-		return (EINVAL);
-	}
-
-	ENV_ENTER(env, ip);
-
-	if (!IS_ENV_REPLICATED(env))
-		ret = __db_join_get(dbc, key, data, save_flags);
-	else if ((ret =
-	    __db_rep_enter(dbp, 1, 0, dbc->txn != NULL)) == 0) {
-		/* We entered replication: do the get, then always exit. */
-		ret = __db_join_get(dbc, key, data, save_flags);
-		if ((t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
-			ret = t_ret;
-	}
-
-	ENV_LEAVE(env, ip);
-	__dbt_userfree(env, key, NULL, NULL);
-	return (ret);
-}
-
-/*
- * __db_join_get --
- *	Return the next item that satisfies the join.
- *
- *	Work cursor 0 drives the iteration: each datum it yields is a
- *	candidate primary key, which is then searched for in every other work
- *	cursor with __db_join_getnext.  Once all cursors agree, the key is
- *	copied out to key_arg and, unless the operation is DB_JOIN_ITEM, the
- *	record is fetched from the primary with __db_join_primget.
- *
- *	dbc:      the join cursor; dbc->internal is its JOIN_CURSOR.
- *	key_arg:  receives the primary key.
- *	data_arg: receives the primary's data (untouched for DB_JOIN_ITEM).
- *	flags:    the operation plus any of DB_READ_COMMITTED,
- *	          DB_READ_UNCOMMITTED and DB_RMW.
- *
- *	Returns 0 on success, otherwise the error of the failing call; a
- *	DB_NOTFOUND from work cursor 0 is how the end of the join is
- *	reported.  When copying out the key fails, or the primary get fails
- *	with anything but DB_NOTFOUND, JOIN_RETRY is set so that the next
- *	call retries with the same key.
- */
-static int
-__db_join_get(dbc, key_arg, data_arg, flags)
-	DBC *dbc;
-	DBT *key_arg, *data_arg;
-	u_int32_t flags;
-{
-	DB *dbp;
-	DBC *cp;
-	DBT *key_n, key_n_mem;
-	ENV *env;
-	JOIN_CURSOR *jc;
-	int db_manage_data, ret;
-	u_int32_t i, j, operation, opmods;
-
-	dbp = dbc->dbp;
-	env = dbp->env;
-	jc = (JOIN_CURSOR *)dbc->internal;
-
-	operation = LF_ISSET(DB_OPFLAGS_MASK);
-
-	/* !!!
-	 * If the set of flags here changes, check that __db_join_primget
-	 * is updated to handle them properly.
-	 */
-	opmods = LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
-
-	/*
-	 * Since we are fetching the key as a datum in the secondary indices,
-	 * we must be careful of caller-specified DB_DBT_* memory
-	 * management flags.  If necessary, use a stack-allocated DBT;
-	 * we'll appropriately copy and/or allocate the data later.
-	 */
-	if (F_ISSET(key_arg,
-	    DB_DBT_MALLOC | DB_DBT_USERCOPY | DB_DBT_USERMEM)) {
-		/* We just use the default buffer; no need to go malloc. */
-		key_n = &key_n_mem;
-		memset(key_n, 0, sizeof(DBT));
-	} else {
-		/*
-		 * Either DB_DBT_REALLOC or the default buffer will work
-		 * fine if we have to reuse it, as we do.
-		 */
-		key_n = key_arg;
-	}
-	if (F_ISSET(key_arg, DB_DBT_USERCOPY))
-		key_arg->data = NULL;
-
-	/*
-	 * If our last attempt to do a get on the primary key failed,
-	 * short-circuit the join and try again with the same key.
-	 */
-	if (F_ISSET(jc, JOIN_RETRY))
-		goto samekey;
-	F_CLR(jc, JOIN_RETRY);
-
-retry:	ret = __dbc_get(jc->j_workcurs[0], &jc->j_key, key_n,
-	    opmods | (jc->j_exhausted[0] ? DB_NEXT_DUP : DB_CURRENT));
-
-	if (ret == DB_BUFFER_SMALL) {
-		/* Double the join key buffer and try again. */
-		jc->j_key.ulen <<= 1;
-		if ((ret = __os_realloc(env,
-		    jc->j_key.ulen, &jc->j_key.data)) != 0)
-			goto mem_err;
-		goto retry;
-	}
-
-	/*
-	 * If ret == DB_NOTFOUND, we're out of elements of the first
-	 * secondary cursor.  This is how we finally finish the join
-	 * if all goes well.
-	 */
-	if (ret != 0)
-		goto err;
-
-	/*
-	 * If jc->j_exhausted[0] == 1, we've just advanced the first cursor,
-	 * and we're going to want to advance all the cursors that point to
-	 * the first member of a set of duplicate duplicates
-	 * (j_fdupcurs[1..N]).  Close all the cursors in j_fdupcurs; we'll
-	 * reopen them the first time through the upcoming loop.
-	 */
-	for (i = 1; i < jc->j_ncurs; i++) {
-		if (jc->j_fdupcurs[i] != NULL &&
-		    (ret = __dbc_close(jc->j_fdupcurs[i])) != 0)
-			goto err;
-		jc->j_fdupcurs[i] = NULL;
-	}
-
-	/*
-	 * If jc->j_curslist[1] == NULL, we have only one cursor in the join.
-	 * Thus, we can safely increment that one cursor on each call
-	 * to __db_join_get, and we signal this by setting jc->j_exhausted[0]
-	 * right away.
-	 *
-	 * Otherwise, reset jc->j_exhausted[0] to 0, so that we don't
-	 * increment it until we know we're ready to.
-	 */
-	if (jc->j_curslist[1] == NULL)
-		jc->j_exhausted[0] = 1;
-	else
-		jc->j_exhausted[0] = 0;
-
-	/* We have the first element; now look for it in the other cursors. */
-	for (i = 1; i < jc->j_ncurs; i++) {
-		DB_ASSERT(env, jc->j_curslist[i] != NULL);
-		if (jc->j_workcurs[i] == NULL)
-			/* If this is NULL, we need to dup curslist into it. */
-			if ((ret = __dbc_dup(jc->j_curslist[i],
-			    &jc->j_workcurs[i], DB_POSITION)) != 0)
-				goto err;
-
-retry2:		cp = jc->j_workcurs[i];
-
-		if ((ret = __db_join_getnext(cp, &jc->j_key, key_n,
-		    jc->j_exhausted[i], opmods)) == DB_NOTFOUND) {
-			/*
-			 * jc->j_workcurs[i] has no more of the datum we're
-			 * interested in.  Go back one cursor and get
-			 * a new dup.  We can't just move to a new
-			 * element of the outer relation, because that way
-			 * we might miss duplicate duplicates in cursor i-1.
-			 *
-			 * If this takes us back to the first cursor,
-			 * -then- we can move to a new element of the outer
-			 * relation.
-			 */
-			--i;
-			jc->j_exhausted[i] = 1;
-
-			if (i == 0) {
-				for (j = 1; jc->j_workcurs[j] != NULL; j++) {
-					/*
-					 * We're moving to a new element of
-					 * the first secondary cursor.  If
-					 * that cursor is sorted, then any
-					 * other sorted cursors can be safely
-					 * reset to the first duplicate
-					 * duplicate in the current set if we
-					 * have a pointer to it (we can't just
-					 * leave them be, or we'll miss
-					 * duplicate duplicates in the outer
-					 * relation).
-					 *
-					 * If the first cursor is unsorted, or
-					 * if cursor j is unsorted, we can
-					 * make no assumptions about what
-					 * we're looking for next or where it
-					 * will be, so we reset to the very
-					 * beginning (leaving workcurs NULL
-					 * will achieve this next go-round).
-					 *
-					 * XXX: This is likely to break
-					 * horribly if any two cursors are
-					 * both sorted, but have different
-					 * specified sort functions.  For
-					 * now, we dismiss this as pathology
-					 * and let strange things happen--we
-					 * can't make rope childproof.
-					 */
-					if ((ret = __dbc_close(
-					    jc->j_workcurs[j])) != 0)
-						goto err;
-					/*
-					 * The cursor is closed: clear the
-					 * slot so that a failed dup below
-					 * cannot leave a stale pointer for
-					 * __db_join_close to close again.
-					 */
-					jc->j_workcurs[j] = NULL;
-					/*
-					 * A partial reset suffices only when
-					 * both cursors are sorted and we have
-					 * a first-duplicate cursor to copy.
-					 * The dup's result must be kept in
-					 * ret, or a failure would reach "err"
-					 * looking like success.
-					 */
-					if (SORTED_SET(jc, 0) &&
-					    SORTED_SET(jc, j) &&
-					    jc->j_fdupcurs[j] != NULL &&
-					    (ret = __dbc_dup(
-					    jc->j_fdupcurs[j],
-					    &jc->j_workcurs[j],
-					    DB_POSITION)) != 0)
-						goto err;
-					jc->j_exhausted[j] = 0;
-				}
-				goto retry;
-				/* NOTREACHED */
-			}
-
-			/*
-			 * We're about to advance the cursor and need to
-			 * reset all of the workcurs[j] where j>i, so that
-			 * we don't miss any duplicate duplicates.
-			 */
-			for (j = i + 1;
-			    jc->j_workcurs[j] != NULL;
-			    j++) {
-				if ((ret =
-				    __dbc_close(jc->j_workcurs[j])) != 0)
-					goto err;
-				/* Closed: never leave a stale pointer. */
-				jc->j_workcurs[j] = NULL;
-				jc->j_exhausted[j] = 0;
-				if (jc->j_fdupcurs[j] != NULL &&
-				    (ret = __dbc_dup(jc->j_fdupcurs[j],
-				    &jc->j_workcurs[j], DB_POSITION)) != 0)
-					goto err;
-			}
-			goto retry2;
-			/* NOTREACHED */
-		}
-
-		if (ret == DB_BUFFER_SMALL) {
-			jc->j_key.ulen <<= 1;
-			if ((ret = __os_realloc(env, jc->j_key.ulen,
-			    &jc->j_key.data)) != 0) {
-mem_err:			__db_errx(env,
-				    "Allocation failed for join key, len = %lu",
-				    (u_long)jc->j_key.ulen);
-				goto err;
-			}
-			goto retry2;
-		}
-
-		if (ret != 0)
-			goto err;
-
-		/*
-		 * If we made it this far, we've found a matching
-		 * datum in cursor i.  Mark the current cursor
-		 * unexhausted, so we don't miss any duplicate
-		 * duplicates the next go-round--unless this is the
-		 * very last cursor, in which case there are none to
-		 * miss, and we'll need that exhausted flag to finally
-		 * get a DB_NOTFOUND and move on to the next datum in
-		 * the outermost cursor.
-		 */
-		if (i + 1 != jc->j_ncurs)
-			jc->j_exhausted[i] = 0;
-		else
-			jc->j_exhausted[i] = 1;
-
-		/*
-		 * If jc->j_fdupcurs[i] is NULL and the ith cursor's dups are
-		 * sorted, then we're here for the first time since advancing
-		 * cursor 0, and we have a new datum of interest.
-		 * jc->j_workcurs[i] points to the beginning of a set of
-		 * duplicate duplicates; store this into jc->j_fdupcurs[i].
-		 */
-		if (SORTED_SET(jc, i) && jc->j_fdupcurs[i] == NULL && (ret =
-		    __dbc_dup(cp, &jc->j_fdupcurs[i], DB_POSITION)) != 0)
-			goto err;
-	}
-
-err:	if (ret != 0)
-		return (ret);
-
-	if (0) {
-samekey:	/*
-		 * Get the key we tried and failed to return last time;
-		 * it should be the current datum of all the secondary cursors.
-		 */
-		if ((ret = __dbc_get(jc->j_workcurs[0],
-		    &jc->j_key, key_n, DB_CURRENT | opmods)) != 0)
-			return (ret);
-		F_CLR(jc, JOIN_RETRY);
-	}
-
-	/*
-	 * ret == 0; we have a key to return.
-	 *
-	 * If DB_DBT_USERMEM, DB_DBT_MALLOC or DB_DBT_USERCOPY is set, we
-	 * need to copy the key back into the dbt we were given for the key;
-	 * call __db_retcopy.  Otherwise, assert that we do not need to copy
-	 * anything and proceed.
-	 */
-	DB_ASSERT(env, F_ISSET(key_arg, DB_DBT_USERMEM | DB_DBT_MALLOC |
-	    DB_DBT_USERCOPY) || key_n == key_arg);
-
-	if ((F_ISSET(key_arg, DB_DBT_USERMEM | DB_DBT_MALLOC |
-	    DB_DBT_USERCOPY)) &&
-	    (ret = __db_retcopy(env,
-	    key_arg, key_n->data, key_n->size, NULL, NULL)) != 0) {
-		/*
-		 * The retcopy failed, most commonly because we have a user
-		 * buffer for the key which is too small.  Set things up to
-		 * retry next time, and return.
-		 */
-		F_SET(jc, JOIN_RETRY);
-		return (ret);
-	}
-
-	/*
-	 * If DB_JOIN_ITEM is set, we return it; otherwise we do the lookup
-	 * in the primary and then return.
-	 */
-	if (operation == DB_JOIN_ITEM)
-		return (0);
-
-	/*
-	 * If data_arg->flags == 0--that is, if DB is managing the
-	 * data DBT's memory--it's not safe to just pass the DBT
-	 * through to the primary get call, since we don't want that
-	 * memory to belong to the primary DB handle (and if the primary
-	 * is free-threaded, it can't anyway).
-	 *
-	 * Instead, use memory that is managed by the join cursor, in
-	 * jc->j_rdata.
-	 */
-	if (!F_ISSET(data_arg, DB_DBT_MALLOC | DB_DBT_REALLOC |
-	    DB_DBT_USERMEM | DB_DBT_USERCOPY))
-		db_manage_data = 1;
-	else
-		db_manage_data = 0;
-	if ((ret = __db_join_primget(jc->j_primary, dbc->thread_info,
-	    jc->j_curslist[0]->txn, jc->j_curslist[0]->locker, key_n,
-	    db_manage_data ? &jc->j_rdata : data_arg, opmods)) != 0) {
-		if (ret == DB_NOTFOUND) {
-			/*
-			 * With uncommitted reads the secondary entry may
-			 * simply have no primary record to match; move on
-			 * to the next candidate instead of failing.
-			 */
-			if (LF_ISSET(DB_READ_UNCOMMITTED) ||
-			    (jc->j_curslist[0]->txn != NULL && F_ISSET(
-			    jc->j_curslist[0]->txn, TXN_READ_UNCOMMITTED)))
-				goto retry;
-			/*
-			 * If ret == DB_NOTFOUND, the primary and secondary
-			 * are out of sync; every item in each secondary
-			 * should correspond to something in the primary,
-			 * or we shouldn't have done the join this way.
-			 * Wail.
-			 */
-			ret = __db_secondary_corrupt(jc->j_primary);
-		} else
-			/*
-			 * The get on the primary failed for some other
-			 * reason, most commonly because we're using a user
-			 * buffer that's not big enough.  Flag our failure
-			 * so we can return the same key next time.
-			 */
-			F_SET(jc, JOIN_RETRY);
-	}
-	if (db_manage_data && ret == 0) {
-		data_arg->data = jc->j_rdata.data;
-		data_arg->size = jc->j_rdata.size;
-	}
-
-	return (ret);
-}
-
-/*
- * __db_join_close --
- *	DBC->close for join cursors.
- *
- *	Unlinks the join cursor from its DB handle's join queue, closes every
- *	scratch cursor it still owns and frees all memory hanging off it,
- *	including the cursor itself.  Returns 0, or the error of the last
- *	scratch-cursor close that failed.
- *
- * PUBLIC: int __db_join_close __P((DBC *));
- */
-int
-__db_join_close(dbc)
-	DBC *dbc;
-{
-	DB *dbp;
-	DB_THREAD_INFO *ip;
-	ENV *env;
-	JOIN_CURSOR *jc;
-	int ret, t_ret;
-	u_int32_t i;
-
-	ret = 0;
-	t_ret = 0;
-	dbp = dbc->dbp;
-	env = dbp->env;
-	jc = (JOIN_CURSOR *)dbc->internal;
-
-	/*
-	 * Take the cursor off the active list of join cursors first.  This
-	 * has to happen before anything that can fail and return, or else
-	 * __db_close may loop indefinitely.
-	 */
-	MUTEX_LOCK(env, dbp->mutex);
-	TAILQ_REMOVE(&dbp->join_queue, dbc, links);
-	MUTEX_UNLOCK(env, dbp->mutex);
-
-	ENV_ENTER(env, ip);
-	/*
-	 * Close whatever scratch cursors are open; there may be fewer of
-	 * them than there are cursors in curslist.
-	 *
-	 * A failed close is no reason to skip the remaining ones, so keep
-	 * going and report the last failure.  The caller can do little about
-	 * it anyway: these cursors live only inside a db-internal structure
-	 * that callers should not be touching.
-	 */
-	for (i = 0; i < jc->j_ncurs; i++) {
-		if (jc->j_workcurs[i] != NULL) {
-			t_ret = __dbc_close(jc->j_workcurs[i]);
-			if (t_ret != 0)
-				ret = t_ret;
-		}
-		if (jc->j_fdupcurs[i] != NULL) {
-			t_ret = __dbc_close(jc->j_fdupcurs[i]);
-			if (t_ret != 0)
-				ret = t_ret;
-		}
-	}
-	ENV_LEAVE(env, ip);
-
-	/* Release the per-cursor arrays, then the buffers, then the rest. */
-	__os_free(env, jc->j_exhausted);
-	__os_free(env, jc->j_curslist);
-	__os_free(env, jc->j_workcurs);
-	__os_free(env, jc->j_fdupcurs);
-	__os_free(env, jc->j_key.data);
-	if (jc->j_rdata.data != NULL)
-		__os_ufree(env, jc->j_rdata.data);
-	__os_free(env, jc);
-	__os_free(env, dbc);
-
-	return (ret);
-}
-
-/*
- * __db_join_getnext --
- *	This function replaces the DBC_CONTINUE and DBC_KEYSET
- *	functionality inside the various cursor get routines.
- *
- *	If exhausted == 0, we're not done with the current datum;
- *	return it if it matches "data", otherwise search
- *	using DB_GET_BOTHC (which is faster than iteratively doing
- *	DB_NEXT_DUP) forward until we find one that does.
- *
- *	If exhausted == 1, we are done with the current datum, so just
- *	leap forward to searching NEXT_DUPs.
- *
- *	Any other value of exhausted yields EINVAL.
- *
- *	dbc:    the secondary work cursor to search.
- *	key:    the key DBT passed through to the cursor gets.
- *	data:   on entry the datum to look for; on a match it receives the
- *	        cursor's actual datum.
- *	opmods: isolation/RMW modifiers OR'd into every cursor get.
- *
- *	If no matching datum exists, returns DB_NOTFOUND, else 0; other
- *	cursor or copy errors are returned unchanged.
- */
-static int
-__db_join_getnext(dbc, key, data, exhausted, opmods)
-	DBC *dbc;
-	DBT *key, *data;
-	u_int32_t exhausted, opmods;
-{
-	int ret, cmp;
-	DB *dbp;
-	DBT ldata;
-	int (*func) __P((DB *, const DBT *, const DBT *));
-
-	dbp = dbc->dbp;
-	func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare;
-
-	switch (exhausted) {
-	case 0:
-		/*
-		 * We don't want to step on data->data; use a new
-		 * DBT and malloc so we don't step on dbc's rdata memory.
-		 */
-		memset(&ldata, 0, sizeof(DBT));
-		F_SET(&ldata, DB_DBT_MALLOC);
-		if ((ret = __dbc_get(dbc,
-		    key, &ldata, opmods | DB_CURRENT)) != 0)
-			break;
-		cmp = func(dbp, data, &ldata);
-		if (cmp == 0) {
-			/*
-			 * We have to return the real data value.  Copy
-			 * it into data, then free the buffer we malloc'ed
-			 * above.  The buffer is freed whether or not the
-			 * copy succeeds, so a failed copy does not leak it.
-			 */
-			ret = __db_retcopy(dbp->env, data, ldata.data,
-			    ldata.size, &data->data, &data->size);
-			__os_ufree(dbp->env, ldata.data);
-			return (ret);
-		}
-
-		/*
-		 * Didn't match--we want to fall through and search future
-		 * dups.  We just forget about ldata and free
-		 * its buffer--data contains the value we're searching for.
-		 */
-		__os_ufree(dbp->env, ldata.data);
-		/* FALLTHROUGH */
-	case 1:
-		ret = __dbc_get(dbc, key, data, opmods | DB_GET_BOTHC);
-		break;
-	default:
-		ret = EINVAL;
-		break;
-	}
-
-	return (ret);
-}
-
-/*
- * __db_join_cmp --
- *	Comparison function for sorting DBCs in cardinality order.
- *
- *	a and b each point to a DBC pointer.  Returns a negative, zero or
- *	positive value as the first cursor's count is less than, equal to or
- *	greater than the second's.  If either count cannot be obtained the
- *	two cursors are treated as equal.
- */
-static int
-__db_join_cmp(a, b)
-	const void *a, *b;
-{
-	DBC *dbca, *dbcb;
-	db_recno_t counta, countb;
-
-	dbca = *((DBC * const *)a);
-	dbcb = *((DBC * const *)b);
-
-	if (__dbc_count(dbca, &counta) != 0 ||
-	    __dbc_count(dbcb, &countb) != 0)
-		return (0);
-
-	/*
-	 * Compare rather than subtract: the difference of two counts need
-	 * not fit in the int we return, and a truncated or overflowed
-	 * difference can carry the wrong sign.
-	 */
-	if (counta < countb)
-		return (-1);
-	return (counta > countb);
-}
-
-/*
- * __db_join_primget --
- *	Perform a DB->get in the primary, being careful not to use a new
- *	locker ID if we're doing CDB locking.
- *
- *	A cursor is opened on the primary with the supplied transaction and
- *	locker, positioned with DB_SET on key, and closed again.  Returns the
- *	error of the get or, if the get succeeded, of the close.
- */
-static int
-__db_join_primget(dbp, ip, txn, locker, key, data, flags)
-	DB *dbp;
-	DB_THREAD_INFO *ip;
-	DB_TXN *txn;
-	DB_LOCKER *locker;
-	DBT *key, *data;
-	u_int32_t flags;
-{
-	DBC *dbc;
-	u_int32_t rmw;
-	int ret, t_ret;
-
-	ret = __db_cursor_int(dbp, ip,
-	    txn, dbp->type, PGNO_INVALID, 0, locker, &dbc);
-	if (ret != 0)
-		return (ret);
-
-	F_SET(dbc, DBC_TRANSIENT);
-
-	/*
-	 * The only flags allowed in here are the modifiers __db_join_get
-	 * copies into "opmods": DB_RMW, DB_READ_UNCOMMITTED and
-	 * DB_READ_COMMITTED.  DB_RMW is an op on the get call; the isolation
-	 * levels are cursor properties and may also be inherited from the
-	 * transaction.  Anything else reaching this point is a DB bug.
-	 */
-	if (LF_ISSET(DB_READ_COMMITTED) ||
-	    (txn != NULL && F_ISSET(txn, TXN_READ_COMMITTED)))
-		F_SET(dbc, DBC_READ_COMMITTED);
-
-	if (LF_ISSET(DB_READ_UNCOMMITTED) ||
-	    (txn != NULL && F_ISSET(txn, TXN_READ_UNCOMMITTED)))
-		F_SET(dbc, DBC_READ_UNCOMMITTED);
-
-	rmw = LF_ISSET(DB_RMW);
-
-	LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW);
-	DB_ASSERT(dbp->env, flags == 0);
-
-	/*
-	 * Join cursors swap in their own DB_DBT_REALLOC'ed buffers, so this
-	 * should not be needed; it is done for form's sake, mirroring what
-	 * __db_get does.
-	 */
-	SET_RET_MEM(dbc, dbp);
-
-	ret = __dbc_get(dbc, key, data, DB_SET | rmw);
-
-	/* Always close the cursor; keep the get's error if there was one. */
-	t_ret = __dbc_close(dbc);
-	if (ret == 0)
-		ret = t_ret;
-
-	return (ret);
-}
-
-/*
- * __db_secondary_corrupt --
- *	Report primary/secondary inconsistencies.
- *
- *	Emits a DB_SECONDARY_BAD error naming the database as "file" or
- *	"file/subdatabase" ("unnamed" when there is no file name) and
- *	returns DB_SECONDARY_BAD.
- *
- * PUBLIC: int __db_secondary_corrupt __P((DB *));
- */
-int
-__db_secondary_corrupt(dbp)
-	DB *dbp;
-{
-	const char *file, *sep, *sub;
-
-	file = dbp->fname != NULL ? dbp->fname : "unnamed";
-
-	/* The separator and the subdatabase name appear only together. */
-	if (dbp->dname != NULL) {
-		sep = "/";
-		sub = dbp->dname;
-	} else {
-		sep = "";
-		sub = "";
-	}
-
-	__db_err(dbp->env, DB_SECONDARY_BAD, "%s%s%s", file, sep, sub);
-	return (DB_SECONDARY_BAD);
-}
diff --git a/db/db_meta.c b/db/db_meta.c
deleted file mode 100644
index ef42e44..0000000
--- a/db/db_meta.c
+++ /dev/null
@@ -1,1299 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Mike Olson.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/lock.h"
-#include "dbinc/log.h"
-#include "dbinc/mp.h"
-#include "dbinc/txn.h"
-#include "dbinc/db_am.h"
-#include "dbinc/hash.h"
-
-static void __db_init_meta __P((DB *, void *, db_pgno_t, u_int32_t));
-#ifdef HAVE_FTRUNCATE
-static int __db_pglistcmp __P((const void *, const void *));
-static int __db_truncate_freelist __P((DBC *, DBMETA *,
- PAGE *, db_pgno_t *, u_int32_t, u_int32_t));
-#endif
-
-/*
- * __db_init_meta --
- *	Helper for __db_new that sets up the important fields of a meta-data
- *	page (used instead of P_INIT).  The DBMETA header is zeroed, but the
- *	LSN already on the page is carried over; the page number, page type
- *	and page size are filled in, and the checksum flag is set when the
- *	database has DB_AM_CHKSUM.
- */
-static void
-__db_init_meta(dbp, p, pgno, pgtype)
-	DB *dbp;
-	void *p;
-	db_pgno_t pgno;
-	u_int32_t pgtype;
-{
-	DBMETA *meta;
-	DB_LSN lsn;
-
-	meta = p;
-
-	/* Clear the header without losing the page's current LSN. */
-	lsn = meta->lsn;
-	memset(meta, 0, sizeof(*meta));
-	meta->lsn = lsn;
-
-	meta->pgno = pgno;
-	meta->type = (u_int8_t)pgtype;
-	meta->pagesize = dbp->pgsize;
-	if (F_ISSET(dbp, DB_AM_CHKSUM))
-		FLD_SET(meta->metaflags, DBMETA_CHKSUM);
-}
-
-/*
- * __db_new --
- * Get a new page, preferably from the freelist.
- *
- * If the metadata page's free list is empty the file is extended by one
- * page, unless P_DONTEXTEND is set in type, in which case *pagepp is set
- * to NULL and 0 is returned. The allocation is logged (when logging is
- * on) before the new page is fetched, the page is then initialized for
- * the requested type and returned in *pagepp. If lockp is not NULL, a
- * write lock on the new page is acquired into it.
- *
- * On error the page, the metadata page (unless it came from the hash
- * cursor) and the locks taken here are released and the error is returned.
- * NOTE(review): the panic return taken when a free-list page does not
- * have type P_INVALID skips that cleanup -- presumably acceptable because
- * the environment is being panicked; confirm.
- *
- * PUBLIC: int __db_new __P((DBC *, u_int32_t, DB_LOCK *, PAGE **));
- */
-int
-__db_new(dbc, type, lockp, pagepp)
- DBC *dbc;
- u_int32_t type;
- DB_LOCK *lockp;
- PAGE **pagepp;
-{
- DB *dbp;
- DBMETA *meta;
- DB_LOCK metalock;
- DB_LSN lsn;
- DB_MPOOLFILE *mpf;
- ENV *env;
- PAGE *h;
- db_pgno_t last, *list, pgno, newnext;
- int extend, hash, ret, t_ret;
-
- meta = NULL;
- dbp = dbc->dbp;
- env = dbp->env;
- mpf = dbp->mpf;
- h = NULL;
- newnext = PGNO_INVALID;
- if (lockp != NULL)
- LOCK_INIT(*lockp);
-
- hash = 0;
- ret = 0;
- LOCK_INIT(metalock);
-
-#ifdef HAVE_HASH
- if (dbp->type == DB_HASH) {
- if ((ret = __ham_return_meta(dbc, DB_MPOOL_DIRTY, &meta)) != 0)
- goto err;
- if (meta != NULL)
- hash = 1;
- }
-#endif
- if (meta == NULL) {
- pgno = PGNO_BASE_MD;
- if ((ret = __db_lget(dbc,
- LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
- goto err;
- if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn,
- DB_MPOOL_DIRTY, &meta)) != 0)
- goto err;
- }
-
- last = meta->last_pgno;
- if (meta->free == PGNO_INVALID) {
- if (FLD_ISSET(type, P_DONTEXTEND)) {
- *pagepp = NULL;
- goto err;
- }
- last = pgno = meta->last_pgno + 1;
- ZERO_LSN(lsn);
- extend = 1;
- } else {
- pgno = meta->free;
- /*
- * Lock the new page. Do this here because we must do it
- * before getting the page and the caller may need the lock
- * to keep readers from seeing the page before the transaction
- * commits. We can do this because no one will hold a free
- * page locked.
- */
- if (lockp != NULL && (ret =
- __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, lockp)) != 0)
- goto err;
- if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn,
- DB_MPOOL_DIRTY, &h)) != 0)
- goto err;
-
- /*
- * We want to take the first page off the free list and
- * then set meta->free to that page's next_pgno, but
- * we need to log the change first.
- */
- newnext = h->next_pgno;
- lsn = h->lsn;
- extend = 0;
- DB_ASSERT(env, TYPE(h) == P_INVALID);
-
- if (TYPE(h) != P_INVALID) {
- __db_errx(env,
- "%s page %lu is on free list with type %lu",
- dbp->fname, (u_long)PGNO(h), (u_long)TYPE(h));
- return (__env_panic(env, EINVAL));
- }
-
- }
-
- FLD_CLR(type, P_DONTEXTEND);
-
- /*
- * Log the allocation before fetching the new page. If we
- * don't have room in the log then we don't want to tell
- * mpool to extend the file.
- */
- if (DBC_LOGGING(dbc)) {
- if ((ret = __db_pg_alloc_log(dbp, dbc->txn, &LSN(meta), 0,
- &LSN(meta), PGNO_BASE_MD, &lsn,
- pgno, (u_int32_t)type, newnext, meta->last_pgno)) != 0)
- goto err;
- } else
- LSN_NOT_LOGGED(LSN(meta));
-
- meta->free = newnext;
-
- if (extend == 1) {
- if (lockp != NULL && (ret =
- __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, lockp)) != 0)
- goto err;
- if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn,
- DB_MPOOL_NEW, &h)) != 0)
- goto err;
- DB_ASSERT(env, last == pgno);
- meta->last_pgno = pgno;
- ZERO_LSN(h->lsn);
- h->pgno = pgno;
- }
- LSN(h) = LSN(meta);
-
- if (hash == 0)
- ret = __memp_fput(mpf, dbc->thread_info, meta, dbc->priority);
- meta = NULL;
- if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0)
- ret = t_ret;
- if (ret != 0)
- goto err;
-
- switch (type) {
- case P_BTREEMETA:
- case P_HASHMETA:
- case P_QAMMETA:
- __db_init_meta(dbp, h, h->pgno, type);
- break;
- default:
- P_INIT(h, dbp->pgsize,
- h->pgno, PGNO_INVALID, PGNO_INVALID, 0, type);
- break;
- }
-
- /*
- * Fix up the sorted free list if necessary: a page taken from the
- * free list is expected to be its first entry, which is dropped.
- */
-#ifdef HAVE_FTRUNCATE
- if (extend == 0) {
- u_int32_t nelems = 0;
-
- if ((ret = __memp_get_freelist(dbp->mpf, &nelems, &list)) != 0)
- goto err;
- if (nelems != 0) {
- DB_ASSERT(env, h->pgno == list[0]);
- memmove(list, &list[1], (nelems - 1) * sizeof(*list));
- if ((ret = __memp_extend_freelist(
- dbp->mpf, nelems - 1, &list)) != 0)
- goto err;
- }
- }
-#else
- COMPQUIET(list, NULL);
-#endif
-
- *pagepp = h;
- return (0);
-
-err: if (h != NULL)
- (void)__memp_fput(mpf, dbc->thread_info, h, dbc->priority);
- if (meta != NULL && hash == 0)
- (void)__memp_fput(mpf, dbc->thread_info, meta, dbc->priority);
- (void)__TLPUT(dbc, metalock);
- if (lockp != NULL)
- (void)__LPUT(dbc, *lockp);
- return (ret);
-}
-
-/*
- * __db_free --
- * Add a page to the head of the freelist.
- *
- * With HAVE_FTRUNCATE and a sorted in-memory free list, the page is instead
- * linked in after its predecessor in the sorted list. When the page is the
- * last page of the file it is not kept at all: the file is truncated,
- * through __db_truncate_freelist when the tail of the sorted list allows a
- * multi-page truncation and through __memp_ftruncate otherwise.
- *
- * The change is logged (when logging is on) against either the metadata
- * page or the previous page in the sorted list. The caller's reference to
- * h is consumed on every path, success or error (it is either put back
- * here or handed to __db_truncate_freelist); unlocking the page remains
- * the caller's job.
- *
- * PUBLIC: int __db_free __P((DBC *, PAGE *));
- */
-int
-__db_free(dbc, h)
- DBC *dbc;
- PAGE *h;
-{
- DB *dbp;
- DBMETA *meta;
- DBT ddbt, ldbt;
- DB_LOCK metalock;
- DB_LSN *lsnp;
- DB_MPOOLFILE *mpf;
- PAGE *prev;
- db_pgno_t last_pgno, next_pgno, pgno, prev_pgno;
- u_int32_t lflag;
- int hash, ret, t_ret;
-#ifdef HAVE_FTRUNCATE
- db_pgno_t *list, *lp;
- u_int32_t nelem, position, start;
- int do_truncate;
-#endif
-
- dbp = dbc->dbp;
- mpf = dbp->mpf;
- prev_pgno = PGNO_INVALID;
- meta = NULL;
- prev = NULL;
- LOCK_INIT(metalock);
-#ifdef HAVE_FTRUNCATE
- lp = NULL;
- nelem = 0;
- do_truncate = 0;
-#endif
-
- /*
- * Retrieve the metadata page. If we are not keeping a sorted
- * free list put the page at the head of the free list.
- * If we are keeping a sorted free list, for truncation,
- * then figure out where this page belongs and either
- * link it in or truncate the file as much as possible.
- * If either the lock get or page get routines
- * fail, then we need to put the page with which we were called
- * back because our caller assumes we take care of it.
- */
- hash = 0;
-
- pgno = PGNO_BASE_MD;
-#ifdef HAVE_HASH
- if (dbp->type == DB_HASH) {
- if ((ret = __ham_return_meta(dbc,
-#ifdef HAVE_FTRUNCATE
- 0,
-#else
- DB_MPOOL_DIRTY,
-#endif
- &meta)) != 0)
- goto err;
- if (meta != NULL)
- hash = 1;
- }
-#endif
- if (meta == NULL) {
- if ((ret = __db_lget(dbc,
- LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
- goto err;
-
- /* If we support truncate, we might not dirty the meta page. */
- if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn,
-#ifdef HAVE_FTRUNCATE
- 0,
-#else
- DB_MPOOL_DIRTY,
-#endif
- &meta)) != 0)
- goto err1;
- }
-
- last_pgno = meta->last_pgno;
- next_pgno = meta->free;
- /*
- * Assign lsnp here so it is always initialized when
- * HAVE_FTRUNCATE is not defined.
- */
- lsnp = &LSN(meta);
-
- DB_ASSERT(dbp->env, h->pgno != next_pgno);
-
-#ifdef HAVE_FTRUNCATE
- /*
- * If we are maintaining a sorted free list see if we either have a
- * new truncation point or the page goes somewhere in the middle of
- * the list. If it goes in the middle of the list, we will drop the
- * meta page and get the previous page.
- */
- if ((ret = __memp_get_freelist(mpf, &nelem, &list)) != 0)
- goto err1;
- if (list == NULL)
- goto no_sort;
-
- if (h->pgno != last_pgno) {
- /*
- * Put the page number in the sorted list.
- * Find its position and the previous page,
- * extend the list, make room and insert.
- */
- position = 0;
- if (nelem != 0) {
- __db_freelist_pos(h->pgno, list, nelem, &position);
-
- DB_ASSERT(dbp->env, h->pgno != list[position]);
-
- /* Get the previous page if this is not the smallest. */
- if (position != 0 || h->pgno > list[0])
- prev_pgno = list[position];
- }
-
- } else if (nelem != 0) {
- /* Find the truncation point. */
- for (lp = &list[nelem - 1]; lp >= list; lp--)
- if (--last_pgno != *lp)
- break;
- if (lp < list || last_pgno < h->pgno - 1)
- do_truncate = 1;
- last_pgno = meta->last_pgno;
- }
-
-no_sort:
- if (prev_pgno == PGNO_INVALID) {
-#ifdef HAVE_HASH
- if (hash) {
- if ((ret =
- __ham_return_meta(dbc, DB_MPOOL_DIRTY, &meta)) != 0)
- goto err1;
- } else
-#endif
- if ((ret = __memp_dirty(mpf,
- &meta, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
- goto err1;
- lsnp = &LSN(meta);
- } else {
- pgno = prev_pgno;
- if ((ret = __memp_fget(mpf, &pgno,
- dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &prev)) != 0)
- goto err1;
- next_pgno = NEXT_PGNO(prev);
- lsnp = &LSN(prev);
- }
-#endif
-
- /*
- * Log the change.
- * We are either logging an update to the metapage or to the
- * previous page in the sorted list.
- *
- * NOTE(review): ldbt is zeroed before use but ddbt is not; only its
- * data and size fields are set below -- confirm the log routine reads
- * nothing else from it.
- */
- if (DBC_LOGGING(dbc)) {
- memset(&ldbt, 0, sizeof(ldbt));
- ldbt.data = h;
- ldbt.size = P_OVERHEAD(dbp);
- /*
- * If we are truncating the file, we need to make sure
- * the logging happens before the truncation. If we
- * are truncating multiple pages we don't need to flush the
- * log here as it will be flushed by __db_truncate_freelist.
- * If we are zeroing pages rather than truncating we still
- * need to flush since they will not have valid LSNs.
- */
- lflag = 0;
-
- if (h->pgno == last_pgno
-#ifdef HAVE_FTRUNCATE
- && do_truncate == 0
-#endif
- )
- lflag = DB_FLUSH;
- switch (h->type) {
- case P_HASH:
- case P_IBTREE:
- case P_IRECNO:
- case P_LBTREE:
- case P_LRECNO:
- case P_LDUP:
- if (h->entries > 0) {
- ldbt.size += h->entries * sizeof(db_indx_t);
- ddbt.data = (u_int8_t *)h + HOFFSET(h);
- ddbt.size = dbp->pgsize - HOFFSET(h);
- if ((ret = __db_pg_freedata_log(dbp, dbc->txn,
- lsnp, lflag,
- h->pgno, lsnp, pgno,
- &ldbt, next_pgno, last_pgno, &ddbt)) != 0)
- goto err1;
- goto logged;
- }
- break;
- case P_HASHMETA:
- ldbt.size = sizeof(HMETA);
- break;
- case P_BTREEMETA:
- ldbt.size = sizeof(BTMETA);
- break;
- case P_OVERFLOW:
- ldbt.size += OV_LEN(h);
- break;
- default:
- DB_ASSERT(dbp->env, h->type != P_QAMDATA);
- }
-
- if ((ret = __db_pg_free_log(dbp,
- dbc->txn, lsnp, lflag, h->pgno,
- lsnp, pgno, &ldbt, next_pgno, last_pgno)) != 0)
- goto err1;
- } else
- LSN_NOT_LOGGED(*lsnp);
-
-logged:
-#ifdef HAVE_FTRUNCATE
- if (do_truncate) {
- start = (u_int32_t) (lp - list) + 1;
- meta->last_pgno--;
- ret = __db_truncate_freelist(
- dbc, meta, h, list, start, nelem);
- h = NULL;
- } else
-#endif
- if (h->pgno == last_pgno) {
- /*
- * We are going to throw this page away, but if we are
- * using MVCC then this version may stick around and we
- * might have to make a copy.
- */
- if (mpf->mfp->multiversion && (ret = __memp_dirty(mpf,
- &h, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
- goto err1;
- LSN(h) = *lsnp;
- P_INIT(h, dbp->pgsize,
- h->pgno, PGNO_INVALID, next_pgno, 0, P_INVALID);
- if ((ret = __memp_fput(mpf,
- dbc->thread_info, h, DB_PRIORITY_VERY_LOW)) != 0)
- goto err1;
- h = NULL;
- /* Give the page back to the OS. */
- if ((ret = __memp_ftruncate(mpf, dbc->txn, dbc->thread_info,
- last_pgno, 0)) != 0)
- goto err1;
- DB_ASSERT(dbp->env, meta->pgno == PGNO_BASE_MD);
- meta->last_pgno--;
- h = NULL;
- } else {
-#ifdef HAVE_FTRUNCATE
- if (list != NULL) {
- /* Put the page number into the list. */
- if ((ret =
- __memp_extend_freelist(mpf, nelem + 1, &list)) != 0)
- goto err1;
- if (prev_pgno != PGNO_INVALID)
- lp = &list[position + 1];
- else
- lp = list;
- if (nelem != 0 && position != nelem)
- memmove(lp + 1, lp, (size_t)
- ((u_int8_t*)&list[nelem] - (u_int8_t*)lp));
- *lp = h->pgno;
- }
-#endif
- /*
- * If we are not truncating the page then we
- * reinitialize it and put it at the head of
- * the free list (or after the previous page when a
- * sorted list is being kept).
- */
- if ((ret = __memp_dirty(mpf,
- &h, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
- goto err1;
- LSN(h) = *lsnp;
- P_INIT(h, dbp->pgsize,
- h->pgno, PGNO_INVALID, next_pgno, 0, P_INVALID);
-#ifdef DIAGNOSTIC
- memset((u_int8_t *) h + P_OVERHEAD(dbp),
- CLEAR_BYTE, dbp->pgsize - P_OVERHEAD(dbp));
-#endif
- if (prev_pgno == PGNO_INVALID)
- meta->free = h->pgno;
- else
- NEXT_PGNO(prev) = h->pgno;
- }
-
- /* Discard the metadata or previous page. */
-err1: if (hash == 0 && meta != NULL && (t_ret = __memp_fput(mpf,
- dbc->thread_info, (PAGE *)meta, dbc->priority)) != 0 && ret == 0)
- ret = t_ret;
- if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0)
- ret = t_ret;
- if (prev != (PAGE*) meta && prev != NULL && (t_ret = __memp_fput(mpf,
- dbc->thread_info, prev, dbc->priority)) != 0 && ret == 0)
- ret = t_ret;
-
- /* Discard the caller's page reference. */
-err: if (h != NULL && (t_ret = __memp_fput(mpf,
- dbc->thread_info, h, dbc->priority)) != 0 && ret == 0)
- ret = t_ret;
-
- /*
- * XXX
- * We have to unlock the caller's page in the caller!
- */
- return (ret);
-}
-
-#ifdef HAVE_FTRUNCATE
-/*
- * __db_freelist_pos -- find the position of a page in the freelist.
- * The list is sorted, we do a binary search.
- *
- * PUBLIC: #ifdef HAVE_FTRUNCATE
- * PUBLIC: void __db_freelist_pos __P((db_pgno_t,
- * PUBLIC: db_pgno_t *, u_int32_t, u_int32_t *));
- * PUBLIC: #endif
- */
-void
-__db_freelist_pos(pgno, list, nelem, posp)
- db_pgno_t pgno;
- db_pgno_t *list;
- u_int32_t nelem;
- u_int32_t *posp;
-{
- u_int32_t base, indx, lim;
-
- indx = 0;
- for (base = 0, lim = nelem; lim != 0; lim >>= 1) {
- indx = base + (lim >> 1);
- if (pgno == list[indx]) {
- *posp = indx;
- return;
- }
- if (pgno > list[indx]) {
- base = indx + 1;
- --lim;
- }
- }
- if (base != 0)
- base--;
- *posp = base;
- return;
-}
-
-static int
-__db_pglistcmp(a, b)
- const void *a, *b;
-{
- db_pglist_t *ap, *bp;
-
- ap = (db_pglist_t *)a;
- bp = (db_pglist_t *)b;
-
- return ((ap->pgno > bp->pgno) ? 1 : (ap->pgno < bp->pgno) ? -1: 0);
-}
-
-/*
- * __db_freelist_sort -- sort a list of free pages.
- * PUBLIC: void __db_freelist_sort __P((db_pglist_t *, u_int32_t));
- */
-void
-__db_freelist_sort(list, nelems)
- db_pglist_t *list;
- u_int32_t nelems;
-{
- qsort(list, (size_t)nelems, sizeof(db_pglist_t), __db_pglistcmp);
-}
-
-/*
- * __db_pg_truncate -- find the truncation point in a sorted freelist.
- *
- * PUBLIC: #ifdef HAVE_FTRUNCATE
- * PUBLIC: int __db_pg_truncate __P((DBC *, DB_TXN *,
- * PUBLIC: db_pglist_t *, DB_COMPACT *, u_int32_t *,
- * PUBLIC: db_pgno_t , db_pgno_t *, DB_LSN *, int));
- * PUBLIC: #endif
- */
-int
-__db_pg_truncate(dbc, txn,
- list, c_data, nelemp, free_pgno, last_pgno, lsnp, in_recovery)
- DBC *dbc;
- DB_TXN *txn;
- db_pglist_t *list;
- DB_COMPACT *c_data;
- u_int32_t *nelemp;
- db_pgno_t free_pgno, *last_pgno;
- DB_LSN *lsnp;
- int in_recovery;
-{
- DB *dbp;
- DBT ddbt;
- DB_LSN null_lsn;
- DB_MPOOLFILE *mpf;
- PAGE *h;
- db_pglist_t *lp, *slp;
- db_pgno_t lpgno, pgno;
- u_int32_t elems, log_size, tpoint;
- int last, ret;
-
- ret = 0;
- h = NULL;
-
- dbp = dbc->dbp;
- mpf = dbp->mpf;
- elems = tpoint = *nelemp;
-
- /*
- * Figure out what (if any) pages can be truncated immediately and
- * record the place from which we can truncate, so we can do the
- * memp_ftruncate below. We also use this to avoid ever putting
- * these pages on the freelist, which we are about to relink.
- */
- pgno = *last_pgno;
- lp = &list[elems - 1];
- last = 1;
- while (tpoint != 0) {
- if (lp->pgno != pgno)
- break;
- pgno--;
- tpoint--;
- lp--;
- }
-
- lp = list;
- slp = &list[elems];
- /*
- * Log the sorted list. We log the whole list so it can be rebuilt.
- * Don't overflow the log file.
- */
-again: if (DBC_LOGGING(dbc)) {
- last = 1;
- lpgno = *last_pgno;
- ddbt.size = elems * sizeof(*lp);
- ddbt.data = lp;
- log_size = ((LOG *)dbc->env->
- lg_handle->reginfo.primary)->log_size;
- if (ddbt.size > log_size / 2) {
- elems = (log_size / 2) / sizeof(*lp);
- ddbt.size = elems * sizeof(*lp);
- last = 0;
- /*
- * If we stopped after the truncation point
- * then we need to truncate from here.
- */
- if (lp + elems >= &list[tpoint])
- lpgno = lp[elems - 1].pgno;
- }
- /*
- * If this is not the begining of the list fetch the end
- * of the previous segment. This page becomes the last_free
- * page and will link to this segment if it is not truncated.
- */
- if (lp != list) {
- if ((ret = __memp_fget(mpf, &lp[-1].pgno,
- dbc->thread_info, txn, 0, &h)) != 0)
- goto err;
- }
-
- slp = &lp[elems];
-
- ZERO_LSN(null_lsn);
- if ((ret = __db_pg_trunc_log(dbp, dbc->txn,
- lsnp, last == 1 ? DB_FLUSH : 0, PGNO_BASE_MD,
- lsnp, h != NULL ? PGNO(h) : PGNO_INVALID,
- h != NULL ? &LSN(h) : &null_lsn,
- free_pgno, lpgno, &ddbt)) != 0)
- goto err;
- if (h != NULL) {
- LSN(h) = *lsnp;
- if ((ret = __memp_fput(mpf,
- dbc->thread_info, h, dbc->priority)) != 0)
- goto err;
- }
- h = NULL;
- } else if (!in_recovery)
- LSN_NOT_LOGGED(*lsnp);
-
- for (; lp < slp && lp < &list[tpoint]; lp++) {
- if ((ret = __memp_fget(mpf, &lp->pgno, dbc->thread_info,
- txn, !in_recovery ? DB_MPOOL_DIRTY : 0, &h)) != 0) {
- /* Page may have been truncated later. */
- if (in_recovery && ret == DB_PAGE_NOTFOUND) {
- ret = 0;
- continue;
- }
- goto err;
- }
- if (in_recovery) {
- if (LOG_COMPARE(&LSN(h), &lp->lsn) == 0) {
- if ((ret = __memp_dirty(mpf, &h,
- dbc->thread_info,
- txn, dbp->priority, 0)) != 0) {
- (void)__memp_fput(mpf,
- dbc->thread_info, h, dbp->priority);
- goto err;
- }
- } else
- goto skip;
- }
-
- if (lp == &list[tpoint - 1])
- NEXT_PGNO(h) = PGNO_INVALID;
- else
- NEXT_PGNO(h) = lp[1].pgno;
- DB_ASSERT(mpf->env, NEXT_PGNO(h) < *last_pgno);
-
- LSN(h) = *lsnp;
-skip: if ((ret = __memp_fput(mpf,
- dbc->thread_info, h, dbp->priority)) != 0)
- goto err;
- h = NULL;
- }
-
- /*
- * If we did not log everything try again. We start from slp and
- * try to go to the end of the list.
- */
- if (last == 0) {
- elems = (u_int32_t)(&list[*nelemp] - slp);
- lp = slp;
- goto again;
- }
-
- /*
- * Truncate the file. Its possible that the last page is the
- * only one that got truncated and that's done in the caller.
- */
- if (pgno != *last_pgno) {
- if (tpoint != *nelemp &&
- (ret = __memp_ftruncate(mpf, dbc->txn, dbc->thread_info,
- pgno + 1, in_recovery ? MP_TRUNC_RECOVER : 0)) != 0)
- goto err;
- if (c_data)
- c_data->compact_pages_truncated += *last_pgno - pgno;
- *last_pgno = pgno;
- }
- *nelemp = tpoint;
-
- if (0) {
-err: if (h != NULL)
- (void)__memp_fput(mpf,
- dbc->thread_info, h, dbc->priority);
- }
- return (ret);
-}
-
-/*
- * __db_free_truncate --
- * Build a sorted free list and truncate free pages at the end
- * of the file.
- *
- * PUBLIC: #ifdef HAVE_FTRUNCATE
- * PUBLIC: int __db_free_truncate __P((DB *, DB_THREAD_INFO *, DB_TXN *,
- * PUBLIC: u_int32_t, DB_COMPACT *, db_pglist_t **, u_int32_t *,
- * PUBLIC: db_pgno_t *));
- * PUBLIC: #endif
- */
-int
-__db_free_truncate(dbp, ip, txn, flags, c_data, listp, nelemp, last_pgnop)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- u_int32_t flags;
- DB_COMPACT *c_data;
- db_pglist_t **listp;
- u_int32_t *nelemp;
- db_pgno_t *last_pgnop;
-{
- DBC *dbc;
- DBMETA *meta;
- DB_LOCK metalock;
- DB_MPOOLFILE *mpf;
- ENV *env;
- PAGE *h;
- db_pglist_t *list, *lp;
- db_pgno_t pgno;
- u_int32_t nelems;
- int ret, t_ret;
- size_t size;
-
- COMPQUIET(flags, 0);
- list = NULL;
- meta = NULL;
- env = dbp->env;
- mpf = dbp->mpf;
- h = NULL;
- nelems = 0;
- if (listp != NULL) {
- *listp = NULL;
- DB_ASSERT(env, nelemp != NULL);
- *nelemp = 0;
- }
-
- if ((ret = __db_cursor(dbp, ip, txn, &dbc, DB_WRITELOCK)) != 0)
- return (ret);
-
- pgno = PGNO_BASE_MD;
- if ((ret = __db_lget(dbc,
- LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
- goto err;
- if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn, 0,
- &meta)) != 0)
- goto err;
-
- if (last_pgnop != NULL)
- *last_pgnop = meta->last_pgno;
- if ((pgno = meta->free) == PGNO_INVALID)
- goto done;
-
- size = 128;
- if ((ret = __os_malloc(env, size * sizeof(*list), &list)) != 0)
- goto err;
- lp = list;
-
- do {
- if (lp == &list[size]) {
- size *= 2;
- if ((ret = __os_realloc(env,
- size * sizeof(*list), &list)) != 0)
- goto err;
- lp = &list[size / 2];
- }
- if ((ret = __memp_fget(mpf, &pgno,
- dbc->thread_info, dbc->txn, 0, &h)) != 0)
- goto err;
-
- lp->pgno = pgno;
- lp->next_pgno = NEXT_PGNO(h);
- lp->lsn = LSN(h);
- pgno = NEXT_PGNO(h);
- if ((ret = __memp_fput(mpf,
- dbc->thread_info, h, dbc->priority)) != 0)
- goto err;
- lp++;
- } while (pgno != PGNO_INVALID);
- nelems = (u_int32_t)(lp - list);
-
- if ((ret = __memp_dirty(mpf,
- &meta, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
- goto err;
-
- /* Sort the list */
- __db_freelist_sort(list, nelems);
-
- if ((ret = __db_pg_truncate(dbc, txn, list, c_data,
- &nelems, meta->free, &meta->last_pgno, &LSN(meta), 0)) != 0)
- goto err;
-
- if (nelems == 0)
- meta->free = PGNO_INVALID;
- else
- meta->free = list[0].pgno;
-
-done: if (last_pgnop != NULL)
- *last_pgnop = meta->last_pgno;
-
- /*
- * The truncate point is the number of pages in the free
- * list back from the last page. The number of pages
- * in the free list are the number that we can swap in.
- */
- if (c_data)
- c_data->compact_truncate = (u_int32_t)meta->last_pgno - nelems;
-
- if (nelems != 0 && listp != NULL) {
- *listp = list;
- *nelemp = nelems;
- list = NULL;
- }
-
-err: if (list != NULL)
- __os_free(env, list);
- if (meta != NULL && (t_ret = __memp_fput(mpf,
- dbc->thread_info, (PAGE *)meta, dbc->priority)) != 0 && ret == 0)
- ret = t_ret;
- if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0)
- ret = t_ret;
- if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
-}
-
-static int
-__db_truncate_freelist(dbc, meta, h, list, start, nelem)
- DBC *dbc;
- DBMETA *meta;
- PAGE *h;
- db_pgno_t *list;
- u_int32_t start, nelem;
-{
- DB *dbp;
- DBT ddbt;
- DB_LSN null_lsn;
- DB_MPOOLFILE *mpf;
- PAGE *last_free, *pg;
- db_pgno_t *lp, free_pgno, lpgno;
- db_pglist_t *plist, *pp, *spp;
- u_int32_t elem, log_size;
- int last, ret;
-
- dbp = dbc->dbp;
- mpf = dbp->mpf;
- plist = NULL;
- last_free = NULL;
- pg = NULL;
-
- if (start != 0 &&
- (ret = __memp_fget(mpf, &list[start - 1],
- dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &last_free)) != 0)
- goto err;
-
- if (DBC_LOGGING(dbc)) {
- if ((ret = __os_malloc(dbp->env,
- (nelem - start) * sizeof(*pp), &plist)) != 0)
- goto err;
-
- pp = plist;
- for (lp = &list[start]; lp < &list[nelem]; lp++) {
- pp->pgno = *lp;
- if ((ret = __memp_fget(mpf, lp,
- dbc->thread_info, dbc->txn, 0, &pg)) != 0)
- goto err;
- pp->lsn = LSN(pg);
- pp->next_pgno = NEXT_PGNO(pg);
- if ((ret = __memp_fput(mpf,
- dbc->thread_info, pg, DB_PRIORITY_VERY_LOW)) != 0)
- goto err;
- pg = NULL;
- pp++;
- }
- ZERO_LSN(null_lsn);
- pp = plist;
- elem = nelem - start;
- log_size = ((LOG *)dbc->env->
- lg_handle->reginfo.primary)->log_size;
-again: ddbt.data = spp = pp;
- free_pgno = pp->pgno;
- lpgno = meta->last_pgno;
- ddbt.size = elem * sizeof(*pp);
- if (ddbt.size > log_size / 2) {
- elem = (log_size / 2) / (u_int32_t)sizeof(*pp);
- ddbt.size = elem * sizeof(*pp);
- pp += elem;
- elem = (nelem - start) - (u_int32_t)(pp - plist);
- lpgno = pp[-1].pgno;
- last = 0;
- } else
- last = 1;
- /*
- * Get the page which will link to this section if we abort.
- * If this is the first segment then its last_free.
- */
- if (spp == plist)
- pg = last_free;
- else if ((ret = __memp_fget(mpf, &spp[-1].pgno,
- dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &pg)) != 0)
- goto err;
-
- if ((ret = __db_pg_trunc_log(dbp, dbc->txn,
- &LSN(meta), last == 1 ? DB_FLUSH : 0,
- PGNO(meta), &LSN(meta),
- pg != NULL ? PGNO(pg) : PGNO_INVALID,
- pg != NULL ? &LSN(pg) : &null_lsn,
- free_pgno, lpgno, &ddbt)) != 0)
- goto err;
- if (pg != NULL) {
- LSN(pg) = LSN(meta);
- if (pg != last_free && (ret = __memp_fput(mpf,
- dbc->thread_info, pg, DB_PRIORITY_VERY_LOW)) != 0)
- goto err;
- pg = NULL;
- }
- if (last == 0)
- goto again;
- } else
- LSN_NOT_LOGGED(LSN(meta));
-
- if ((ret = __memp_fput(mpf,
- dbc->thread_info, h, DB_PRIORITY_VERY_LOW)) != 0)
- goto err;
- h = NULL;
- if ((ret = __memp_ftruncate(mpf, dbc->txn, dbc->thread_info,
- list[start], 0)) != 0)
- goto err;
- meta->last_pgno = list[start] - 1;
-
- if (start == 0)
- meta->free = PGNO_INVALID;
- else {
- NEXT_PGNO(last_free) = PGNO_INVALID;
- if ((ret = __memp_fput(mpf,
- dbc->thread_info, last_free, dbc->priority)) != 0)
- goto err;
- last_free = NULL;
- }
-
- /* Shrink the number of elements in the list. */
- ret = __memp_extend_freelist(mpf, start, &list);
-
-err: if (plist != NULL)
- __os_free(dbp->env, plist);
-
- /* We need to put the page on error. */
- if (h != NULL)
- (void)__memp_fput(mpf, dbc->thread_info, h, dbc->priority);
- if (pg != NULL && pg != last_free)
- (void)__memp_fput(mpf, dbc->thread_info, pg, dbc->priority);
- if (last_free != NULL)
- (void)__memp_fput(mpf,
- dbc->thread_info, last_free, dbc->priority);
-
- return (ret);
-}
-#endif
-
-#ifdef DEBUG
-/*
- * __db_lprint --
- * Print out the list of locks currently held by a cursor.
- *
- * PUBLIC: int __db_lprint __P((DBC *));
- */
-int
-__db_lprint(dbc)
- DBC *dbc;
-{
- DB *dbp;
- DB_LOCKREQ req;
- ENV *env;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- if (LOCKING_ON(env)) {
- req.op = DB_LOCK_DUMP;
- (void)__lock_vec(env, dbc->locker, 0, &req, 1, NULL);
- }
- return (0);
-}
-#endif
-
-/*
- * __db_lget --
- * The standard lock get call.
- *
- * PUBLIC: int __db_lget __P((DBC *,
- * PUBLIC: int, db_pgno_t, db_lockmode_t, u_int32_t, DB_LOCK *));
- */
-int
-__db_lget(dbc, action, pgno, mode, lkflags, lockp)
- DBC *dbc;
- int action;
- db_pgno_t pgno;
- db_lockmode_t mode;
- u_int32_t lkflags;
- DB_LOCK *lockp;
-{
- DB *dbp;
- DB_LOCKREQ couple[3], *reqp;
- DB_TXN *txn;
- ENV *env;
- int has_timeout, i, ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
- txn = dbc->txn;
-
- /*
- * We do not always check if we're configured for locking before
- * calling __db_lget to acquire the lock.
- */
- if (CDB_LOCKING(env) || !LOCKING_ON(env) ||
- (MULTIVERSION(dbp) && mode == DB_LOCK_READ &&
- dbc->txn != NULL && F_ISSET(dbc->txn, TXN_SNAPSHOT)) ||
- F_ISSET(dbc, DBC_DONTLOCK) || (F_ISSET(dbc, DBC_RECOVER) &&
- (action != LCK_ROLLBACK || IS_REP_CLIENT(env))) ||
- (action != LCK_ALWAYS && F_ISSET(dbc, DBC_OPD))) {
- LOCK_INIT(*lockp);
- return (0);
- }
-
- dbc->lock.pgno = pgno;
- if (lkflags & DB_LOCK_RECORD)
- dbc->lock.type = DB_RECORD_LOCK;
- else
- dbc->lock.type = DB_PAGE_LOCK;
- lkflags &= ~DB_LOCK_RECORD;
-
- /*
- * If the transaction enclosing this cursor has DB_LOCK_NOWAIT set,
- * pass that along to the lock call.
- */
- if (DB_NONBLOCK(dbc))
- lkflags |= DB_LOCK_NOWAIT;
-
- if (F_ISSET(dbc, DBC_READ_UNCOMMITTED) && mode == DB_LOCK_READ)
- mode = DB_LOCK_READ_UNCOMMITTED;
-
- has_timeout = F_ISSET(dbc, DBC_RECOVER) ||
- (txn != NULL && F_ISSET(txn, TXN_LOCKTIMEOUT));
-
- /*
- * Transactional locking.
- * Hold on to the previous read lock only if we are in full isolation.
- * COUPLE_ALWAYS indicates we are holding an interior node which need
- * not be isolated.
- * Downgrade write locks if we are supporting dirty readers.
- */
- if ((action != LCK_COUPLE && action != LCK_COUPLE_ALWAYS) ||
- !LOCK_ISSET(*lockp))
- action = 0;
- else if (dbc->txn == NULL || action == LCK_COUPLE_ALWAYS)
- action = LCK_COUPLE;
- else if (F_ISSET(dbc, DBC_READ_COMMITTED | DBC_WAS_READ_COMMITTED) &&
- lockp->mode == DB_LOCK_READ)
- action = LCK_COUPLE;
- else if (lockp->mode == DB_LOCK_READ_UNCOMMITTED)
- action = LCK_COUPLE;
- else if (F_ISSET(dbc->dbp,
- DB_AM_READ_UNCOMMITTED) && lockp->mode == DB_LOCK_WRITE)
- action = LCK_DOWNGRADE;
- else
- action = 0;
-
- i = 0;
- switch (action) {
- default:
- if (has_timeout)
- goto do_couple;
- ret = __lock_get(env,
- dbc->locker, lkflags, &dbc->lock_dbt, mode, lockp);
- break;
-
- case LCK_DOWNGRADE:
- couple[0].op = DB_LOCK_GET;
- couple[0].obj = NULL;
- couple[0].lock = *lockp;
- couple[0].mode = DB_LOCK_WWRITE;
- UMRW_SET(couple[0].timeout);
- i++;
- /* FALLTHROUGH */
- case LCK_COUPLE:
-do_couple: couple[i].op = has_timeout? DB_LOCK_GET_TIMEOUT : DB_LOCK_GET;
- couple[i].obj = &dbc->lock_dbt;
- couple[i].mode = mode;
- UMRW_SET(couple[i].timeout);
- i++;
- if (has_timeout)
- couple[0].timeout =
- F_ISSET(dbc, DBC_RECOVER) ? 0 : txn->lock_timeout;
- if (action == LCK_COUPLE || action == LCK_DOWNGRADE) {
- couple[i].op = DB_LOCK_PUT;
- couple[i].lock = *lockp;
- i++;
- }
-
- ret = __lock_vec(env,
- dbc->locker, lkflags, couple, i, &reqp);
- if (ret == 0 || reqp == &couple[i - 1])
- *lockp = i == 1 ? couple[0].lock : couple[i - 2].lock;
- break;
- }
-
- if (txn != NULL && ret == DB_LOCK_DEADLOCK)
- F_SET(txn, TXN_DEADLOCK);
- return ((ret == DB_LOCK_NOTGRANTED && !F_ISSET(env->dbenv,
- DB_ENV_TIME_NOTGRANTED)) ? DB_LOCK_DEADLOCK : ret);
-}
-
-/*
- * __db_lput --
- * The standard lock put call.
- *
- * PUBLIC: int __db_lput __P((DBC *, DB_LOCK *));
- */
-int
-__db_lput(dbc, lockp)
- DBC *dbc;
- DB_LOCK *lockp;
-{
- DB_LOCKREQ couple[2], *reqp;
- ENV *env;
- int action, ret;
-
- /*
- * Transactional locking.
- * Hold on to the read locks only if we are in full isolation.
- * Downgrade write locks if we are supporting dirty readers.
- */
- if (F_ISSET(dbc->dbp,
- DB_AM_READ_UNCOMMITTED) && lockp->mode == DB_LOCK_WRITE)
- action = LCK_DOWNGRADE;
- else if (dbc->txn == NULL)
- action = LCK_COUPLE;
- else if (F_ISSET(dbc, DBC_READ_COMMITTED | DBC_WAS_READ_COMMITTED) &&
- lockp->mode == DB_LOCK_READ)
- action = LCK_COUPLE;
- else if (lockp->mode == DB_LOCK_READ_UNCOMMITTED)
- action = LCK_COUPLE;
- else
- action = 0;
-
- env = dbc->env;
- switch (action) {
- case LCK_COUPLE:
- ret = __lock_put(env, lockp);
- break;
- case LCK_DOWNGRADE:
- couple[0].op = DB_LOCK_GET;
- couple[0].obj = NULL;
- couple[0].mode = DB_LOCK_WWRITE;
- couple[0].lock = *lockp;
- UMRW_SET(couple[0].timeout);
- couple[1].op = DB_LOCK_PUT;
- couple[1].lock = *lockp;
- ret = __lock_vec(env, dbc->locker, 0, couple, 2, &reqp);
- if (ret == 0 || reqp == &couple[1])
- *lockp = couple[0].lock;
- break;
- default:
- ret = 0;
- break;
- }
-
- return (ret);
-}
diff --git a/db/db_method.c b/db/db_method.c
deleted file mode 100644
index 1182f97..0000000
--- a/db/db_method.c
+++ /dev/null
@@ -1,1052 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1999-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/crypto.h"
-#include "dbinc/db_page.h"
-#include "dbinc/btree.h"
-#include "dbinc/hash.h"
-#include "dbinc/lock.h"
-#include "dbinc/mp.h"
-#include "dbinc/qam.h"
-#include "dbinc/txn.h"
-
-#ifdef HAVE_RPC
-#ifdef HAVE_SYSTEM_INCLUDE_FILES
-#include <rpc/rpc.h>
-#endif
-#include "db_server.h"
-#include "dbinc_auto/rpc_client_ext.h"
-#endif
-
-static int __db_get_byteswapped __P((DB *, int *));
-static int __db_get_dbname __P((DB *, const char **, const char **));
-static DB_ENV *__db_get_env __P((DB *));
-static void __db_get_msgcall
- __P((DB *, void (**)(const DB_ENV *, const char *)));
-static DB_MPOOLFILE *__db_get_mpf __P((DB *));
-static int __db_get_multiple __P((DB *));
-static int __db_get_transactional __P((DB *));
-static int __db_get_type __P((DB *, DBTYPE *dbtype));
-static int __db_init __P((DB *, u_int32_t));
-static int __db_get_alloc __P((DB *, void *(**)(size_t),
- void *(**)(void *, size_t), void (**)(void *)));
-static int __db_set_alloc __P((DB *, void *(*)(size_t),
- void *(*)(void *, size_t), void (*)(void *)));
-static int __db_get_append_recno __P((DB *,
- int (**)(DB *, DBT *, db_recno_t)));
-static int __db_set_append_recno __P((DB *, int (*)(DB *, DBT *, db_recno_t)));
-static int __db_get_cachesize __P((DB *, u_int32_t *, u_int32_t *, int *));
-static int __db_set_cachesize __P((DB *, u_int32_t, u_int32_t, int));
-static int __db_get_create_dir __P((DB *, const char **));
-static int __db_set_create_dir __P((DB *, const char *));
-static int __db_get_dup_compare
- __P((DB *, int (**)(DB *, const DBT *, const DBT *)));
-static int __db_set_dup_compare
- __P((DB *, int (*)(DB *, const DBT *, const DBT *)));
-static int __db_get_encrypt_flags __P((DB *, u_int32_t *));
-static int __db_set_encrypt __P((DB *, const char *, u_int32_t));
-static int __db_get_feedback __P((DB *, void (**)(DB *, int, int)));
-static int __db_set_feedback __P((DB *, void (*)(DB *, int, int)));
-static void __db_map_flags __P((DB *, u_int32_t *, u_int32_t *));
-static int __db_get_pagesize __P((DB *, u_int32_t *));
-static int __db_set_paniccall __P((DB *, void (*)(DB_ENV *, int)));
-static int __db_set_priority __P((DB *, DB_CACHE_PRIORITY));
-static int __db_get_priority __P((DB *, DB_CACHE_PRIORITY *));
-static void __db_get_errcall __P((DB *,
- void (**)(const DB_ENV *, const char *, const char *)));
-static void __db_set_errcall
- __P((DB *, void (*)(const DB_ENV *, const char *, const char *)));
-static void __db_get_errfile __P((DB *, FILE **));
-static void __db_set_errfile __P((DB *, FILE *));
-static void __db_get_errpfx __P((DB *, const char **));
-static void __db_set_errpfx __P((DB *, const char *));
-static void __db_set_msgcall
- __P((DB *, void (*)(const DB_ENV *, const char *)));
-static void __db_get_msgfile __P((DB *, FILE **));
-static void __db_set_msgfile __P((DB *, FILE *));
-static void __dbh_err __P((DB *, int, const char *, ...));
-static void __dbh_errx __P((DB *, const char *, ...));
-
-/*
- * db_create --
- * DB constructor.
- *
- * EXTERN: int db_create __P((DB **, DB_ENV *, u_int32_t));
- */
-int
-db_create(dbpp, dbenv, flags)
- DB **dbpp;
- DB_ENV *dbenv;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int ret;
-
- ip = NULL;
- env = dbenv == NULL ? NULL : dbenv->env;
-
- /* Check for invalid function flags. */
- if (flags != 0)
- return (__db_ferr(env, "db_create", 0));
-
- if (env != NULL)
- ENV_ENTER(env, ip);
- ret = __db_create_internal(dbpp, env, flags);
- if (env != NULL)
- ENV_LEAVE(env, ip);
-
- return (ret);
-}
-
-/*
- * __db_create_internal --
- * DB constructor internal routine.
- *
- * PUBLIC: int __db_create_internal __P((DB **, ENV *, u_int32_t));
- */
-int
-__db_create_internal(dbpp, env, flags)
- DB **dbpp;
- ENV *env;
- u_int32_t flags;
-{
- DB *dbp;
- DB_ENV *dbenv;
- DB_REP *db_rep;
- int ret;
-
- *dbpp = NULL;
-
- /* If we don't have an environment yet, allocate a local one. */
- if (env == NULL) {
- if ((ret = db_env_create(&dbenv, 0)) != 0)
- return (ret);
- env = dbenv->env;
- F_SET(env, ENV_DBLOCAL);
- } else
- dbenv = env->dbenv;
-
- /* Allocate and initialize the DB handle. */
- if ((ret = __os_calloc(env, 1, sizeof(*dbp), &dbp)) != 0)
- goto err;
-
- dbp->dbenv = env->dbenv;
- dbp->env = env;
- if ((ret = __db_init(dbp, flags)) != 0)
- goto err;
-
- MUTEX_LOCK(env, env->mtx_dblist);
- ++env->db_ref;
- MUTEX_UNLOCK(env, env->mtx_dblist);
-
- /*
- * Set the replication timestamp; it's 0 if we're not in a replicated
- * environment. Don't acquire a lock to read the value, even though
- * it's opaque: all we check later is value equality, nothing else.
- */
- dbp->timestamp = REP_ON(env) ?
- ((REGENV *)env->reginfo->primary)->rep_timestamp : 0;
- /*
- * Set the replication generation number for fid management; valid
- * replication generations start at 1. Don't acquire a lock to
- * read the value. All we check later is value equality.
- */
- db_rep = env->rep_handle;
- dbp->fid_gen = REP_ON(env) ? ((REP *)db_rep->region)->gen : 0;
-
- /* If not RPC, open a backing DB_MPOOLFILE handle in the memory pool. */
- if (!RPC_ON(dbenv) && (ret = __memp_fcreate(env, &dbp->mpf)) != 0)
- goto err;
-
- dbp->type = DB_UNKNOWN;
-
- *dbpp = dbp;
- return (0);
-
-err: if (dbp != NULL) {
- if (dbp->mpf != NULL)
- (void)__memp_fclose(dbp->mpf, 0);
- __os_free(env, dbp);
- }
-
- if (F_ISSET(env, ENV_DBLOCAL))
- (void)__env_close(dbp->dbenv, 0);
-
- return (ret);
-}
-
-/*
- * __db_init --
- * Initialize a DB structure.
- */
-static int
-__db_init(dbp, flags)
- DB *dbp;
- u_int32_t flags;
-{
- int ret;
-
- dbp->locker = NULL;
- LOCK_INIT(dbp->handle_lock);
-
- TAILQ_INIT(&dbp->free_queue);
- TAILQ_INIT(&dbp->active_queue);
- TAILQ_INIT(&dbp->join_queue);
- LIST_INIT(&dbp->s_secondaries);
-
- FLD_SET(dbp->am_ok,
- DB_OK_BTREE | DB_OK_HASH | DB_OK_QUEUE | DB_OK_RECNO);
-
- /* DB PUBLIC HANDLE LIST BEGIN */
- dbp->associate = __db_associate_pp;
- dbp->associate_foreign = __db_associate_foreign_pp;
- dbp->close = __db_close_pp;
- dbp->compact = __db_compact_pp;
- dbp->cursor = __db_cursor_pp;
- dbp->del = __db_del_pp;
- dbp->dump = __db_dump_pp;
- dbp->err = __dbh_err;
- dbp->errx = __dbh_errx;
- dbp->exists = __db_exists;
- dbp->fd = __db_fd_pp;
- dbp->get = __db_get_pp;
- dbp->get_alloc = __db_get_alloc;
- dbp->get_append_recno = __db_get_append_recno;
- dbp->get_byteswapped = __db_get_byteswapped;
- dbp->get_cachesize = __db_get_cachesize;
- dbp->get_create_dir = __db_get_create_dir;
- dbp->get_dbname = __db_get_dbname;
- dbp->get_dup_compare = __db_get_dup_compare;
- dbp->get_encrypt_flags = __db_get_encrypt_flags;
- dbp->get_env = __db_get_env;
- dbp->get_errcall = __db_get_errcall;
- dbp->get_errfile = __db_get_errfile;
- dbp->get_errpfx = __db_get_errpfx;
- dbp->get_feedback = __db_get_feedback;
- dbp->get_flags = __db_get_flags;
- dbp->get_lorder = __db_get_lorder;
- dbp->get_mpf = __db_get_mpf;
- dbp->get_msgcall = __db_get_msgcall;
- dbp->get_msgfile = __db_get_msgfile;
- dbp->get_multiple = __db_get_multiple;
- dbp->get_open_flags = __db_get_open_flags;
- dbp->get_partition_dirs = __partition_get_dirs;
- dbp->get_partition_callback = __partition_get_callback;
- dbp->get_partition_keys = __partition_get_keys;
- dbp->get_pagesize = __db_get_pagesize;
- dbp->get_priority = __db_get_priority;
- dbp->get_transactional = __db_get_transactional;
- dbp->get_type = __db_get_type;
- dbp->join = __db_join_pp;
- dbp->key_range = __db_key_range_pp;
- dbp->open = __db_open_pp;
- dbp->pget = __db_pget_pp;
- dbp->put = __db_put_pp;
- dbp->remove = __db_remove_pp;
- dbp->rename = __db_rename_pp;
- dbp->set_alloc = __db_set_alloc;
- dbp->set_append_recno = __db_set_append_recno;
- dbp->set_cachesize = __db_set_cachesize;
- dbp->set_create_dir = __db_set_create_dir;
- dbp->set_dup_compare = __db_set_dup_compare;
- dbp->set_encrypt = __db_set_encrypt;
- dbp->set_errcall = __db_set_errcall;
- dbp->set_errfile = __db_set_errfile;
- dbp->set_errpfx = __db_set_errpfx;
- dbp->set_feedback = __db_set_feedback;
- dbp->set_flags = __db_set_flags;
- dbp->set_lorder = __db_set_lorder;
- dbp->set_msgcall = __db_set_msgcall;
- dbp->set_msgfile = __db_set_msgfile;
- dbp->set_pagesize = __db_set_pagesize;
- dbp->set_paniccall = __db_set_paniccall;
- dbp->set_partition = __partition_set;
- dbp->set_partition_dirs = __partition_set_dirs;
- dbp->set_priority = __db_set_priority;
- dbp->sort_multiple = __db_sort_multiple;
- dbp->stat = __db_stat_pp;
- dbp->stat_print = __db_stat_print_pp;
- dbp->sync = __db_sync_pp;
- dbp->truncate = __db_truncate_pp;
- dbp->upgrade = __db_upgrade_pp;
- dbp->verify = __db_verify_pp;
- /* DB PUBLIC HANDLE LIST END */
-
- /* Access method specific. */
- if ((ret = __bam_db_create(dbp)) != 0)
- return (ret);
- if ((ret = __ham_db_create(dbp)) != 0)
- return (ret);
- if ((ret = __qam_db_create(dbp)) != 0)
- return (ret);
-
-#ifdef HAVE_RPC
- /*
- * RPC specific: must be last, as we replace methods set by the
- * access methods.
- */
- if (RPC_ON(dbp->dbenv)) {
- __dbcl_dbp_init(dbp);
- /*
- * !!!
- * We wrap the DB->open method for RPC, and the rpc.src file
- * can't handle that.
- */
- dbp->open = __dbcl_db_open_wrap;
- if ((ret = __dbcl_db_create(dbp, dbp->dbenv, flags)) != 0)
- return (ret);
- }
-#else
- COMPQUIET(flags, 0);
-#endif
-
- return (0);
-}
-
-/*
- * __dbh_am_chk --
- * Error if an unreasonable method is called.
- *
- * PUBLIC: int __dbh_am_chk __P((DB *, u_int32_t));
- */
-int
-__dbh_am_chk(dbp, flags)
- DB *dbp;
- u_int32_t flags;
-{
- /*
- * We start out allowing any access methods to be called, and as the
- * application calls the methods the options become restricted. The
- * idea is to quit as soon as an illegal method combination is called.
- */
- if ((LF_ISSET(DB_OK_BTREE) && FLD_ISSET(dbp->am_ok, DB_OK_BTREE)) ||
- (LF_ISSET(DB_OK_HASH) && FLD_ISSET(dbp->am_ok, DB_OK_HASH)) ||
- (LF_ISSET(DB_OK_QUEUE) && FLD_ISSET(dbp->am_ok, DB_OK_QUEUE)) ||
- (LF_ISSET(DB_OK_RECNO) && FLD_ISSET(dbp->am_ok, DB_OK_RECNO))) {
- FLD_CLR(dbp->am_ok, ~flags);
- return (0);
- }
-
- __db_errx(dbp->env,
- "call implies an access method which is inconsistent with previous calls");
- return (EINVAL);
-}
-
-/*
- * __dbh_err --
- * Db.err method.
- */
-static void
-#ifdef STDC_HEADERS
-__dbh_err(DB *dbp, int error, const char *fmt, ...)
-#else
-__dbh_err(dbp, error, fmt, va_alist)
- DB *dbp;
- int error;
- const char *fmt;
- va_dcl
-#endif
-{
- /* Message with error string, to stderr by default. */
- DB_REAL_ERR(dbp->dbenv, error, DB_ERROR_SET, 1, fmt);
-}
-
-/*
- * __dbh_errx --
- * Db.errx method.
- */
-static void
-#ifdef STDC_HEADERS
-__dbh_errx(DB *dbp, const char *fmt, ...)
-#else
-__dbh_errx(dbp, fmt, va_alist)
- DB *dbp;
- const char *fmt;
- va_dcl
-#endif
-{
- /* Message without error string, to stderr by default. */
- DB_REAL_ERR(dbp->dbenv, 0, DB_ERROR_NOT_SET, 1, fmt);
-}
-
-/*
- * __db_get_byteswapped --
- * Return if database requires byte swapping.
- */
-static int
-__db_get_byteswapped(dbp, isswapped)
- DB *dbp;
- int *isswapped;
-{
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_byteswapped");
-
- *isswapped = F_ISSET(dbp, DB_AM_SWAP) ? 1 : 0;
- return (0);
-}
-
-/*
- * __db_get_dbname --
- * Get the name of the database as passed to DB->open.
- */
-static int
-__db_get_dbname(dbp, fnamep, dnamep)
- DB *dbp;
- const char **fnamep, **dnamep;
-{
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_dbname");
-
- if (fnamep != NULL)
- *fnamep = dbp->fname;
- if (dnamep != NULL)
- *dnamep = dbp->dname;
- return (0);
-}
-
-/*
- * __db_get_env --
- * Get the DB_ENV handle that was passed to db_create.
- */
-static DB_ENV *
-__db_get_env(dbp)
- DB *dbp;
-{
- return (dbp->dbenv);
-}
-
-/*
- * __db_get_mpf --
- * Get the underlying DB_MPOOLFILE handle.
- */
-static DB_MPOOLFILE *
-__db_get_mpf(dbp)
- DB *dbp;
-{
- return (dbp->mpf);
-}
-
-/*
- * get_multiple --
- * Return whether this DB handle references a physical file with multiple
- * databases.
- */
-static int
-__db_get_multiple(dbp)
- DB *dbp;
-{
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_multiple");
-
- /*
- * Only return TRUE if the handle is for the master database, not for
- * any subdatabase in the physical file. If it's a Btree, with the
- * subdatabases flag set, and the meta-data page has the right value,
- * return TRUE. (We don't need to check it's a Btree, I suppose, but
- * it doesn't hurt.)
- */
- return (dbp->type == DB_BTREE &&
- F_ISSET(dbp, DB_AM_SUBDB) &&
- dbp->meta_pgno == PGNO_BASE_MD ? 1 : 0);
-}
-
-/*
- * get_transactional --
- * Return whether this database was created in a transaction.
- */
-static int
-__db_get_transactional(dbp)
- DB *dbp;
-{
- return (F_ISSET(dbp, DB_AM_TXN) ? 1 : 0);
-}
-
-/*
- * __db_get_type --
- * Return type of underlying database.
- */
-static int
-__db_get_type(dbp, dbtype)
- DB *dbp;
- DBTYPE *dbtype;
-{
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_type");
-
- *dbtype = dbp->type;
- return (0);
-}
-
-/*
- * __db_get_append_recno --
- * Get record number append routine.
- */
-static int
-__db_get_append_recno(dbp, funcp)
- DB *dbp;
- int (**funcp) __P((DB *, DBT *, db_recno_t));
-{
- DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO);
- if (funcp)
- *funcp = dbp->db_append_recno;
-
- return (0);
-}
-/*
- * __db_set_append_recno --
- * Set record number append routine.
- */
-static int
-__db_set_append_recno(dbp, func)
- DB *dbp;
- int (*func) __P((DB *, DBT *, db_recno_t));
-{
- DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_append_recno");
- DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO);
-
- dbp->db_append_recno = func;
-
- return (0);
-}
-
-/*
- * __db_get_cachesize --
- * Get underlying cache size.
- */
-static int
-__db_get_cachesize(dbp, cache_gbytesp, cache_bytesp, ncachep)
- DB *dbp;
- u_int32_t *cache_gbytesp, *cache_bytesp;
- int *ncachep;
-{
- DB_ILLEGAL_IN_ENV(dbp, "DB->get_cachesize");
-
- return (__memp_get_cachesize(dbp->dbenv,
- cache_gbytesp, cache_bytesp, ncachep));
-}
-
-/*
- * __db_set_cachesize --
- * Set underlying cache size.
- */
-static int
-__db_set_cachesize(dbp, cache_gbytes, cache_bytes, ncache)
- DB *dbp;
- u_int32_t cache_gbytes, cache_bytes;
- int ncache;
-{
- DB_ILLEGAL_IN_ENV(dbp, "DB->set_cachesize");
- DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_cachesize");
-
- return (__memp_set_cachesize(
- dbp->dbenv, cache_gbytes, cache_bytes, ncache));
-}
-
-static int
-__db_set_create_dir(dbp, dir)
- DB *dbp;
- const char *dir;
-{
- DB_ENV *dbenv;
- int i;
-
- dbenv = dbp->dbenv;
-
- for (i = 0; i < dbenv->data_next; i++)
- if (strcmp(dir, dbenv->db_data_dir[i]) == 0)
- break;
-
- if (i == dbenv->data_next) {
- __db_errx(dbp->env,
- "Directory %s not in environment list.", dir);
- return (EINVAL);
- }
-
- dbp->dirname = dbenv->db_data_dir[i];
- return (0);
-}
-
-static int
-__db_get_create_dir(dbp, dirp)
- DB *dbp;
- const char **dirp;
-{
- *dirp = dbp->dirname;
- return (0);
-}
-
-/*
- * __db_get_dup_compare --
- * Get duplicate comparison routine.
- */
-static int
-__db_get_dup_compare(dbp, funcp)
- DB *dbp;
- int (**funcp) __P((DB *, const DBT *, const DBT *));
-{
-
- DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE | DB_OK_HASH);
-
- if (funcp != NULL) {
-#ifdef HAVE_COMPRESSION
- if (DB_IS_COMPRESSED(dbp)) {
- *funcp =
- ((BTREE *)dbp->bt_internal)->compress_dup_compare;
- } else
-#endif
- *funcp = dbp->dup_compare;
- }
-
- return (0);
-}
-
-/*
- * __db_set_dup_compare --
- * Set duplicate comparison routine.
- */
-static int
-__db_set_dup_compare(dbp, func)
- DB *dbp;
- int (*func) __P((DB *, const DBT *, const DBT *));
-{
- int ret;
-
- DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_dup_compare");
- DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE | DB_OK_HASH);
-
- if ((ret = __db_set_flags(dbp, DB_DUPSORT)) != 0)
- return (ret);
-
-#ifdef HAVE_COMPRESSION
- if (DB_IS_COMPRESSED(dbp)) {
- dbp->dup_compare = __bam_compress_dupcmp;
- ((BTREE *)dbp->bt_internal)->compress_dup_compare = func;
- } else
-#endif
- dbp->dup_compare = func;
-
- return (0);
-}
-
-/*
- * __db_get_encrypt_flags --
- */
-static int
-__db_get_encrypt_flags(dbp, flagsp)
- DB *dbp;
- u_int32_t *flagsp;
-{
- DB_ILLEGAL_IN_ENV(dbp, "DB->get_encrypt_flags");
-
- return (__env_get_encrypt_flags(dbp->dbenv, flagsp));
-}
-
-/*
- * __db_set_encrypt --
- * Set database passwd.
- */
-static int
-__db_set_encrypt(dbp, passwd, flags)
- DB *dbp;
- const char *passwd;
- u_int32_t flags;
-{
- DB_CIPHER *db_cipher;
- int ret;
-
- DB_ILLEGAL_IN_ENV(dbp, "DB->set_encrypt");
- DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_encrypt");
-
- if ((ret = __env_set_encrypt(dbp->dbenv, passwd, flags)) != 0)
- return (ret);
-
- /*
- * In a real env, this gets initialized with the region. In a local
- * env, we must do it here.
- */
- db_cipher = dbp->env->crypto_handle;
- if (!F_ISSET(db_cipher, CIPHER_ANY) &&
- (ret = db_cipher->init(dbp->env, db_cipher)) != 0)
- return (ret);
-
- return (__db_set_flags(dbp, DB_ENCRYPT));
-}
-
-static void
-__db_get_errcall(dbp, errcallp)
- DB *dbp;
- void (**errcallp) __P((const DB_ENV *, const char *, const char *));
-{
- __env_get_errcall(dbp->dbenv, errcallp);
-}
-
-static void
-__db_set_errcall(dbp, errcall)
- DB *dbp;
- void (*errcall) __P((const DB_ENV *, const char *, const char *));
-{
- __env_set_errcall(dbp->dbenv, errcall);
-}
-
-static void
-__db_get_errfile(dbp, errfilep)
- DB *dbp;
- FILE **errfilep;
-{
- __env_get_errfile(dbp->dbenv, errfilep);
-}
-
-static void
-__db_set_errfile(dbp, errfile)
- DB *dbp;
- FILE *errfile;
-{
- __env_set_errfile(dbp->dbenv, errfile);
-}
-
-static void
-__db_get_errpfx(dbp, errpfxp)
- DB *dbp;
- const char **errpfxp;
-{
- __env_get_errpfx(dbp->dbenv, errpfxp);
-}
-
-static void
-__db_set_errpfx(dbp, errpfx)
- DB *dbp;
- const char *errpfx;
-{
- __env_set_errpfx(dbp->dbenv, errpfx);
-}
-
-static int
-__db_get_feedback(dbp, feedbackp)
- DB *dbp;
- void (**feedbackp) __P((DB *, int, int));
-{
- if (feedbackp != NULL)
- *feedbackp = dbp->db_feedback;
- return (0);
-}
-
-static int
-__db_set_feedback(dbp, feedback)
- DB *dbp;
- void (*feedback) __P((DB *, int, int));
-{
- dbp->db_feedback = feedback;
- return (0);
-}
-
-/*
- * __db_map_flags --
- * Maps between public and internal flag values.
- * This function doesn't check for validity, so it can't fail.
- */
-static void
-__db_map_flags(dbp, inflagsp, outflagsp)
- DB *dbp;
- u_int32_t *inflagsp, *outflagsp;
-{
- COMPQUIET(dbp, NULL);
-
- if (FLD_ISSET(*inflagsp, DB_CHKSUM)) {
- FLD_SET(*outflagsp, DB_AM_CHKSUM);
- FLD_CLR(*inflagsp, DB_CHKSUM);
- }
- if (FLD_ISSET(*inflagsp, DB_ENCRYPT)) {
- FLD_SET(*outflagsp, DB_AM_ENCRYPT | DB_AM_CHKSUM);
- FLD_CLR(*inflagsp, DB_ENCRYPT);
- }
- if (FLD_ISSET(*inflagsp, DB_TXN_NOT_DURABLE)) {
- FLD_SET(*outflagsp, DB_AM_NOT_DURABLE);
- FLD_CLR(*inflagsp, DB_TXN_NOT_DURABLE);
- }
-}
-
-/*
- * __db_get_flags --
- * The DB->get_flags method.
- *
- * PUBLIC: int __db_get_flags __P((DB *, u_int32_t *));
- */
-int
-__db_get_flags(dbp, flagsp)
- DB *dbp;
- u_int32_t *flagsp;
-{
- static const u_int32_t db_flags[] = {
- DB_CHKSUM,
- DB_DUP,
- DB_DUPSORT,
- DB_ENCRYPT,
-#ifdef HAVE_QUEUE
- DB_INORDER,
-#endif
- DB_RECNUM,
- DB_RENUMBER,
- DB_REVSPLITOFF,
- DB_SNAPSHOT,
- DB_TXN_NOT_DURABLE,
- 0
- };
- u_int32_t f, flags, mapped_flag;
- int i;
-
- flags = 0;
- for (i = 0; (f = db_flags[i]) != 0; i++) {
- mapped_flag = 0;
- __db_map_flags(dbp, &f, &mapped_flag);
- __bam_map_flags(dbp, &f, &mapped_flag);
- __ram_map_flags(dbp, &f, &mapped_flag);
-#ifdef HAVE_QUEUE
- __qam_map_flags(dbp, &f, &mapped_flag);
-#endif
- DB_ASSERT(dbp->env, f == 0);
- if (F_ISSET(dbp, mapped_flag) == mapped_flag)
- LF_SET(db_flags[i]);
- }
-
- *flagsp = flags;
- return (0);
-}
-
-/*
- * __db_set_flags --
- * DB->set_flags.
- *
- * PUBLIC: int __db_set_flags __P((DB *, u_int32_t));
- */
-int
-__db_set_flags(dbp, flags)
- DB *dbp;
- u_int32_t flags;
-{
- ENV *env;
- int ret;
-
- env = dbp->env;
-
- if (LF_ISSET(DB_ENCRYPT) && !CRYPTO_ON(env)) {
- __db_errx(env,
- "Database environment not configured for encryption");
- return (EINVAL);
- }
- if (LF_ISSET(DB_TXN_NOT_DURABLE))
- ENV_REQUIRES_CONFIG(env,
- env->tx_handle, "DB_NOT_DURABLE", DB_INIT_TXN);
-
- __db_map_flags(dbp, &flags, &dbp->flags);
-
- if ((ret = __bam_set_flags(dbp, &flags)) != 0)
- return (ret);
- if ((ret = __ram_set_flags(dbp, &flags)) != 0)
- return (ret);
-#ifdef HAVE_QUEUE
- if ((ret = __qam_set_flags(dbp, &flags)) != 0)
- return (ret);
-#endif
-
- return (flags == 0 ? 0 : __db_ferr(env, "DB->set_flags", 0));
-}
-
-/*
- * __db_get_lorder --
- * Get whether lorder is swapped or not.
- *
- * PUBLIC: int __db_get_lorder __P((DB *, int *));
- */
-int
-__db_get_lorder(dbp, db_lorderp)
- DB *dbp;
- int *db_lorderp;
-{
- int ret;
-
- /* Flag if the specified byte order requires swapping. */
- switch (ret = __db_byteorder(dbp->env, 1234)) {
- case 0:
- *db_lorderp = F_ISSET(dbp, DB_AM_SWAP) ? 4321 : 1234;
- break;
- case DB_SWAPBYTES:
- *db_lorderp = F_ISSET(dbp, DB_AM_SWAP) ? 1234 : 4321;
- break;
- default:
- return (ret);
- /* NOTREACHED */
- }
-
- return (0);
-}
-
-/*
- * __db_set_lorder --
- * Set whether lorder is swapped or not.
- *
- * PUBLIC: int __db_set_lorder __P((DB *, int));
- */
-int
-__db_set_lorder(dbp, db_lorder)
- DB *dbp;
- int db_lorder;
-{
- int ret;
-
- DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_lorder");
-
- /* Flag if the specified byte order requires swapping. */
- switch (ret = __db_byteorder(dbp->env, db_lorder)) {
- case 0:
- F_CLR(dbp, DB_AM_SWAP);
- break;
- case DB_SWAPBYTES:
- F_SET(dbp, DB_AM_SWAP);
- break;
- default:
- return (ret);
- /* NOTREACHED */
- }
- return (0);
-}
-
-static int
-__db_get_alloc(dbp, mal_funcp, real_funcp, free_funcp)
- DB *dbp;
- void *(**mal_funcp) __P((size_t));
- void *(**real_funcp) __P((void *, size_t));
- void (**free_funcp) __P((void *));
-{
- DB_ILLEGAL_IN_ENV(dbp, "DB->get_alloc");
-
- return (__env_get_alloc(dbp->dbenv, mal_funcp,
- real_funcp, free_funcp));
-}
-
-static int
-__db_set_alloc(dbp, mal_func, real_func, free_func)
- DB *dbp;
- void *(*mal_func) __P((size_t));
- void *(*real_func) __P((void *, size_t));
- void (*free_func) __P((void *));
-{
- DB_ILLEGAL_IN_ENV(dbp, "DB->set_alloc");
- DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_alloc");
-
- return (__env_set_alloc(dbp->dbenv, mal_func, real_func, free_func));
-}
-
-static void
-__db_get_msgcall(dbp, msgcallp)
- DB *dbp;
- void (**msgcallp) __P((const DB_ENV *, const char *));
-{
- __env_get_msgcall(dbp->dbenv, msgcallp);
-}
-
-static void
-__db_set_msgcall(dbp, msgcall)
- DB *dbp;
- void (*msgcall) __P((const DB_ENV *, const char *));
-{
- __env_set_msgcall(dbp->dbenv, msgcall);
-}
-
-static void
-__db_get_msgfile(dbp, msgfilep)
- DB *dbp;
- FILE **msgfilep;
-{
- __env_get_msgfile(dbp->dbenv, msgfilep);
-}
-
-static void
-__db_set_msgfile(dbp, msgfile)
- DB *dbp;
- FILE *msgfile;
-{
- __env_set_msgfile(dbp->dbenv, msgfile);
-}
-
-static int
-__db_get_pagesize(dbp, db_pagesizep)
- DB *dbp;
- u_int32_t *db_pagesizep;
-{
- *db_pagesizep = dbp->pgsize;
- return (0);
-}
-
-/*
- * __db_set_pagesize --
- * DB->set_pagesize
- *
- * PUBLIC: int __db_set_pagesize __P((DB *, u_int32_t));
- */
-int
-__db_set_pagesize(dbp, db_pagesize)
- DB *dbp;
- u_int32_t db_pagesize;
-{
- DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_pagesize");
-
- if (db_pagesize < DB_MIN_PGSIZE) {
- __db_errx(dbp->env, "page sizes may not be smaller than %lu",
- (u_long)DB_MIN_PGSIZE);
- return (EINVAL);
- }
- if (db_pagesize > DB_MAX_PGSIZE) {
- __db_errx(dbp->env, "page sizes may not be larger than %lu",
- (u_long)DB_MAX_PGSIZE);
- return (EINVAL);
- }
-
- /*
- * We don't want anything that's not a power-of-2, as we rely on that
- * for alignment of various types on the pages.
- */
- if (!POWER_OF_TWO(db_pagesize)) {
- __db_errx(dbp->env, "page sizes must be a power-of-2");
- return (EINVAL);
- }
-
- /*
- * XXX
- * Should we be checking for a page size that's not a multiple of 512,
- * so that we never try and write less than a disk sector?
- */
- dbp->pgsize = db_pagesize;
-
- return (0);
-}
-
-static int
-__db_set_paniccall(dbp, paniccall)
- DB *dbp;
- void (*paniccall) __P((DB_ENV *, int));
-{
- return (__env_set_paniccall(dbp->dbenv, paniccall));
-}
-
-static int
-__db_set_priority(dbp, priority)
- DB *dbp;
- DB_CACHE_PRIORITY priority;
-{
- dbp->priority = priority;
- return (0);
-}
-
-static int
-__db_get_priority(dbp, priority)
- DB *dbp;
- DB_CACHE_PRIORITY *priority;
-{
- *priority = dbp->priority;
- return (0);
-}
diff --git a/db/db_open.c b/db/db_open.c
deleted file mode 100644
index 5c5db09..0000000
--- a/db/db_open.c
+++ /dev/null
@@ -1,628 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_swap.h"
-#include "dbinc/btree.h"
-#include "dbinc/crypto.h"
-#include "dbinc/hmac.h"
-#include "dbinc/fop.h"
-#include "dbinc/hash.h"
-#include "dbinc/lock.h"
-#include "dbinc/log.h"
-#include "dbinc/mp.h"
-#include "dbinc/qam.h"
-#include "dbinc/txn.h"
-
-/*
- * __db_open --
- * DB->open method.
- *
- * This routine gets called in three different ways:
- *
- * 1. It can be called to open a file/database. In this case, subdb will
- * be NULL and meta_pgno will be PGNO_BASE_MD.
- * 2. It can be called to open a subdatabase during normal operation. In
- * this case, name and subname will both be non-NULL and meta_pgno will
- * be PGNO_BASE_MD (also PGNO_INVALID).
- * 3. It can be called to open an in-memory database (name == NULL;
- * subname = name).
- * 4. It can be called during recovery to open a file/database, in which case
- * name will be non-NULL, subname will be NULL, and meta-pgno will be
- * PGNO_BASE_MD.
- * 5. It can be called during recovery to open a subdatabase, in which case
- * name will be non-NULL, subname may be NULL and meta-pgno will be
- * a valid pgno (i.e., not PGNO_BASE_MD).
- * 6. It can be called during recovery to open an in-memory database.
- *
- * PUBLIC: int __db_open __P((DB *, DB_THREAD_INFO *, DB_TXN *,
- * PUBLIC: const char *, const char *, DBTYPE, u_int32_t, int, db_pgno_t));
- */
-int
-__db_open(dbp, ip, txn, fname, dname, type, flags, mode, meta_pgno)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- const char *fname, *dname;
- DBTYPE type;
- u_int32_t flags;
- int mode;
- db_pgno_t meta_pgno;
-{
- DB *tdbp;
- ENV *env;
- int ret;
- u_int32_t id;
-
- env = dbp->env;
- id = TXN_INVALID;
-
- /*
- * We must flush any existing pages before truncating the file
- * since they could age out of mpool and overwrite new pages.
- */
- if (LF_ISSET(DB_TRUNCATE)) {
- if ((ret = __db_create_internal(&tdbp, dbp->env, 0)) != 0)
- goto err;
- ret = __db_open(tdbp, ip, txn, fname, dname, DB_UNKNOWN,
- DB_NOERROR | (flags & ~(DB_TRUNCATE|DB_CREATE)),
- mode, meta_pgno);
- if (ret == 0)
- ret = __memp_ftruncate(tdbp->mpf, txn, ip, 0, 0);
- (void)__db_close(tdbp, txn, DB_NOSYNC);
- if (ret != 0 && ret != ENOENT && ret != EINVAL)
- goto err;
- ret = 0;
- }
-
- DB_TEST_RECOVERY(dbp, DB_TEST_PREOPEN, ret, fname);
-
- /*
- * If the environment was configured with threads, the DB handle
- * must also be free-threaded, so we force the DB_THREAD flag on.
- * (See SR #2033 for why this is a requirement--recovery needs
- * to be able to grab a dbp using __db_fileid_to_dbp, and it has
- * no way of knowing which dbp goes with which thread, so whichever
- * one it finds has to be usable in any of them.)
- */
- if (F_ISSET(env, ENV_THREAD))
- LF_SET(DB_THREAD);
-
- /* Convert any DB->open flags. */
- if (LF_ISSET(DB_RDONLY))
- F_SET(dbp, DB_AM_RDONLY);
- if (LF_ISSET(DB_READ_UNCOMMITTED))
- F_SET(dbp, DB_AM_READ_UNCOMMITTED);
-
- if (IS_REAL_TXN(txn))
- F_SET(dbp, DB_AM_TXN);
-
- /* Fill in the type. */
- dbp->type = type;
-
- /*
- * If both fname and subname are NULL, it's always a create, so make
- * sure that we have both DB_CREATE and a type specified. It would
- * be nice if this checking were done in __db_open where most of the
- * interface checking is done, but this interface (__db_dbopen) is
- * used by the recovery and limbo system, so we need to safeguard
- * this interface as well.
- */
- if (fname == NULL) {
- if (dbp->p_internal != NULL) {
- __db_errx(env,
- "Partitioned databases may not be in memory.");
- return (ENOENT);
- }
- if (dname == NULL) {
- if (!LF_ISSET(DB_CREATE)) {
- __db_errx(env,
- "DB_CREATE must be specified to create databases.");
- return (ENOENT);
- }
-
- F_SET(dbp, DB_AM_INMEM);
- F_SET(dbp, DB_AM_CREATED);
-
- if (dbp->type == DB_UNKNOWN) {
- __db_errx(env,
- "DBTYPE of unknown without existing file");
- return (EINVAL);
- }
-
- if (dbp->pgsize == 0)
- dbp->pgsize = DB_DEF_IOSIZE;
-
- /*
- * If the file is a temporary file and we're
- * doing locking, then we have to create a
- * unique file ID. We can't use our normal
- * dev/inode pair (or whatever this OS uses
- * in place of dev/inode pairs) because no
- * backing file will be created until the
- * mpool cache is filled forcing the buffers
- * to disk. Grab a random locker ID to use
- * as a file ID. The created ID must never
- * match a potential real file ID -- we know
- * it won't because real file IDs contain a
- * time stamp after the dev/inode pair, and
- * we're simply storing a 4-byte value.
-
- * !!!
- * Store the locker in the file id structure
- * -- we can get it from there as necessary,
- * and it saves having two copies.
- */
- if (LOCKING_ON(env) && (ret = __lock_id(env,
- (u_int32_t *)dbp->fileid, NULL)) != 0)
- return (ret);
- } else
- MAKE_INMEM(dbp);
-
- /*
- * Normally we would do handle locking here, however, with
- * in-memory files, we cannot do any database manipulation
- * until the mpool is open, so it happens later.
- */
- } else if (dname == NULL && meta_pgno == PGNO_BASE_MD) {
- /* Open/create the underlying file. Acquire locks. */
- if ((ret = __fop_file_setup(dbp, ip,
- txn, fname, mode, flags, &id)) != 0)
- return (ret);
- } else {
- if (dbp->p_internal != NULL) {
- __db_errx(env,
- "Partitioned databases may not be included with multiple databases.");
- return (ENOENT);
- }
- if ((ret = __fop_subdb_setup(dbp, ip,
- txn, fname, dname, mode, flags)) != 0)
- return (ret);
- meta_pgno = dbp->meta_pgno;
- }
-
- /* Set up the underlying environment. */
- if ((ret = __env_setup(dbp, txn, fname, dname, id, flags)) != 0)
- return (ret);
-
- /* For in-memory databases, we now need to open/create the database. */
- if (F_ISSET(dbp, DB_AM_INMEM)) {
- if (dname == NULL)
- ret = __db_new_file(dbp, ip, txn, NULL, NULL);
- else {
- id = TXN_INVALID;
- if ((ret = __fop_file_setup(dbp, ip,
- txn, dname, mode, flags, &id)) == 0 &&
- DBENV_LOGGING(env) && !F_ISSET(dbp, DB_AM_RECOVER)
-#if !defined(DEBUG_ROP) && !defined(DEBUG_WOP) && !defined(DIAGNOSTIC)
- && txn != NULL
-#endif
-#if !defined(DEBUG_ROP)
- && !F_ISSET(dbp, DB_AM_RDONLY)
-#endif
- )
- ret = __dbreg_log_id(dbp,
- txn, dbp->log_filename->id, 1);
- }
- if (ret != 0)
- goto err;
- }
-
- switch (dbp->type) {
- case DB_BTREE:
- ret = __bam_open(dbp, ip, txn, fname, meta_pgno, flags);
- break;
- case DB_HASH:
- ret = __ham_open(dbp, ip, txn, fname, meta_pgno, flags);
- break;
- case DB_RECNO:
- ret = __ram_open(dbp, ip, txn, fname, meta_pgno, flags);
- break;
- case DB_QUEUE:
- ret = __qam_open(
- dbp, ip, txn, fname, meta_pgno, mode, flags);
- break;
- case DB_UNKNOWN:
- return (
- __db_unknown_type(env, "__db_dbopen", dbp->type));
- }
- if (ret != 0)
- goto err;
-
-#ifdef HAVE_PARTITION
- if (dbp->p_internal != NULL && (ret =
- __partition_open(dbp, ip, txn, fname, type, flags, mode, 1)) != 0)
- goto err;
-#endif
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTOPEN, ret, fname);
-
- /*
- * Temporary files don't need handle locks, so we only have to check
- * for a handle lock downgrade or lockevent in the case of named
- * files.
- */
- if (!F_ISSET(dbp, DB_AM_RECOVER) && (fname != NULL || dname != NULL) &&
- LOCK_ISSET(dbp->handle_lock)) {
- if (IS_REAL_TXN(txn))
- ret = __txn_lockevent(env,
- txn, dbp, &dbp->handle_lock, dbp->locker);
- else if (LOCKING_ON(env))
- /* Trade write handle lock for read handle lock. */
- ret = __lock_downgrade(env,
- &dbp->handle_lock, DB_LOCK_READ, 0);
- }
-DB_TEST_RECOVERY_LABEL
-err:
- return (ret);
-}
-
-/*
- * __db_get_open_flags --
- * Accessor for flags passed into DB->open call
- *
- * PUBLIC: int __db_get_open_flags __P((DB *, u_int32_t *));
- */
-int
-__db_get_open_flags(dbp, flagsp)
- DB *dbp;
- u_int32_t *flagsp;
-{
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_open_flags");
-
- *flagsp = dbp->open_flags;
- return (0);
-}
-
-/*
- * __db_new_file --
- * Create a new database file.
- *
- * PUBLIC: int __db_new_file __P((DB *,
- * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *));
- */
-int
-__db_new_file(dbp, ip, txn, fhp, name)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- DB_FH *fhp;
- const char *name;
-{
- int ret;
-
- switch (dbp->type) {
- case DB_BTREE:
- case DB_RECNO:
- ret = __bam_new_file(dbp, ip, txn, fhp, name);
- break;
- case DB_HASH:
- ret = __ham_new_file(dbp, ip, txn, fhp, name);
- break;
- case DB_QUEUE:
- ret = __qam_new_file(dbp, ip, txn, fhp, name);
- break;
- case DB_UNKNOWN:
- default:
- __db_errx(dbp->env,
- "%s: Invalid type %d specified", name, dbp->type);
- ret = EINVAL;
- break;
- }
-
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOGMETA, ret, name);
- /* Sync the file in preparation for moving it into place. */
- if (ret == 0 && fhp != NULL)
- ret = __os_fsync(dbp->env, fhp);
-
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name);
-
-DB_TEST_RECOVERY_LABEL
- return (ret);
-}
-
-/*
- * __db_init_subdb --
- * Initialize the dbp for a subdb.
- *
- * PUBLIC: int __db_init_subdb __P((DB *,
- * PUBLIC: DB *, const char *, DB_THREAD_INFO *, DB_TXN *));
- */
-int
-__db_init_subdb(mdbp, dbp, name, ip, txn)
- DB *mdbp, *dbp;
- const char *name;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
-{
- DBMETA *meta;
- DB_MPOOLFILE *mpf;
- int ret, t_ret;
-
- ret = 0;
- if (!F_ISSET(dbp, DB_AM_CREATED)) {
- /* Subdb exists; read meta-data page and initialize. */
- mpf = mdbp->mpf;
- if ((ret = __memp_fget(mpf, &dbp->meta_pgno,
- ip, txn, 0, &meta)) != 0)
- goto err;
- ret = __db_meta_setup(mdbp->env, dbp, name, meta, 0, 0);
- if ((t_ret = __memp_fput(mpf,
- ip, meta, dbp->priority)) != 0 && ret == 0)
- ret = t_ret;
- /*
- * If __db_meta_setup found that the meta-page hadn't
- * been written out during recovery, we can just return.
- */
- if (ret == ENOENT)
- ret = 0;
- goto err;
- }
-
- /* Handle the create case here. */
- switch (dbp->type) {
- case DB_BTREE:
- case DB_RECNO:
- ret = __bam_new_subdb(mdbp, dbp, ip, txn);
- break;
- case DB_HASH:
- ret = __ham_new_subdb(mdbp, dbp, ip, txn);
- break;
- case DB_QUEUE:
- ret = EINVAL;
- break;
- case DB_UNKNOWN:
- default:
- __db_errx(dbp->env,
- "Invalid subdatabase type %d specified", dbp->type);
- return (EINVAL);
- }
-
-err: return (ret);
-}
-
-/*
- * __db_chk_meta --
- * Take a buffer containing a meta-data page and check it for a valid LSN,
- * checksum (and verify the checksum if necessary) and possibly decrypt it.
- *
- * Return 0 on success, >0 (errno) on error, -1 on checksum mismatch.
- *
- * PUBLIC: int __db_chk_meta __P((ENV *, DB *, DBMETA *, u_int32_t));
- */
-int
-__db_chk_meta(env, dbp, meta, flags)
- ENV *env;
- DB *dbp;
- DBMETA *meta;
- u_int32_t flags;
-{
- DB_LSN swap_lsn;
- int is_hmac, ret, swapped;
- u_int32_t magic, orig_chk;
- u_int8_t *chksum;
-
- ret = 0;
- swapped = 0;
-
- if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM)) {
- if (dbp != NULL)
- F_SET(dbp, DB_AM_CHKSUM);
-
- is_hmac = meta->encrypt_alg == 0 ? 0 : 1;
- chksum = ((BTMETA *)meta)->chksum;
-
- /*
- * If we need to swap, the checksum function overwrites the
- * original checksum with 0, so we need to save a copy of the
- * original for swapping later.
- */
- orig_chk = *(u_int32_t *)chksum;
-
- /*
- * We cannot add this to __db_metaswap because that gets done
- * later after we've verified the checksum or decrypted.
- */
- if (LF_ISSET(DB_CHK_META)) {
- swapped = 0;
-chk_retry: if ((ret =
- __db_check_chksum(env, NULL, env->crypto_handle,
- chksum, meta, DBMETASIZE, is_hmac)) != 0) {
- if (is_hmac || swapped)
- return (ret);
-
- M_32_SWAP(orig_chk);
- swapped = 1;
- *(u_int32_t *)chksum = orig_chk;
- goto chk_retry;
- }
- }
- } else if (dbp != NULL)
- F_CLR(dbp, DB_AM_CHKSUM);
-
-#ifdef HAVE_CRYPTO
- ret = __crypto_decrypt_meta(env,
- dbp, (u_int8_t *)meta, LF_ISSET(DB_CHK_META));
-#endif
-
- /* Now that we're decrypted, we can check LSN. */
- if (LOGGING_ON(env) && !LF_ISSET(DB_CHK_NOLSN)) {
- /*
- * This gets called both before and after swapping, so we
- * need to check ourselves. If we already swapped it above,
- * we'll know that here.
- */
-
- swap_lsn = meta->lsn;
- magic = meta->magic;
-lsn_retry:
- if (swapped) {
- M_32_SWAP(swap_lsn.file);
- M_32_SWAP(swap_lsn.offset);
- M_32_SWAP(magic);
- }
- switch (magic) {
- case DB_BTREEMAGIC:
- case DB_HASHMAGIC:
- case DB_QAMMAGIC:
- case DB_RENAMEMAGIC:
- break;
- default:
- if (swapped)
- return (EINVAL);
- swapped = 1;
- goto lsn_retry;
- }
- if (!IS_REP_CLIENT(env) &&
- !IS_NOT_LOGGED_LSN(swap_lsn) && !IS_ZERO_LSN(swap_lsn))
- /* Need to do check. */
- ret = __log_check_page_lsn(env, dbp, &swap_lsn);
- }
- return (ret);
-}
-
-/*
- * __db_meta_setup --
- *
- * Take a buffer containing a meta-data page and figure out if it's
- * valid, and if so, initialize the dbp from the meta-data page.
- *
- * PUBLIC: int __db_meta_setup __P((ENV *,
- * PUBLIC: DB *, const char *, DBMETA *, u_int32_t, u_int32_t));
- */
-int
-__db_meta_setup(env, dbp, name, meta, oflags, flags)
- ENV *env;
- DB *dbp;
- const char *name;
- DBMETA *meta;
- u_int32_t oflags;
- u_int32_t flags;
-{
- u_int32_t magic;
- int ret;
-
- ret = 0;
-
- /*
- * Figure out what access method we're dealing with, and then
- * call access method specific code to check error conditions
- * based on conflicts between the found file and application
- * arguments. A found file overrides some user information --
- * we don't consider it an error, for example, if the user set
- * an expected byte order and the found file doesn't match it.
- */
- F_CLR(dbp, DB_AM_SWAP | DB_AM_IN_RENAME);
- magic = meta->magic;
-
-swap_retry:
- switch (magic) {
- case DB_BTREEMAGIC:
- case DB_HASHMAGIC:
- case DB_QAMMAGIC:
- case DB_RENAMEMAGIC:
- break;
- case 0:
- /*
- * The only time this should be 0 is if we're in the
- * midst of opening a subdb during recovery and that
- * subdatabase had its meta-data page allocated, but
- * not yet initialized.
- */
- if (F_ISSET(dbp, DB_AM_SUBDB) && ((IS_RECOVERING(env) &&
- F_ISSET(env->lg_handle, DBLOG_FORCE_OPEN)) ||
- meta->pgno != PGNO_INVALID))
- return (ENOENT);
-
- goto bad_format;
- default:
- if (F_ISSET(dbp, DB_AM_SWAP))
- goto bad_format;
-
- M_32_SWAP(magic);
- F_SET(dbp, DB_AM_SWAP);
- goto swap_retry;
- }
-
- /*
- * We can only check the meta page if we are sure we have a meta page.
- * If it is random data, then this check can fail. So only now can we
- * checksum and decrypt. Don't distinguish between configuration and
- * checksum match errors here, because we haven't opened the database
- * and even a checksum error isn't a reason to panic the environment.
- */
- if ((ret = __db_chk_meta(env, dbp, meta, flags)) != 0) {
- if (ret == -1)
- __db_errx(env,
- "%s: metadata page checksum error", name);
- goto bad_format;
- }
-
- switch (magic) {
- case DB_BTREEMAGIC:
- if (dbp->type != DB_UNKNOWN &&
- dbp->type != DB_RECNO && dbp->type != DB_BTREE)
- goto bad_format;
-
- flags = meta->flags;
- if (F_ISSET(dbp, DB_AM_SWAP))
- M_32_SWAP(flags);
- if (LF_ISSET(BTM_RECNO))
- dbp->type = DB_RECNO;
- else
- dbp->type = DB_BTREE;
- if ((oflags & DB_TRUNCATE) == 0 && (ret =
- __bam_metachk(dbp, name, (BTMETA *)meta)) != 0)
- return (ret);
- break;
- case DB_HASHMAGIC:
- if (dbp->type != DB_UNKNOWN && dbp->type != DB_HASH)
- goto bad_format;
-
- dbp->type = DB_HASH;
- if ((oflags & DB_TRUNCATE) == 0 && (ret =
- __ham_metachk(dbp, name, (HMETA *)meta)) != 0)
- return (ret);
- break;
- case DB_QAMMAGIC:
- if (dbp->type != DB_UNKNOWN && dbp->type != DB_QUEUE)
- goto bad_format;
- dbp->type = DB_QUEUE;
- if ((oflags & DB_TRUNCATE) == 0 && (ret =
- __qam_metachk(dbp, name, (QMETA *)meta)) != 0)
- return (ret);
- break;
- case DB_RENAMEMAGIC:
- F_SET(dbp, DB_AM_IN_RENAME);
-
- /* Copy the file's ID. */
- memcpy(dbp->fileid, ((DBMETA *)meta)->uid, DB_FILE_ID_LEN);
-
- break;
- default:
- goto bad_format;
- }
-
- if (FLD_ISSET(meta->metaflags,
- DBMETA_PART_RANGE | DBMETA_PART_CALLBACK))
- if ((ret =
- __partition_init(dbp, meta->metaflags)) != 0)
- return (ret);
- return (0);
-
-bad_format:
- if (F_ISSET(dbp, DB_AM_RECOVER))
- ret = ENOENT;
- else
- __db_errx(env,
- "__db_meta_setup: %s: unexpected file type or format",
- name);
- return (ret == 0 ? EINVAL : ret);
-}
diff --git a/db/db_overflow.c b/db/db_overflow.c
deleted file mode 100644
index a718182..0000000
--- a/db/db_overflow.c
+++ /dev/null
@@ -1,706 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Mike Olson.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_am.h"
-#include "dbinc/mp.h"
-
-/*
- * Big key/data code.
- *
- * Big key and data entries are stored on linked lists of pages. The initial
- * reference is a structure with the total length of the item and the page
- * number where it begins. Each entry in the linked list contains a pointer
- * to the next page of data, and so on.
- */
-
-/*
- * __db_goff --
- * Get an offpage item.
- *
- * PUBLIC: int __db_goff __P((DBC *,
- * PUBLIC: DBT *, u_int32_t, db_pgno_t, void **, u_int32_t *));
- */
-int
-__db_goff(dbc, dbt, tlen, pgno, bpp, bpsz)
- DBC *dbc;
- DBT *dbt;
- u_int32_t tlen;
- db_pgno_t pgno;
- void **bpp;
- u_int32_t *bpsz;
-{
- DB *dbp;
- DB_MPOOLFILE *mpf;
- DB_TXN *txn;
- DBC_INTERNAL *cp;
- ENV *env;
- PAGE *h;
- DB_THREAD_INFO *ip;
- db_indx_t bytes;
- u_int32_t curoff, needed, start;
- u_int8_t *p, *src;
- int ret;
-
- dbp = dbc->dbp;
- cp = dbc->internal;
- env = dbp->env;
- ip = dbc->thread_info;
- mpf = dbp->mpf;
- txn = dbc->txn;
-
- /*
- * Check if the buffer is big enough; if it is not and we are
- * allowed to malloc space, then we'll malloc it. If we are
- * not (DB_DBT_USERMEM), then we'll set the dbt and return
- * appropriately.
- */
- if (F_ISSET(dbt, DB_DBT_PARTIAL)) {
- start = dbt->doff;
- if (start > tlen)
- needed = 0;
- else if (dbt->dlen > tlen - start)
- needed = tlen - start;
- else
- needed = dbt->dlen;
- } else {
- start = 0;
- needed = tlen;
- }
-
- /*
- * If the caller has not requested any data, return success. This
- * "early-out" also avoids setting up the streaming optimization when
- * no page would be retrieved. If it were removed, the streaming code
- * should only initialize when needed is not 0.
- */
- if (needed == 0) {
- dbt->size = 0;
- return (0);
- }
-
- if (F_ISSET(dbt, DB_DBT_USERCOPY))
- goto skip_alloc;
-
- /* Allocate any necessary memory. */
- if (F_ISSET(dbt, DB_DBT_USERMEM)) {
- if (needed > dbt->ulen) {
- dbt->size = needed;
- return (DB_BUFFER_SMALL);
- }
- } else if (F_ISSET(dbt, DB_DBT_MALLOC)) {
- if ((ret = __os_umalloc(env, needed, &dbt->data)) != 0)
- return (ret);
- } else if (F_ISSET(dbt, DB_DBT_REALLOC)) {
- if ((ret = __os_urealloc(env, needed, &dbt->data)) != 0)
- return (ret);
- } else if (bpsz != NULL && (*bpsz == 0 || *bpsz < needed)) {
- if ((ret = __os_realloc(env, needed, bpp)) != 0)
- return (ret);
- *bpsz = needed;
- dbt->data = *bpp;
- } else if (bpp != NULL)
- dbt->data = *bpp;
- else {
- DB_ASSERT(env,
- F_ISSET(dbt,
- DB_DBT_USERMEM | DB_DBT_MALLOC | DB_DBT_REALLOC) ||
- bpsz != NULL || bpp != NULL);
- return (DB_BUFFER_SMALL);
- }
-
-skip_alloc:
- /* Set up a start page in the overflow chain if streaming. */
- if (cp->stream_start_pgno != PGNO_INVALID &&
- pgno == cp->stream_start_pgno && start >= cp->stream_off &&
- start < cp->stream_off + P_MAXSPACE(dbp, dbp->pgsize)) {
- pgno = cp->stream_curr_pgno;
- curoff = cp->stream_off;
- } else {
- cp->stream_start_pgno = cp->stream_curr_pgno = pgno;
- cp->stream_off = curoff = 0;
- }
-
- /*
- * Step through the linked list of pages, copying the data on each
- * one into the buffer. Never copy more than the total data length.
- */
- dbt->size = needed;
- for (p = dbt->data; pgno != PGNO_INVALID && needed > 0;) {
- if ((ret = __memp_fget(mpf,
- &pgno, ip, txn, 0, &h)) != 0)
- return (ret);
- DB_ASSERT(env, TYPE(h) == P_OVERFLOW);
-
- /* Check if we need any bytes from this page. */
- if (curoff + OV_LEN(h) >= start) {
- bytes = OV_LEN(h);
- src = (u_int8_t *)h + P_OVERHEAD(dbp);
- if (start > curoff) {
- src += start - curoff;
- bytes -= start - curoff;
- }
- if (bytes > needed)
- bytes = needed;
- if (F_ISSET(dbt, DB_DBT_USERCOPY)) {
- /*
- * The offset into the DBT is the total size
- * less the amount of data still needed. Care
- * needs to be taken if doing a partial copy
- * beginning at an offset other than 0.
- */
- if ((ret = env->dbt_usercopy(
- dbt, dbt->size - needed,
- src, bytes, DB_USERCOPY_SETDATA)) != 0) {
- (void)__memp_fput(mpf,
- ip, h, dbp->priority);
- return (ret);
- }
- } else
- memcpy(p, src, bytes);
- p += bytes;
- needed -= bytes;
- }
- cp->stream_off = curoff;
- curoff += OV_LEN(h);
- cp->stream_curr_pgno = pgno;
- pgno = h->next_pgno;
- (void)__memp_fput(mpf, ip, h, dbp->priority);
- }
-
- return (0);
-}
-
-/*
- * __db_poff --
- * Put an offpage item.
- *
- * PUBLIC: int __db_poff __P((DBC *, const DBT *, db_pgno_t *));
- */
-int
-__db_poff(dbc, dbt, pgnop)
- DBC *dbc;
- const DBT *dbt;
- db_pgno_t *pgnop;
-{
- DB *dbp;
- DBT tmp_dbt;
- DB_LSN null_lsn;
- DB_MPOOLFILE *mpf;
- PAGE *pagep, *lastp;
- db_indx_t pagespace;
- db_pgno_t pgno;
- u_int32_t space, sz, tlen;
- u_int8_t *p;
- int ret, t_ret;
-
- /*
- * Allocate pages and copy the key/data item into them. Calculate the
- * number of bytes we get for pages we fill completely with a single
- * item.
- */
- dbp = dbc->dbp;
- lastp = NULL;
- mpf = dbp->mpf;
- pagespace = P_MAXSPACE(dbp, dbp->pgsize);
- p = dbt->data;
- sz = dbt->size;
-
- /*
- * Check whether we are streaming at the end of the overflow item.
- * If so, the last pgno and offset will be cached in the cursor.
- */
- if (F_ISSET(dbt, DB_DBT_STREAMING)) {
- tlen = dbt->size - dbt->dlen;
- pgno = dbc->internal->stream_curr_pgno;
- if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info,
- dbc->txn, DB_MPOOL_DIRTY, &lastp)) != 0)
- return (ret);
-
- /*
- * Calculate how much we can write on the last page of the
- * overflow item.
- */
- DB_ASSERT(dbp->env,
- OV_LEN(lastp) == (tlen - dbc->internal->stream_off));
- space = pagespace - OV_LEN(lastp);
-
- /* Only copy as much data as we have. */
- if (space > dbt->dlen)
- space = dbt->dlen;
-
- if (DBC_LOGGING(dbc)) {
- tmp_dbt.data = dbt->data;
- tmp_dbt.size = space;
- ZERO_LSN(null_lsn);
- if ((ret = __db_big_log(dbp, dbc->txn,
- &LSN(lastp), 0, DB_APPEND_BIG, pgno,
- PGNO_INVALID, PGNO_INVALID, &tmp_dbt,
- &LSN(lastp), &null_lsn, &null_lsn)) != 0)
- goto err;
- } else
- LSN_NOT_LOGGED(LSN(lastp));
-
- memcpy((u_int8_t *)lastp + P_OVERHEAD(dbp) + OV_LEN(lastp),
- dbt->data, space);
- OV_LEN(lastp) += space;
- sz -= space + dbt->doff;
- p += space;
- *pgnop = dbc->internal->stream_start_pgno;
- }
-
- ret = 0;
- for (; sz > 0; p += pagespace, sz -= pagespace) {
- /*
- * Reduce pagespace so we terminate the loop correctly and
- * don't copy too much data.
- */
- if (sz < pagespace)
- pagespace = sz;
-
- /*
- * Allocate and initialize a new page and copy all or part of
- * the item onto the page. If sz is less than pagespace, we
- * have a partial record.
- */
- if ((ret = __db_new(dbc, P_OVERFLOW, NULL, &pagep)) != 0)
- break;
- if (DBC_LOGGING(dbc)) {
- tmp_dbt.data = p;
- tmp_dbt.size = pagespace;
- ZERO_LSN(null_lsn);
- if ((ret = __db_big_log(dbp, dbc->txn,
- &LSN(pagep), 0, DB_ADD_BIG, PGNO(pagep),
- lastp ? PGNO(lastp) : PGNO_INVALID,
- PGNO_INVALID, &tmp_dbt, &LSN(pagep),
- lastp == NULL ? &null_lsn : &LSN(lastp),
- &null_lsn)) != 0) {
- (void)__memp_fput(mpf, dbc->thread_info,
- pagep, dbc->priority);
- goto err;
- }
- } else
- LSN_NOT_LOGGED(LSN(pagep));
-
- /* Move LSN onto page. */
- if (lastp != NULL)
- LSN(lastp) = LSN(pagep);
-
- OV_LEN(pagep) = pagespace;
- OV_REF(pagep) = 1;
- memcpy((u_int8_t *)pagep + P_OVERHEAD(dbp), p, pagespace);
-
- /*
- * If this is the first entry, update the user's info and
- * initialize the cursor to allow for streaming of subsequent
- * updates. Otherwise, update the entry on the last page
- * filled in and release that page.
- */
- if (lastp == NULL) {
- *pgnop = PGNO(pagep);
- dbc->internal->stream_start_pgno =
- dbc->internal->stream_curr_pgno = *pgnop;
- dbc->internal->stream_off = 0;
- } else {
- lastp->next_pgno = PGNO(pagep);
- pagep->prev_pgno = PGNO(lastp);
- if ((ret = __memp_fput(mpf,
- dbc->thread_info, lastp, dbc->priority)) != 0) {
- lastp = NULL;
- goto err;
- }
- }
- lastp = pagep;
- }
-err: if (lastp != NULL) {
- if (ret == 0) {
- dbc->internal->stream_curr_pgno = PGNO(lastp);
- dbc->internal->stream_off = dbt->size - OV_LEN(lastp);
- }
-
- if ((t_ret = __memp_fput(mpf, dbc->thread_info, lastp,
- dbc->priority)) != 0 && ret == 0)
- ret = t_ret;
- }
- return (ret);
-}
-
-/*
- * __db_ovref --
- * Decrement the reference count on an overflow page.
- *
- * PUBLIC: int __db_ovref __P((DBC *, db_pgno_t));
- */
-int
-__db_ovref(dbc, pgno)
- DBC *dbc;
- db_pgno_t pgno;
-{
- DB *dbp;
- DB_MPOOLFILE *mpf;
- PAGE *h;
- int ret;
-
- dbp = dbc->dbp;
- mpf = dbp->mpf;
-
- if ((ret = __memp_fget(mpf, &pgno,
- dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &h)) != 0)
- return (ret);
-
- if (DBC_LOGGING(dbc)) {
- if ((ret = __db_ovref_log(dbp,
- dbc->txn, &LSN(h), 0, h->pgno, -1, &LSN(h))) != 0) {
- (void)__memp_fput(mpf,
- dbc->thread_info, h, dbc->priority);
- return (ret);
- }
- } else
- LSN_NOT_LOGGED(LSN(h));
-
- /*
- * In BDB releases before 4.5, the overflow reference counts were
- * incremented when an overflow item was split onto an internal
- * page. There was a lock race in that code, and rather than fix
- * the race, we changed BDB to copy overflow items when splitting
- * them onto internal pages. The code to decrement reference
- * counts remains so databases already in the field continue to
- * work.
- */
- --OV_REF(h);
-
- return (__memp_fput(mpf, dbc->thread_info, h, dbc->priority));
-}
-
-/*
- * __db_doff --
- * Delete an offpage chain of overflow pages.
- *
- * PUBLIC: int __db_doff __P((DBC *, db_pgno_t));
- */
-int
-__db_doff(dbc, pgno)
- DBC *dbc;
- db_pgno_t pgno;
-{
- DB *dbp;
- DBT tmp_dbt;
- DB_LSN null_lsn;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- int ret;
-
- dbp = dbc->dbp;
- mpf = dbp->mpf;
-
- do {
- if ((ret = __memp_fget(mpf, &pgno,
- dbc->thread_info, dbc->txn, 0, &pagep)) != 0)
- return (ret);
-
- DB_ASSERT(dbp->env, TYPE(pagep) == P_OVERFLOW);
- /*
- * If it's referenced by more than one key/data item,
- * decrement the reference count and return.
- */
- if (OV_REF(pagep) > 1) {
- (void)__memp_fput(mpf,
- dbc->thread_info, pagep, dbc->priority);
- return (__db_ovref(dbc, pgno));
- }
-
- if ((ret = __memp_dirty(mpf, &pagep,
- dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) {
- if (pagep != NULL)
- (void)__memp_fput(mpf,
- dbc->thread_info, pagep, dbc->priority);
- return (ret);
- }
-
- if (DBC_LOGGING(dbc)) {
- tmp_dbt.data = (u_int8_t *)pagep + P_OVERHEAD(dbp);
- tmp_dbt.size = OV_LEN(pagep);
- ZERO_LSN(null_lsn);
- if ((ret = __db_big_log(dbp, dbc->txn,
- &LSN(pagep), 0, DB_REM_BIG,
- PGNO(pagep), PREV_PGNO(pagep),
- NEXT_PGNO(pagep), &tmp_dbt,
- &LSN(pagep), &null_lsn, &null_lsn)) != 0) {
- (void)__memp_fput(mpf,
- dbc->thread_info, pagep, dbc->priority);
- return (ret);
- }
- } else
- LSN_NOT_LOGGED(LSN(pagep));
- pgno = pagep->next_pgno;
- OV_LEN(pagep) = 0;
- if ((ret = __db_free(dbc, pagep)) != 0)
- return (ret);
- } while (pgno != PGNO_INVALID);
-
- return (0);
-}
-
-/*
- * __db_moff --
- * Match on overflow pages.
- *
- * Given a starting page number and a key, return <0, 0, >0 to indicate if the
- * key on the page is less than, equal to or greater than the key specified.
- * We optimize this by doing chunk at a time comparison unless the user has
- * specified a comparison function. In this case, we need to materialize
- * the entire object and call their comparison routine.
- *
- * __db_moff and __db_coff are generic functions useful in searching and
- * ordering off page items. __db_moff matches an overflow DBT with an offpage
- * item. __db_coff compares two offpage items for lexicographic sort order.
- *
- * PUBLIC: int __db_moff __P((DBC *, const DBT *, db_pgno_t, u_int32_t,
- * PUBLIC: int (*)(DB *, const DBT *, const DBT *), int *));
- */
-int
-__db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp)
- DBC *dbc;
- const DBT *dbt;
- db_pgno_t pgno;
- u_int32_t tlen;
- int (*cmpfunc) __P((DB *, const DBT *, const DBT *)), *cmpp;
-{
- DB *dbp;
- DBT local_dbt;
- DB_MPOOLFILE *mpf;
- DB_THREAD_INFO *ip;
- PAGE *pagep;
- void *buf;
- u_int32_t bufsize, cmp_bytes, key_left;
- u_int8_t *p1, *p2;
- int ret;
-
- dbp = dbc->dbp;
- ip = dbc->thread_info;
- mpf = dbp->mpf;
-
- /*
- * If there is a user-specified comparison function, build a
- * contiguous copy of the key, and call it.
- */
- if (cmpfunc != NULL) {
- memset(&local_dbt, 0, sizeof(local_dbt));
- buf = NULL;
- bufsize = 0;
-
- if ((ret = __db_goff(dbc,
- &local_dbt, tlen, pgno, &buf, &bufsize)) != 0)
- return (ret);
- /* Pass the key as the first argument */
- *cmpp = cmpfunc(dbp, dbt, &local_dbt);
- __os_free(dbp->env, buf);
- return (0);
- }
-
- /* While there are both keys to compare. */
- for (*cmpp = 0, p1 = dbt->data,
- key_left = dbt->size; key_left > 0 && pgno != PGNO_INVALID;) {
- if ((ret =
- __memp_fget(mpf, &pgno, ip, dbc->txn, 0, &pagep)) != 0)
- return (ret);
-
- cmp_bytes = OV_LEN(pagep) < key_left ? OV_LEN(pagep) : key_left;
- tlen -= cmp_bytes;
- key_left -= cmp_bytes;
- for (p2 = (u_int8_t *)pagep + P_OVERHEAD(dbp);
- cmp_bytes-- > 0; ++p1, ++p2)
- if (*p1 != *p2) {
- *cmpp = (long)*p1 - (long)*p2;
- break;
- }
- pgno = NEXT_PGNO(pagep);
- if ((ret = __memp_fput(mpf, ip, pagep, dbp->priority)) != 0)
- return (ret);
- if (*cmpp != 0)
- return (0);
- }
- if (key_left > 0) /* DBT is longer than the page key. */
- *cmpp = 1;
- else if (tlen > 0) /* DBT is shorter than the page key. */
- *cmpp = -1;
- else
- *cmpp = 0;
-
- return (0);
-}
-
-/*
- * __db_coff --
- * Match two offpage dbts.
- *
- * The DBTs must both refer to offpage items.
- * The match happens a chunk (page) at a time unless a user defined comparison
- * function exists. It is not possible to optimize this comparison away when
- * a lexicographic sort order is required on mismatch.
- *
- * NOTE: For now this function only works for H_OFFPAGE type items. It would
- * be simple to extend it for use with B_OVERFLOW type items. It would only
- * require extracting the total length, and page number, dependent on the
- * DBT type.
- *
- * PUBLIC: int __db_coff __P((DBC *, const DBT *, const DBT *,
- * PUBLIC: int (*)(DB *, const DBT *, const DBT *), int *));
- */
-int
-__db_coff(dbc, dbt, match, cmpfunc, cmpp)
- DBC *dbc;
- const DBT *dbt, *match;
- int (*cmpfunc) __P((DB *, const DBT *, const DBT *)), *cmpp;
-{
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_MPOOLFILE *mpf;
- DB_TXN *txn;
- DBT local_key, local_match;
- PAGE *dbt_pagep, *match_pagep;
- db_pgno_t dbt_pgno, match_pgno;
- u_int32_t cmp_bytes, dbt_bufsz, dbt_len, match_bufsz;
- u_int32_t match_len, max_data, page_sz;
- u_int8_t *p1, *p2;
- int ret;
- void *dbt_buf, *match_buf;
-
- dbp = dbc->dbp;
- ip = dbc->thread_info;
- txn = dbc->txn;
- mpf = dbp->mpf;
- page_sz = dbp->pgsize;
- *cmpp = 0;
- dbt_buf = match_buf = NULL;
-
- DB_ASSERT(dbp->env, HPAGE_PTYPE(dbt->data) == H_OFFPAGE);
- DB_ASSERT(dbp->env, HPAGE_PTYPE(match->data) == H_OFFPAGE);
-
- /* Extract potentially unaligned length and pgno fields from DBTs */
- memcpy(&dbt_len, HOFFPAGE_TLEN(dbt->data), sizeof(u_int32_t));
- memcpy(&dbt_pgno, HOFFPAGE_PGNO(dbt->data), sizeof(db_pgno_t));
- memcpy(&match_len, HOFFPAGE_TLEN(match->data), sizeof(u_int32_t));
- memcpy(&match_pgno, HOFFPAGE_PGNO(match->data), sizeof(db_pgno_t));
- max_data = (dbt_len < match_len ? dbt_len : match_len);
-
- /*
- * If there is a custom comparator, fully resolve both DBTs.
- * Then call the users comparator.
- */
- if (cmpfunc != NULL) {
- memset(&local_key, 0, sizeof(local_key));
- memset(&local_match, 0, sizeof(local_match));
- dbt_buf = match_buf = NULL;
- dbt_bufsz = match_bufsz = 0;
-
- if ((ret = __db_goff(dbc, &local_key, dbt_len,
- dbt_pgno, &dbt_buf, &dbt_bufsz)) != 0)
- goto err1;
- if ((ret = __db_goff(dbc, &local_match, match_len,
- match_pgno, &match_buf, &match_bufsz)) != 0)
- goto err1;
- /* The key needs to be the first argument for sort order */
- *cmpp = cmpfunc(dbp, &local_key, &local_match);
-
-err1: if (dbt_buf != NULL)
- __os_free(dbp->env, dbt_buf);
- if (match_buf != NULL)
- __os_free(dbp->env, match_buf);
- return (ret);
- }
-
- /* Match the offpage DBTs a page at a time. */
- while (dbt_pgno != PGNO_INVALID && match_pgno != PGNO_INVALID) {
- if ((ret =
- __memp_fget(mpf, &dbt_pgno, ip, txn, 0, &dbt_pagep)) != 0)
- return (ret);
- if ((ret =
- __memp_fget(mpf, &match_pgno,
- ip, txn, 0, &match_pagep)) != 0) {
- (void)__memp_fput(
- mpf, ip, dbt_pagep, DB_PRIORITY_UNCHANGED);
- return (ret);
- }
- cmp_bytes = page_sz < max_data ? page_sz : max_data;
- for (p1 = (u_int8_t *)dbt_pagep + P_OVERHEAD(dbp),
- p2 = (u_int8_t *)match_pagep + P_OVERHEAD(dbp);
- cmp_bytes-- > 0; ++p1, ++p2)
- if (*p1 != *p2) {
- *cmpp = (long)*p1 - (long)*p2;
- break;
- }
-
- dbt_pgno = NEXT_PGNO(dbt_pagep);
- match_pgno = NEXT_PGNO(match_pagep);
- max_data -= page_sz;
- if ((ret = __memp_fput(mpf,
- ip, dbt_pagep, DB_PRIORITY_UNCHANGED)) != 0) {
- (void)__memp_fput(mpf,
- ip, match_pagep, DB_PRIORITY_UNCHANGED);
- return (ret);
- }
- if ((ret = __memp_fput(mpf,
- ip, match_pagep, DB_PRIORITY_UNCHANGED)) != 0)
- return (ret);
- if (*cmpp != 0)
- return (0);
- }
-
- /* If a lexicographic mismatch was found, then the result has already
- * been returned. If the DBTs matched, consider the lengths of the
- * items, and return appropriately.
- */
- if (dbt_len > match_len) /* DBT is longer than the match key. */
- *cmpp = 1;
- else if (match_len > dbt_len) /* DBT is shorter than the match key. */
- *cmpp = -1;
- else
- *cmpp = 0;
-
- return (0);
-
-}
diff --git a/db/db_ovfl_vrfy.c b/db/db_ovfl_vrfy.c
deleted file mode 100644
index fdd0201..0000000
--- a/db/db_ovfl_vrfy.c
+++ /dev/null
@@ -1,409 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995, 1996
- * Keith Bostic. All rights reserved.
- */
-/*
- * Copyright (c) 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Mike Olson.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_am.h"
-#include "dbinc/db_verify.h"
-#include "dbinc/mp.h"
-
-/*
- * __db_vrfy_overflow --
- * Verify overflow page.
- *
- * PUBLIC: int __db_vrfy_overflow __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
- * PUBLIC: u_int32_t));
- */
-int
-__db_vrfy_overflow(dbp, vdp, h, pgno, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- PAGE *h;
- db_pgno_t pgno;
- u_int32_t flags;
-{
- VRFY_PAGEINFO *pip;
- int isbad, ret, t_ret;
-
- isbad = 0;
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
-
- if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
-
- pip->refcount = OV_REF(h);
- if (pip->refcount < 1) {
- EPRINT((dbp->env,
- "Page %lu: overflow page has zero reference count",
- (u_long)pgno));
- isbad = 1;
- }
-
- /* Just store for now. */
- pip->olen = HOFFSET(h);
-
-err: if ((t_ret = __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0)
- ret = t_ret;
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
-}
-
-/*
- * __db_vrfy_ovfl_structure --
- * Walk a list of overflow pages, avoiding cycles and marking
- * pages seen.
- *
- * PUBLIC: int __db_vrfy_ovfl_structure
- * PUBLIC: __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, u_int32_t));
- */
-int
-__db_vrfy_ovfl_structure(dbp, vdp, pgno, tlen, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- db_pgno_t pgno;
- u_int32_t tlen;
- u_int32_t flags;
-{
- DB *pgset;
- ENV *env;
- VRFY_PAGEINFO *pip;
- db_pgno_t next, prev;
- int isbad, ret, seen_cnt, t_ret;
- u_int32_t refcount;
-
- env = dbp->env;
- pgset = vdp->pgset;
- DB_ASSERT(env, pgset != NULL);
- isbad = 0;
-
- /* This shouldn't happen, but just to be sure. */
- if (!IS_VALID_PGNO(pgno))
- return (DB_VERIFY_BAD);
-
- /*
- * Check the first prev_pgno; it ought to be PGNO_INVALID,
- * since there's no prev page.
- */
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
-
- /* The refcount is stored on the first overflow page. */
- refcount = pip->refcount;
-
- if (pip->type != P_OVERFLOW) {
- EPRINT((env,
- "Page %lu: overflow page of invalid type %lu",
- (u_long)pgno, (u_long)pip->type));
- ret = DB_VERIFY_BAD;
- goto err; /* Unsafe to continue. */
- }
-
- prev = pip->prev_pgno;
- if (prev != PGNO_INVALID) {
- EPRINT((env,
- "Page %lu: first page in overflow chain has a prev_pgno %lu",
- (u_long)pgno, (u_long)prev));
- isbad = 1;
- }
-
- for (;;) {
- /*
- * We may have seen this page elsewhere, if the overflow entry
- * has been promoted to an internal page; we just want to
- * make sure that each overflow page is seen exactly as many
- * times as its refcount dictates.
- *
- * Note that this code also serves to keep us from looping
- * infinitely if there's a cycle in an overflow chain.
- */
- if ((ret = __db_vrfy_pgset_get(pgset,
- vdp->thread_info, pgno, &seen_cnt)) != 0)
- goto err;
- if ((u_int32_t)seen_cnt > refcount) {
- EPRINT((env,
- "Page %lu: encountered too many times in overflow traversal",
- (u_long)pgno));
- ret = DB_VERIFY_BAD;
- goto err;
- }
- if ((ret =
- __db_vrfy_pgset_inc(pgset, vdp->thread_info, pgno)) != 0)
- goto err;
-
- /*
- * Each overflow page can be referenced multiple times,
- * because it's possible for overflow Btree keys to get
- * promoted to internal pages. We want to make sure that
- * each page is referenced from a Btree leaf (or Hash data
- * page, which we consider a "leaf" here) exactly once; if
- * the parent was a leaf, set a flag to indicate that we've
- * seen this page in a leaf context.
- *
- * If the parent is not a leaf--in which case it's a Btree
- * internal page--we don't need to bother doing any further
- * verification, as we'll do it when we hit the leaf (or
- * complain that we never saw the leaf). Only the first
- * page in an overflow chain should ever have a refcount
- * greater than 1, and the combination of the LEAFSEEN check
- * and the fact that we bail after the first page for
- * non-leaves should ensure this.
- *
- * Note that each "child" of a page, such as an overflow page,
- * is stored and verified in a structure check exactly once,
- * so this code does not need to contend with the fact that
- * overflow chains used as Btree duplicate keys may be
- * referenced multiply from a single Btree leaf page.
- */
- if (LF_ISSET(DB_ST_OVFL_LEAF)) {
- if (F_ISSET(pip, VRFY_OVFL_LEAFSEEN)) {
- EPRINT((env,
- "Page %lu: overflow page linked twice from leaf or data page",
- (u_long)pgno));
- ret = DB_VERIFY_BAD;
- goto err;
- }
- F_SET(pip, VRFY_OVFL_LEAFSEEN);
- }
-
- /*
- * We want to verify each overflow chain only once, and
- * although no chain should be linked more than once from a
- * leaf page, we can't guarantee that it'll be linked that
- * once if it's linked from an internal page and the key
- * is gone.
- *
- * seen_cnt is the number of times we'd encountered this page
- * before calling this function.
- */
- if (seen_cnt == 0) {
- /*
- * Keep a running tab on how much of the item we've
- * seen.
- */
- tlen -= pip->olen;
-
- /* Send the application feedback about our progress. */
- if (!LF_ISSET(DB_SALVAGE))
- __db_vrfy_struct_feedback(dbp, vdp);
- } else
- goto done;
-
- next = pip->next_pgno;
-
- /* Are we there yet? */
- if (next == PGNO_INVALID)
- break;
-
- /*
- * We've already checked this when we saved it, but just
- * to be sure...
- */
- if (!IS_VALID_PGNO(next)) {
- EPRINT((env,
- "Page %lu: bad next_pgno %lu on overflow page",
- (u_long)pgno, (u_long)next));
- ret = DB_VERIFY_BAD;
- goto err;
- }
-
- if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 ||
- (ret = __db_vrfy_getpageinfo(vdp, next, &pip)) != 0)
- return (ret);
- if (pip->prev_pgno != pgno) {
- EPRINT((env,
- "Page %lu: bad prev_pgno %lu on overflow page (should be %lu)",
- (u_long)next, (u_long)pip->prev_pgno,
- (u_long)pgno));
- isbad = 1;
- /*
- * It's safe to continue because we have separate
- * cycle detection.
- */
- }
-
- pgno = next;
- }
-
- if (tlen > 0) {
- isbad = 1;
- EPRINT((env,
- "Page %lu: overflow item incomplete", (u_long)pgno));
- }
-
-done:
-err: if ((t_ret =
- __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
-}
-
-/*
- * __db_safe_goff --
- * Get an overflow item, very carefully, from an untrusted database,
- * in the context of the salvager.
- *
- * PUBLIC: int __db_safe_goff __P((DB *, VRFY_DBINFO *,
- * PUBLIC: db_pgno_t, DBT *, void *, u_int32_t *, u_int32_t));
- */
-int
-__db_safe_goff(dbp, vdp, pgno, dbt, buf, bufsz, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- db_pgno_t pgno;
- DBT *dbt;
- void *buf;
- u_int32_t *bufsz;
- u_int32_t flags;
-{
- DB_MPOOLFILE *mpf;
- PAGE *h;
- int ret, t_ret;
- u_int32_t bytesgot, bytes;
- u_int8_t *src, *dest;
-
- mpf = dbp->mpf;
- h = NULL;
- ret = t_ret = 0;
- bytesgot = bytes = 0;
-
- DB_ASSERT(dbp->env, bufsz != NULL);
-
- /*
- * Back up to the start of the overflow chain (if necessary) via the
- * prev pointer of the overflow page. This guarantees we transverse the
- * longest possible chains of overflow pages and won't be called again
- * with a pgno earlier in the chain, stepping on ourselves.
- */
- for (;;) {
- if ((ret = __memp_fget(
- mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
- return (ret);
-
- if (PREV_PGNO(h) == PGNO_INVALID ||
- !IS_VALID_PGNO(PREV_PGNO(h)))
- break;
-
- pgno = PREV_PGNO(h);
-
- if ((ret = __memp_fput(mpf,
- vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0)
- return (ret);
- }
- if ((ret = __memp_fput(
- mpf, vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0)
- return (ret);
-
- h = NULL;
-
- while ((pgno != PGNO_INVALID) && (IS_VALID_PGNO(pgno))) {
- /*
- * Mark that we're looking at this page; if we've seen it
- * already, quit.
- */
- if ((ret = __db_salvage_markdone(vdp, pgno)) != 0)
- break;
-
- if ((ret = __memp_fget(mpf, &pgno,
- vdp->thread_info, NULL, 0, &h)) != 0)
- break;
-
- /*
- * Make sure it's really an overflow page, unless we're
- * being aggressive, in which case we pretend it is.
- */
- if (!LF_ISSET(DB_AGGRESSIVE) && TYPE(h) != P_OVERFLOW) {
- ret = DB_VERIFY_BAD;
- break;
- }
-
- src = (u_int8_t *)h + P_OVERHEAD(dbp);
- bytes = OV_LEN(h);
-
- if (bytes + P_OVERHEAD(dbp) > dbp->pgsize)
- bytes = dbp->pgsize - P_OVERHEAD(dbp);
-
- /*
- * Realloc if buf is too small
- */
- if (bytesgot + bytes > *bufsz) {
- if ((ret =
- __os_realloc(dbp->env, bytesgot + bytes, buf)) != 0)
- break;
- *bufsz = bytesgot + bytes;
- }
-
- dest = *(u_int8_t **)buf + bytesgot;
- bytesgot += bytes;
-
- memcpy(dest, src, bytes);
-
- pgno = NEXT_PGNO(h);
-
- if ((ret = __memp_fput(mpf,
- vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0)
- break;
- h = NULL;
- }
-
- /*
- * If we're being aggressive, salvage a partial datum if there
- * was an error somewhere along the way.
- */
- if (ret == 0 || LF_ISSET(DB_AGGRESSIVE)) {
- dbt->size = bytesgot;
- dbt->data = *(void **)buf;
- }
-
- /* If we broke out on error, don't leave pages pinned. */
- if (h != NULL && (t_ret = __memp_fput(mpf,
- vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
diff --git a/db/db_pr.c b/db/db_pr.c
deleted file mode 100644
index 69fb7c7..0000000
--- a/db/db_pr.c
+++ /dev/null
@@ -1,1659 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/btree.h"
-#include "dbinc/hash.h"
-#include "dbinc/mp.h"
-#include "dbinc/partition.h"
-#include "dbinc/qam.h"
-#include "dbinc/db_verify.h"
-
-/*
- * __db_loadme --
- * A nice place to put a breakpoint.
- *
- * PUBLIC: void __db_loadme __P((void));
- */
-void
-__db_loadme()
-{
- pid_t pid;
-
- __os_id(NULL, &pid, NULL);
-}
-
-#ifdef HAVE_STATISTICS
-static int __db_bmeta __P((DB *, BTMETA *, u_int32_t));
-static int __db_hmeta __P((DB *, HMETA *, u_int32_t));
-static void __db_meta __P((DB *, DBMETA *, FN const *, u_int32_t));
-static const char *__db_pagetype_to_string __P((u_int32_t));
-static void __db_prdb __P((DB *, u_int32_t));
-static void __db_proff __P((ENV *, DB_MSGBUF *, void *));
-static int __db_prtree __P((DB *, DB_TXN *, u_int32_t));
-static int __db_qmeta __P((DB *, QMETA *, u_int32_t));
-
-/*
- * __db_dumptree --
- * Dump the tree to a file.
- *
- * PUBLIC: int __db_dumptree __P((DB *, DB_TXN *, char *, char *));
- */
-int
-__db_dumptree(dbp, txn, op, name)
- DB *dbp;
- DB_TXN *txn;
- char *op, *name;
-{
- ENV *env;
- FILE *fp, *orig_fp;
- u_int32_t flags;
- int ret;
-
- env = dbp->env;
-
- for (flags = 0; *op != '\0'; ++op)
- switch (*op) {
- case 'a':
- LF_SET(DB_PR_PAGE);
- break;
- case 'h':
- break;
- case 'r':
- LF_SET(DB_PR_RECOVERYTEST);
- break;
- default:
- return (EINVAL);
- }
-
- if (name != NULL) {
- if ((fp = fopen(name, "w")) == NULL)
- return (__os_get_errno());
-
- orig_fp = dbp->dbenv->db_msgfile;
- dbp->dbenv->db_msgfile = fp;
- } else
- fp = orig_fp = NULL;
-
- __db_prdb(dbp, flags);
-
- __db_msg(env, "%s", DB_GLOBAL(db_line));
-
- ret = __db_prtree(dbp, txn, flags);
-
- if (fp != NULL) {
- (void)fclose(fp);
- env->dbenv->db_msgfile = orig_fp;
- }
-
- return (ret);
-}
-
-static const FN __db_flags_fn[] = {
- { DB_AM_CHKSUM, "checksumming" },
- { DB_AM_COMPENSATE, "created by compensating transaction" },
- { DB_AM_CREATED, "database created" },
- { DB_AM_CREATED_MSTR, "encompassing file created" },
- { DB_AM_DBM_ERROR, "dbm/ndbm error" },
- { DB_AM_DELIMITER, "variable length" },
- { DB_AM_DISCARD, "discard cached pages" },
- { DB_AM_DUP, "duplicates" },
- { DB_AM_DUPSORT, "sorted duplicates" },
- { DB_AM_ENCRYPT, "encrypted" },
- { DB_AM_FIXEDLEN, "fixed-length records" },
- { DB_AM_INMEM, "in-memory" },
- { DB_AM_IN_RENAME, "file is being renamed" },
- { DB_AM_NOT_DURABLE, "changes not logged" },
- { DB_AM_OPEN_CALLED, "open called" },
- { DB_AM_PAD, "pad value" },
- { DB_AM_PGDEF, "default page size" },
- { DB_AM_RDONLY, "read-only" },
- { DB_AM_READ_UNCOMMITTED, "read-uncommitted" },
- { DB_AM_RECNUM, "Btree record numbers" },
- { DB_AM_RECOVER, "opened for recovery" },
- { DB_AM_RENUMBER, "renumber" },
- { DB_AM_REVSPLITOFF, "no reverse splits" },
- { DB_AM_SECONDARY, "secondary" },
- { DB_AM_SNAPSHOT, "load on open" },
- { DB_AM_SUBDB, "subdatabases" },
- { DB_AM_SWAP, "needswap" },
- { DB_AM_TXN, "transactional" },
- { DB_AM_VERIFYING, "verifier" },
- { 0, NULL }
-};
-
-/*
- * __db_get_flags_fn --
- * Return the __db_flags_fn array.
- *
- * PUBLIC: const FN * __db_get_flags_fn __P((void));
- */
-const FN *
-__db_get_flags_fn()
-{
- return (__db_flags_fn);
-}
-
-/*
- * __db_prdb --
- * Print out the DB structure information.
- */
-static void
-__db_prdb(dbp, flags)
- DB *dbp;
- u_int32_t flags;
-{
- BTREE *bt;
- DB_MSGBUF mb;
- ENV *env;
- HASH *h;
- QUEUE *q;
-
- env = dbp->env;
-
- DB_MSGBUF_INIT(&mb);
- __db_msg(env, "In-memory DB structure:");
- __db_msgadd(env, &mb, "%s: %#lx",
- __db_dbtype_to_string(dbp->type), (u_long)dbp->flags);
- __db_prflags(env, &mb, dbp->flags, __db_flags_fn, " (", ")");
- DB_MSGBUF_FLUSH(env, &mb);
-
- switch (dbp->type) {
- case DB_BTREE:
- case DB_RECNO:
- bt = dbp->bt_internal;
- __db_msg(env, "bt_meta: %lu bt_root: %lu",
- (u_long)bt->bt_meta, (u_long)bt->bt_root);
- __db_msg(env, "bt_minkey: %lu", (u_long)bt->bt_minkey);
- if (!LF_ISSET(DB_PR_RECOVERYTEST))
- __db_msg(env, "bt_compare: %#lx bt_prefix: %#lx",
- P_TO_ULONG(bt->bt_compare),
- P_TO_ULONG(bt->bt_prefix));
-#ifdef HAVE_COMPRESSION
- if (!LF_ISSET(DB_PR_RECOVERYTEST))
- __db_msg(env, "bt_compress: %#lx bt_decompress: %#lx",
- P_TO_ULONG(bt->bt_compress),
- P_TO_ULONG(bt->bt_decompress));
-#endif
- __db_msg(env, "bt_lpgno: %lu", (u_long)bt->bt_lpgno);
- if (dbp->type == DB_RECNO) {
- __db_msg(env,
- "re_pad: %#lx re_delim: %#lx re_len: %lu re_source: %s",
- (u_long)bt->re_pad, (u_long)bt->re_delim,
- (u_long)bt->re_len,
- bt->re_source == NULL ? "" : bt->re_source);
- __db_msg(env,
- "re_modified: %d re_eof: %d re_last: %lu",
- bt->re_modified, bt->re_eof, (u_long)bt->re_last);
- }
- break;
- case DB_HASH:
- h = dbp->h_internal;
- __db_msg(env, "meta_pgno: %lu", (u_long)h->meta_pgno);
- __db_msg(env, "h_ffactor: %lu", (u_long)h->h_ffactor);
- __db_msg(env, "h_nelem: %lu", (u_long)h->h_nelem);
- if (!LF_ISSET(DB_PR_RECOVERYTEST))
- __db_msg(env, "h_hash: %#lx", P_TO_ULONG(h->h_hash));
- break;
- case DB_QUEUE:
- q = dbp->q_internal;
- __db_msg(env, "q_meta: %lu", (u_long)q->q_meta);
- __db_msg(env, "q_root: %lu", (u_long)q->q_root);
- __db_msg(env, "re_pad: %#lx re_len: %lu",
- (u_long)q->re_pad, (u_long)q->re_len);
- __db_msg(env, "rec_page: %lu", (u_long)q->rec_page);
- __db_msg(env, "page_ext: %lu", (u_long)q->page_ext);
- break;
- case DB_UNKNOWN:
- default:
- break;
- }
-}
-
-/*
- * __db_prtree --
- * Print out the entire tree.
- */
-static int
-__db_prtree(dbp, txn, flags)
- DB *dbp;
- DB_TXN *txn;
- u_int32_t flags;
-{
- DB_MPOOLFILE *mpf;
- PAGE *h;
- db_pgno_t i, last;
- int ret;
-
- mpf = dbp->mpf;
-
- if (dbp->type == DB_QUEUE)
- return (__db_prqueue(dbp, flags));
-
- /*
- * Find out the page number of the last page in the database, then
- * dump each page.
- */
- if ((ret = __memp_get_last_pgno(mpf, &last)) != 0)
- return (ret);
- for (i = 0; i <= last; ++i) {
- if ((ret = __memp_fget(mpf, &i, NULL, txn, 0, &h)) != 0)
- return (ret);
- (void)__db_prpage(dbp, h, flags);
- if ((ret = __memp_fput(mpf, NULL, h, dbp->priority)) != 0)
- return (ret);
- }
-
- return (0);
-}
-
-/*
- * __db_meta --
- * Print out common metadata information.
- */
-static void
-__db_meta(dbp, dbmeta, fn, flags)
- DB *dbp;
- DBMETA *dbmeta;
- FN const *fn;
- u_int32_t flags;
-{
- DB_MPOOLFILE *mpf;
- DB_MSGBUF mb;
- ENV *env;
- PAGE *h;
- db_pgno_t pgno;
- u_int8_t *p;
- int cnt, ret;
- const char *sep;
-
- env = dbp->env;
- mpf = dbp->mpf;
- DB_MSGBUF_INIT(&mb);
-
- __db_msg(env, "\tmagic: %#lx", (u_long)dbmeta->magic);
- __db_msg(env, "\tversion: %lu", (u_long)dbmeta->version);
- __db_msg(env, "\tpagesize: %lu", (u_long)dbmeta->pagesize);
- __db_msg(env, "\ttype: %lu", (u_long)dbmeta->type);
- __db_msg(env, "\tmetaflags %#lx", (u_long)dbmeta->metaflags);
- __db_msg(env, "\tkeys: %lu\trecords: %lu",
- (u_long)dbmeta->key_count, (u_long)dbmeta->record_count);
- if (dbmeta->nparts)
- __db_msg(env, "\tnparts: %lu", (u_long)dbmeta->nparts);
-
- /*
- * If we're doing recovery testing, don't display the free list,
- * it may have changed and that makes the dump diff not work.
- */
- if (!LF_ISSET(DB_PR_RECOVERYTEST)) {
- __db_msgadd(
- env, &mb, "\tfree list: %lu", (u_long)dbmeta->free);
- for (pgno = dbmeta->free,
- cnt = 0, sep = ", "; pgno != PGNO_INVALID;) {
- if ((ret = __memp_fget(mpf,
- &pgno, NULL, NULL, 0, &h)) != 0) {
- DB_MSGBUF_FLUSH(env, &mb);
- __db_msg(env,
- "Unable to retrieve free-list page: %lu: %s",
- (u_long)pgno, db_strerror(ret));
- break;
- }
- pgno = h->next_pgno;
- (void)__memp_fput(mpf, NULL, h, dbp->priority);
- __db_msgadd(env, &mb, "%s%lu", sep, (u_long)pgno);
- if (++cnt % 10 == 0) {
- DB_MSGBUF_FLUSH(env, &mb);
- cnt = 0;
- sep = "\t";
- } else
- sep = ", ";
- }
- DB_MSGBUF_FLUSH(env, &mb);
- __db_msg(env, "\tlast_pgno: %lu", (u_long)dbmeta->last_pgno);
- }
-
- if (fn != NULL) {
- DB_MSGBUF_FLUSH(env, &mb);
- __db_msgadd(env, &mb, "\tflags: %#lx", (u_long)dbmeta->flags);
- __db_prflags(env, &mb, dbmeta->flags, fn, " (", ")");
- }
-
- DB_MSGBUF_FLUSH(env, &mb);
- __db_msgadd(env, &mb, "\tuid: ");
- for (p = (u_int8_t *)dbmeta->uid,
- cnt = 0; cnt < DB_FILE_ID_LEN; ++cnt) {
- __db_msgadd(env, &mb, "%x", *p++);
- if (cnt < DB_FILE_ID_LEN - 1)
- __db_msgadd(env, &mb, " ");
- }
- DB_MSGBUF_FLUSH(env, &mb);
-}
-
-/*
- * __db_bmeta --
- * Print out the btree meta-data page.
- */
-static int
-__db_bmeta(dbp, h, flags)
- DB *dbp;
- BTMETA *h;
- u_int32_t flags;
-{
- static const FN fn[] = {
- { BTM_DUP, "duplicates" },
- { BTM_RECNO, "recno" },
- { BTM_RECNUM, "btree:recnum" },
- { BTM_FIXEDLEN, "recno:fixed-length" },
- { BTM_RENUMBER, "recno:renumber" },
- { BTM_SUBDB, "multiple-databases" },
- { BTM_DUPSORT, "sorted duplicates" },
- { BTM_COMPRESS, "compressed" },
- { 0, NULL }
- };
- ENV *env;
-
- env = dbp->env;
-
- __db_meta(dbp, (DBMETA *)h, fn, flags);
-
- __db_msg(env, "\tminkey: %lu", (u_long)h->minkey);
- if (dbp->type == DB_RECNO)
- __db_msg(env, "\tre_len: %#lx re_pad: %#lx",
- (u_long)h->re_len, (u_long)h->re_pad);
- __db_msg(env, "\troot: %lu", (u_long)h->root);
-
- return (0);
-}
-
-/*
- * __db_hmeta --
- * Print out the hash meta-data page.
- */
-static int
-__db_hmeta(dbp, h, flags)
- DB *dbp;
- HMETA *h;
- u_int32_t flags;
-{
- static const FN fn[] = {
- { DB_HASH_DUP, "duplicates" },
- { DB_HASH_SUBDB, "multiple-databases" },
- { DB_HASH_DUPSORT, "sorted duplicates" },
- { 0, NULL }
- };
- ENV *env;
- DB_MSGBUF mb;
- int i;
-
- env = dbp->env;
- DB_MSGBUF_INIT(&mb);
-
- __db_meta(dbp, (DBMETA *)h, fn, flags);
-
- __db_msg(env, "\tmax_bucket: %lu", (u_long)h->max_bucket);
- __db_msg(env, "\thigh_mask: %#lx", (u_long)h->high_mask);
- __db_msg(env, "\tlow_mask: %#lx", (u_long)h->low_mask);
- __db_msg(env, "\tffactor: %lu", (u_long)h->ffactor);
- __db_msg(env, "\tnelem: %lu", (u_long)h->nelem);
- __db_msg(env, "\th_charkey: %#lx", (u_long)h->h_charkey);
- __db_msgadd(env, &mb, "\tspare points: ");
- for (i = 0; i < NCACHED; i++)
- __db_msgadd(env, &mb, "%lu ", (u_long)h->spares[i]);
- DB_MSGBUF_FLUSH(env, &mb);
-
- return (0);
-}
-
-/*
- * __db_qmeta --
- * Print out the queue meta-data page.
- */
-static int
-__db_qmeta(dbp, h, flags)
- DB *dbp;
- QMETA *h;
- u_int32_t flags;
-{
- ENV *env;
-
- env = dbp->env;
-
- __db_meta(dbp, (DBMETA *)h, NULL, flags);
-
- __db_msg(env, "\tfirst_recno: %lu", (u_long)h->first_recno);
- __db_msg(env, "\tcur_recno: %lu", (u_long)h->cur_recno);
- __db_msg(env, "\tre_len: %#lx re_pad: %lu",
- (u_long)h->re_len, (u_long)h->re_pad);
- __db_msg(env, "\trec_page: %lu", (u_long)h->rec_page);
- __db_msg(env, "\tpage_ext: %lu", (u_long)h->page_ext);
-
- return (0);
-}
-
-/*
- * __db_prnpage
- * -- Print out a specific page.
- *
- * PUBLIC: int __db_prnpage __P((DB *, DB_TXN *, db_pgno_t));
- */
-int
-__db_prnpage(dbp, txn, pgno)
- DB *dbp;
- DB_TXN *txn;
- db_pgno_t pgno;
-{
- DB_MPOOLFILE *mpf;
- PAGE *h;
- int ret, t_ret;
-
- mpf = dbp->mpf;
-
- if ((ret = __memp_fget(mpf, &pgno, NULL, txn, 0, &h)) != 0)
- return (ret);
-
- ret = __db_prpage(dbp, h, DB_PR_PAGE);
-
- if ((t_ret = __memp_fput(mpf, NULL, h, dbp->priority)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __db_prpage
- * -- Print out a page.
- *
- * PUBLIC: int __db_prpage __P((DB *, PAGE *, u_int32_t));
- */
-int
-__db_prpage(dbp, h, flags)
- DB *dbp;
- PAGE *h;
- u_int32_t flags;
-{
- BINTERNAL *bi;
- BKEYDATA *bk;
- DB_MSGBUF mb;
- ENV *env;
- HOFFPAGE a_hkd;
- QAMDATA *qp, *qep;
- RINTERNAL *ri;
- db_indx_t dlen, len, i, *inp;
- db_pgno_t pgno;
- db_recno_t recno;
- u_int32_t pagesize, qlen;
- u_int8_t *ep, *hk, *p;
- int deleted, ret;
- const char *s;
- void *sp;
-
- env = dbp->env;
- DB_MSGBUF_INIT(&mb);
-
- /*
- * If we're doing recovery testing and this page is P_INVALID,
- * assume it's a page that's on the free list, and don't display it.
- */
- if (LF_ISSET(DB_PR_RECOVERYTEST) && TYPE(h) == P_INVALID)
- return (0);
-
- if ((s = __db_pagetype_to_string(TYPE(h))) == NULL) {
- __db_msg(env, "ILLEGAL PAGE TYPE: page: %lu type: %lu",
- (u_long)h->pgno, (u_long)TYPE(h));
- return (EINVAL);
- }
-
- /*
- * !!!
- * Find out the page size. We don't want to do it the "right" way,
- * by reading the value from the meta-data page, that's going to be
- * slow. Reach down into the mpool region.
- */
- pagesize = (u_int32_t)dbp->mpf->mfp->stat.st_pagesize;
-
- /* Page number, page type. */
- __db_msgadd(env, &mb, "page %lu: %s:", (u_long)h->pgno, s);
-
- /*
- * LSNs on a metadata page will be different from the original after an
- * abort, in some cases. Don't display them if we're testing recovery.
- */
- if (!LF_ISSET(DB_PR_RECOVERYTEST) ||
- (TYPE(h) != P_BTREEMETA && TYPE(h) != P_HASHMETA &&
- TYPE(h) != P_QAMMETA && TYPE(h) != P_QAMDATA))
- __db_msgadd(env, &mb, " LSN [%lu][%lu]:",
- (u_long)LSN(h).file, (u_long)LSN(h).offset);
-
- /*
- * Page level (only applicable for Btree/Recno, but we always display
- * it, for no particular reason.
- */
- __db_msgadd(env, &mb, " level %lu", (u_long)h->level);
-
- /* Record count. */
- if (TYPE(h) == P_IBTREE ||
- TYPE(h) == P_IRECNO || (TYPE(h) == P_LRECNO &&
- h->pgno == ((BTREE *)dbp->bt_internal)->bt_root))
- __db_msgadd(env, &mb, " records: %lu", (u_long)RE_NREC(h));
- DB_MSGBUF_FLUSH(env, &mb);
-
- switch (TYPE(h)) {
- case P_BTREEMETA:
- return (__db_bmeta(dbp, (BTMETA *)h, flags));
- case P_HASHMETA:
- return (__db_hmeta(dbp, (HMETA *)h, flags));
- case P_QAMMETA:
- return (__db_qmeta(dbp, (QMETA *)h, flags));
- case P_QAMDATA: /* Should be meta->start. */
- if (!LF_ISSET(DB_PR_PAGE))
- return (0);
-
- qlen = ((QUEUE *)dbp->q_internal)->re_len;
- recno = (h->pgno - 1) * QAM_RECNO_PER_PAGE(dbp) + 1;
- i = 0;
- qep = (QAMDATA *)((u_int8_t *)h + pagesize - qlen);
- for (qp = QAM_GET_RECORD(dbp, h, i); qp < qep;
- recno++, i++, qp = QAM_GET_RECORD(dbp, h, i)) {
- if (!F_ISSET(qp, QAM_SET))
- continue;
-
- __db_msgadd(env, &mb, "%s",
- F_ISSET(qp, QAM_VALID) ? "\t" : " D");
- __db_msgadd(env, &mb, "[%03lu] %4lu ", (u_long)recno,
- (u_long)((u_int8_t *)qp - (u_int8_t *)h));
- __db_prbytes(env, &mb, qp->data, qlen);
- }
- return (0);
- default:
- break;
- }
-
- s = "\t";
- if (TYPE(h) != P_IBTREE && TYPE(h) != P_IRECNO) {
- __db_msgadd(env, &mb, "%sprev: %4lu next: %4lu",
- s, (u_long)PREV_PGNO(h), (u_long)NEXT_PGNO(h));
- s = " ";
- }
- if (TYPE(h) == P_OVERFLOW) {
- __db_msgadd(env, &mb,
- "%sref cnt: %4lu ", s, (u_long)OV_REF(h));
- __db_prbytes(env,
- &mb, (u_int8_t *)h + P_OVERHEAD(dbp), OV_LEN(h));
- return (0);
- }
- __db_msgadd(env, &mb, "%sentries: %4lu", s, (u_long)NUM_ENT(h));
- __db_msgadd(env, &mb, " offset: %4lu", (u_long)HOFFSET(h));
- DB_MSGBUF_FLUSH(env, &mb);
-
- if (TYPE(h) == P_INVALID || !LF_ISSET(DB_PR_PAGE))
- return (0);
-
- ret = 0;
- inp = P_INP(dbp, h);
- for (i = 0; i < NUM_ENT(h); i++) {
- if ((uintptr_t)(P_ENTRY(dbp, h, i) - (u_int8_t *)h) <
- (uintptr_t)(P_OVERHEAD(dbp)) ||
- (size_t)(P_ENTRY(dbp, h, i) - (u_int8_t *)h) >= pagesize) {
- __db_msg(env,
- "ILLEGAL PAGE OFFSET: indx: %lu of %lu",
- (u_long)i, (u_long)inp[i]);
- ret = EINVAL;
- continue;
- }
- deleted = 0;
- switch (TYPE(h)) {
- case P_HASH_UNSORTED:
- case P_HASH:
- case P_IBTREE:
- case P_IRECNO:
- sp = P_ENTRY(dbp, h, i);
- break;
- case P_LBTREE:
- sp = P_ENTRY(dbp, h, i);
- deleted = i % 2 == 0 &&
- B_DISSET(GET_BKEYDATA(dbp, h, i + O_INDX)->type);
- break;
- case P_LDUP:
- case P_LRECNO:
- sp = P_ENTRY(dbp, h, i);
- deleted = B_DISSET(GET_BKEYDATA(dbp, h, i)->type);
- break;
- default:
- goto type_err;
- }
- __db_msgadd(env, &mb, "%s", deleted ? " D" : "\t");
- __db_msgadd(
- env, &mb, "[%03lu] %4lu ", (u_long)i, (u_long)inp[i]);
- switch (TYPE(h)) {
- case P_HASH_UNSORTED:
- case P_HASH:
- hk = sp;
- switch (HPAGE_PTYPE(hk)) {
- case H_OFFDUP:
- memcpy(&pgno,
- HOFFDUP_PGNO(hk), sizeof(db_pgno_t));
- __db_msgadd(env, &mb,
- "%4lu [offpage dups]", (u_long)pgno);
- DB_MSGBUF_FLUSH(env, &mb);
- break;
- case H_DUPLICATE:
- /*
- * If this is the first item on a page, then
- * we cannot figure out how long it is, so
- * we only print the first one in the duplicate
- * set.
- */
- if (i != 0)
- len = LEN_HKEYDATA(dbp, h, 0, i);
- else
- len = 1;
-
- __db_msgadd(env, &mb, "Duplicates:");
- DB_MSGBUF_FLUSH(env, &mb);
- for (p = HKEYDATA_DATA(hk),
- ep = p + len; p < ep;) {
- memcpy(&dlen, p, sizeof(db_indx_t));
- p += sizeof(db_indx_t);
- __db_msgadd(env, &mb, "\t\t");
- __db_prbytes(env, &mb, p, dlen);
- p += sizeof(db_indx_t) + dlen;
- }
- break;
- case H_KEYDATA:
- __db_prbytes(env, &mb, HKEYDATA_DATA(hk),
- LEN_HKEYDATA(dbp, h, i == 0 ?
- pagesize : 0, i));
- break;
- case H_OFFPAGE:
- memcpy(&a_hkd, hk, HOFFPAGE_SIZE);
- __db_msgadd(env, &mb,
- "overflow: total len: %4lu page: %4lu",
- (u_long)a_hkd.tlen, (u_long)a_hkd.pgno);
- DB_MSGBUF_FLUSH(env, &mb);
- break;
- default:
- DB_MSGBUF_FLUSH(env, &mb);
- __db_msg(env, "ILLEGAL HASH PAGE TYPE: %lu",
- (u_long)HPAGE_PTYPE(hk));
- ret = EINVAL;
- break;
- }
- break;
- case P_IBTREE:
- bi = sp;
-
- if (F_ISSET(dbp, DB_AM_RECNUM))
- __db_msgadd(env, &mb,
- "count: %4lu ", (u_long)bi->nrecs);
- __db_msgadd(env, &mb,
- "pgno: %4lu type: %lu ",
- (u_long)bi->pgno, (u_long)bi->type);
- switch (B_TYPE(bi->type)) {
- case B_KEYDATA:
- __db_prbytes(env, &mb, bi->data, bi->len);
- break;
- case B_DUPLICATE:
- case B_OVERFLOW:
- __db_proff(env, &mb, bi->data);
- break;
- default:
- DB_MSGBUF_FLUSH(env, &mb);
- __db_msg(env, "ILLEGAL BINTERNAL TYPE: %lu",
- (u_long)B_TYPE(bi->type));
- ret = EINVAL;
- break;
- }
- break;
- case P_IRECNO:
- ri = sp;
- __db_msgadd(env, &mb, "entries %4lu pgno %4lu",
- (u_long)ri->nrecs, (u_long)ri->pgno);
- DB_MSGBUF_FLUSH(env, &mb);
- break;
- case P_LBTREE:
- case P_LDUP:
- case P_LRECNO:
- bk = sp;
- switch (B_TYPE(bk->type)) {
- case B_KEYDATA:
- __db_prbytes(env, &mb, bk->data, bk->len);
- break;
- case B_DUPLICATE:
- case B_OVERFLOW:
- __db_proff(env, &mb, bk);
- break;
- default:
- DB_MSGBUF_FLUSH(env, &mb);
- __db_msg(env,
- "ILLEGAL DUPLICATE/LBTREE/LRECNO TYPE: %lu",
- (u_long)B_TYPE(bk->type));
- ret = EINVAL;
- break;
- }
- break;
- default:
-type_err: DB_MSGBUF_FLUSH(env, &mb);
- __db_msg(env,
- "ILLEGAL PAGE TYPE: %lu", (u_long)TYPE(h));
- ret = EINVAL;
- continue;
- }
- }
- return (ret);
-}
-
-/*
- * __db_prbytes --
- * Print out a data element.
- *
- * PUBLIC: void __db_prbytes __P((ENV *, DB_MSGBUF *, u_int8_t *, u_int32_t));
- */
-void
-__db_prbytes(env, mbp, bytes, len)
- ENV *env;
- DB_MSGBUF *mbp;
- u_int8_t *bytes;
- u_int32_t len;
-{
- u_int8_t *p;
- u_int32_t i;
- int msg_truncated;
-
- __db_msgadd(env, mbp, "len: %3lu", (u_long)len);
- if (len != 0) {
- __db_msgadd(env, mbp, " data: ");
-
- /*
- * Print the first 20 bytes of the data. If that chunk is
- * all printable characters, print it as text, else print it
- * in hex. We have this heuristic because we're displaying
- * things like lock objects that could be either text or data.
- */
- if (len > 20) {
- len = 20;
- msg_truncated = 1;
- } else
- msg_truncated = 0;
- for (p = bytes, i = len; i > 0; --i, ++p)
- if (!isprint((int)*p) && *p != '\t' && *p != '\n')
- break;
- if (i == 0)
- for (p = bytes, i = len; i > 0; --i, ++p)
- __db_msgadd(env, mbp, "%c", *p);
- else
- for (p = bytes, i = len; i > 0; --i, ++p)
- __db_msgadd(env, mbp, "%#.2x", (u_int)*p);
- if (msg_truncated)
- __db_msgadd(env, mbp, "...");
- }
- DB_MSGBUF_FLUSH(env, mbp);
-}
-
-/*
- * __db_proff --
- * Print out an off-page element.
- */
-static void
-__db_proff(env, mbp, vp)
- ENV *env;
- DB_MSGBUF *mbp;
- void *vp;
-{
- BOVERFLOW *bo;
-
- bo = vp;
- switch (B_TYPE(bo->type)) {
- case B_OVERFLOW:
- __db_msgadd(env, mbp, "overflow: total len: %4lu page: %4lu",
- (u_long)bo->tlen, (u_long)bo->pgno);
- break;
- case B_DUPLICATE:
- __db_msgadd(
- env, mbp, "duplicate: page: %4lu", (u_long)bo->pgno);
- break;
- default:
- /* NOTREACHED */
- break;
- }
- DB_MSGBUF_FLUSH(env, mbp);
-}
-
-/*
- * __db_prflags --
- * Print out flags values.
- *
- * PUBLIC: void __db_prflags __P((ENV *, DB_MSGBUF *,
- * PUBLIC: u_int32_t, const FN *, const char *, const char *));
- */
-void
-__db_prflags(env, mbp, flags, fn, prefix, suffix)
- ENV *env;
- DB_MSGBUF *mbp;
- u_int32_t flags;
- FN const *fn;
- const char *prefix, *suffix;
-{
- DB_MSGBUF mb;
- const FN *fnp;
- int found, standalone;
- const char *sep;
-
- if (fn == NULL)
- return;
-
- /*
- * If it's a standalone message, output the suffix (which will be the
- * label), regardless of whether we found anything or not, and flush
- * the line.
- */
- if (mbp == NULL) {
- standalone = 1;
- mbp = &mb;
- DB_MSGBUF_INIT(mbp);
- } else
- standalone = 0;
-
- sep = prefix == NULL ? "" : prefix;
- for (found = 0, fnp = fn; fnp->mask != 0; ++fnp)
- if (LF_ISSET(fnp->mask)) {
- __db_msgadd(env, mbp, "%s%s", sep, fnp->name);
- sep = ", ";
- found = 1;
- }
-
- if ((standalone || found) && suffix != NULL)
- __db_msgadd(env, mbp, "%s", suffix);
- if (standalone)
- DB_MSGBUF_FLUSH(env, mbp);
-}
-
-/*
- * __db_lockmode_to_string --
- * Return the name of the lock mode.
- *
- * PUBLIC: const char * __db_lockmode_to_string __P((db_lockmode_t));
- */
-const char *
-__db_lockmode_to_string(mode)
- db_lockmode_t mode;
-{
- switch (mode) {
- case DB_LOCK_NG:
- return ("Not granted");
- case DB_LOCK_READ:
- return ("Shared/read");
- case DB_LOCK_WRITE:
- return ("Exclusive/write");
- case DB_LOCK_WAIT:
- return ("Wait for event");
- case DB_LOCK_IWRITE:
- return ("Intent exclusive/write");
- case DB_LOCK_IREAD:
- return ("Intent shared/read");
- case DB_LOCK_IWR:
- return ("Intent to read/write");
- case DB_LOCK_READ_UNCOMMITTED:
- return ("Read uncommitted");
- case DB_LOCK_WWRITE:
- return ("Was written");
- default:
- break;
- }
- return ("UNKNOWN LOCK MODE");
-}
-
-/*
- * __db_pagetype_to_string --
- * Return the name of the specified page type.
- */
-static const char *
-__db_pagetype_to_string(type)
- u_int32_t type;
-{
- char *s;
-
- s = NULL;
- switch (type) {
- case P_BTREEMETA:
- s = "btree metadata";
- break;
- case P_LDUP:
- s = "duplicate";
- break;
- case P_HASH_UNSORTED:
- s = "hash unsorted";
- break;
- case P_HASH:
- s = "hash";
- break;
- case P_HASHMETA:
- s = "hash metadata";
- break;
- case P_IBTREE:
- s = "btree internal";
- break;
- case P_INVALID:
- s = "invalid";
- break;
- case P_IRECNO:
- s = "recno internal";
- break;
- case P_LBTREE:
- s = "btree leaf";
- break;
- case P_LRECNO:
- s = "recno leaf";
- break;
- case P_OVERFLOW:
- s = "overflow";
- break;
- case P_QAMMETA:
- s = "queue metadata";
- break;
- case P_QAMDATA:
- s = "queue";
- break;
- default:
- /* Just return a NULL. */
- break;
- }
- return (s);
-}
-
-#else /* !HAVE_STATISTICS */
-
-/*
- * __db_dumptree --
- * Dump the tree to a file.
- *
- * PUBLIC: int __db_dumptree __P((DB *, DB_TXN *, char *, char *));
- */
-int
-__db_dumptree(dbp, txn, op, name)
- DB *dbp;
- DB_TXN *txn;
- char *op, *name;
-{
- COMPQUIET(txn, NULL);
- COMPQUIET(op, NULL);
- COMPQUIET(name, NULL);
-
- return (__db_stat_not_built(dbp->env));
-}
-
-/*
- * __db_get_flags_fn --
- * Return the __db_flags_fn array.
- *
- * PUBLIC: const FN * __db_get_flags_fn __P((void));
- */
-const FN *
-__db_get_flags_fn()
-{
- /*
- * !!!
- * The Tcl API uses this interface, stub it off.
- */
- return (NULL);
-}
-#endif
-
-/*
- * __db_dump_pp --
- * DB->dump pre/post processing.
- *
- * PUBLIC: int __db_dump_pp __P((DB *, const char *,
- * PUBLIC: int (*)(void *, const void *), void *, int, int));
- */
-int
-__db_dump_pp(dbp, subname, callback, handle, pflag, keyflag)
- DB *dbp;
- const char *subname;
- int (*callback) __P((void *, const void *));
- void *handle;
- int pflag, keyflag;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- env = dbp->env;
-
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->dump");
-
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 1)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- ret = __db_dump(dbp, subname, callback, handle, pflag, keyflag);
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
-err: ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_dump --
- * DB->dump.
- *
- * PUBLIC: int __db_dump __P((DB *, const char *,
- * PUBLIC: int (*)(void *, const void *), void *, int, int));
- */
-int
-__db_dump(dbp, subname, callback, handle, pflag, keyflag)
- DB *dbp;
- const char *subname;
- int (*callback) __P((void *, const void *));
- void *handle;
- int pflag, keyflag;
-{
- DBC *dbcp;
- DBT key, data;
- DBT keyret, dataret;
- ENV *env;
- db_recno_t recno;
- int is_recno, ret, t_ret;
- void *pointer;
-
- env = dbp->env;
-
- if ((ret = __db_prheader(
- dbp, subname, pflag, keyflag, handle, callback, NULL, 0)) != 0)
- return (ret);
-
- /*
- * Get a cursor and step through the database, printing out each
- * key/data pair.
- */
- if ((ret = __db_cursor(dbp, NULL, NULL, &dbcp, 0)) != 0)
- return (ret);
-
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
- if ((ret = __os_malloc(env, 1024 * 1024, &data.data)) != 0)
- goto err;
- data.ulen = 1024 * 1024;
- data.flags = DB_DBT_USERMEM;
- is_recno = (dbp->type == DB_RECNO || dbp->type == DB_QUEUE);
- keyflag = is_recno ? keyflag : 1;
- if (is_recno) {
- keyret.data = &recno;
- keyret.size = sizeof(recno);
- }
-
-retry: while ((ret =
- __dbc_get(dbcp, &key, &data, DB_NEXT | DB_MULTIPLE_KEY)) == 0) {
- DB_MULTIPLE_INIT(pointer, &data);
- for (;;) {
- if (is_recno)
- DB_MULTIPLE_RECNO_NEXT(pointer, &data,
- recno, dataret.data, dataret.size);
- else
- DB_MULTIPLE_KEY_NEXT(pointer,
- &data, keyret.data,
- keyret.size, dataret.data, dataret.size);
-
- if (dataret.data == NULL)
- break;
-
- if ((keyflag &&
- (ret = __db_prdbt(&keyret, pflag, " ",
- handle, callback, is_recno)) != 0) ||
- (ret = __db_prdbt(&dataret, pflag, " ",
- handle, callback, 0)) != 0)
- goto err;
- }
- }
- if (ret == DB_BUFFER_SMALL) {
- data.size = (u_int32_t)DB_ALIGN(data.size, 1024);
- if ((ret = __os_realloc(env, data.size, &data.data)) != 0)
- goto err;
- data.ulen = data.size;
- goto retry;
- }
- if (ret == DB_NOTFOUND)
- ret = 0;
-
- if ((t_ret = __db_prfooter(handle, callback)) != 0 && ret == 0)
- ret = t_ret;
-
-err: if ((t_ret = __dbc_close(dbcp)) != 0 && ret == 0)
- ret = t_ret;
- if (data.data != NULL)
- __os_free(env, data.data);
-
- return (ret);
-}
-
-/*
- * __db_prdbt --
- * Print out a DBT data element.
- *
- * PUBLIC: int __db_prdbt __P((DBT *, int, const char *, void *,
- * PUBLIC: int (*)(void *, const void *), int));
- */
-int
-__db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno)
- DBT *dbtp;
- int checkprint;
- const char *prefix;
- void *handle;
- int (*callback) __P((void *, const void *));
- int is_recno;
-{
- static const u_char hex[] = "0123456789abcdef";
- db_recno_t recno;
- size_t len;
- int ret;
-#define DBTBUFLEN 100
- u_int8_t *p, *hp;
- char buf[DBTBUFLEN], hbuf[DBTBUFLEN];
-
- /*
- * !!!
- * This routine is the routine that dumps out items in the format
- * used by db_dump(1) and db_load(1). This means that the format
- * cannot change.
- */
- if (prefix != NULL && (ret = callback(handle, prefix)) != 0)
- return (ret);
- if (is_recno) {
- /*
- * We're printing a record number, and this has to be done
- * in a platform-independent way. So we use the numeral in
- * straight ASCII.
- */
- (void)__ua_memcpy(&recno, dbtp->data, sizeof(recno));
- snprintf(buf, DBTBUFLEN, "%lu", (u_long)recno);
-
- /* If we're printing data as hex, print keys as hex too. */
- if (!checkprint) {
- for (len = strlen(buf), p = (u_int8_t *)buf,
- hp = (u_int8_t *)hbuf; len-- > 0; ++p) {
- *hp++ = hex[(u_int8_t)(*p & 0xf0) >> 4];
- *hp++ = hex[*p & 0x0f];
- }
- *hp = '\0';
- ret = callback(handle, hbuf);
- } else
- ret = callback(handle, buf);
-
- if (ret != 0)
- return (ret);
- } else if (checkprint) {
- for (len = dbtp->size, p = dbtp->data; len--; ++p)
- if (isprint((int)*p)) {
- if (*p == '\\' &&
- (ret = callback(handle, "\\")) != 0)
- return (ret);
- snprintf(buf, DBTBUFLEN, "%c", *p);
- if ((ret = callback(handle, buf)) != 0)
- return (ret);
- } else {
- snprintf(buf, DBTBUFLEN, "\\%c%c",
- hex[(u_int8_t)(*p & 0xf0) >> 4],
- hex[*p & 0x0f]);
- if ((ret = callback(handle, buf)) != 0)
- return (ret);
- }
- } else
- for (len = dbtp->size, p = dbtp->data; len--; ++p) {
- snprintf(buf, DBTBUFLEN, "%c%c",
- hex[(u_int8_t)(*p & 0xf0) >> 4],
- hex[*p & 0x0f]);
- if ((ret = callback(handle, buf)) != 0)
- return (ret);
- }
-
- return (callback(handle, "\n"));
-}
-
-/*
- * __db_prheader --
- * Write out header information in the format expected by db_load.
- *
- * PUBLIC: int __db_prheader __P((DB *, const char *, int, int, void *,
- * PUBLIC: int (*)(void *, const void *), VRFY_DBINFO *, db_pgno_t));
- */
-int
-__db_prheader(dbp, subname, pflag, keyflag, handle, callback, vdp, meta_pgno)
- DB *dbp;
- const char *subname;
- int pflag, keyflag;
- void *handle;
- int (*callback) __P((void *, const void *));
- VRFY_DBINFO *vdp;
- db_pgno_t meta_pgno;
-{
- DBT dbt;
- DBTYPE dbtype;
- ENV *env;
- VRFY_PAGEINFO *pip;
- u_int32_t flags, tmp_u_int32;
- size_t buflen;
- char *buf;
- int using_vdp, ret, t_ret, tmp_int;
-
- ret = 0;
- buf = NULL;
- COMPQUIET(buflen, 0);
-
- /*
- * If dbp is NULL, then pip is guaranteed to be non-NULL; we only ever
- * call __db_prheader with a NULL dbp from one case inside __db_prdbt,
- * and this is a special subdatabase for "lost" items. In this case
- * we have a vdp (from which we'll get a pip). In all other cases, we
- * will have a non-NULL dbp (and vdp may or may not be NULL depending
- * on whether we're salvaging).
- */
- if (dbp == NULL)
- env = NULL;
- else
- env = dbp->env;
- DB_ASSERT(env, dbp != NULL || vdp != NULL);
-
- /*
- * If we've been passed a verifier statistics object, use that; we're
- * being called in a context where dbp->stat is unsafe.
- *
- * Also, the verifier may set the pflag on a per-salvage basis. If so,
- * respect that.
- */
- if (vdp != NULL) {
- if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &pip)) != 0)
- return (ret);
-
- if (F_ISSET(vdp, SALVAGE_PRINTABLE))
- pflag = 1;
- using_vdp = 1;
- } else {
- pip = NULL;
- using_vdp = 0;
- }
-
- /*
- * If dbp is NULL, make it a btree. Otherwise, set dbtype to whatever
- * appropriate type for the specified meta page, or the type of the dbp.
- */
- if (dbp == NULL)
- dbtype = DB_BTREE;
- else if (using_vdp)
- switch (pip->type) {
- case P_BTREEMETA:
- if (F_ISSET(pip, VRFY_IS_RECNO))
- dbtype = DB_RECNO;
- else
- dbtype = DB_BTREE;
- break;
- case P_HASHMETA:
- dbtype = DB_HASH;
- break;
- case P_QAMMETA:
- dbtype = DB_QUEUE;
- break;
- default:
- /*
- * If the meta page is of a bogus type, it's because
- * we have a badly corrupt database. (We must be in
- * the verifier for pip to be non-NULL.) Pretend we're
- * a Btree and salvage what we can.
- */
- DB_ASSERT(env, F_ISSET(dbp, DB_AM_VERIFYING));
- dbtype = DB_BTREE;
- break;
- }
- else
- dbtype = dbp->type;
-
- if ((ret = callback(handle, "VERSION=3\n")) != 0)
- goto err;
- if (pflag) {
- if ((ret = callback(handle, "format=print\n")) != 0)
- goto err;
- } else if ((ret = callback(handle, "format=bytevalue\n")) != 0)
- goto err;
-
- /*
- * 64 bytes is long enough, as a minimum bound, for any of the
- * fields besides subname. Subname uses __db_prdbt and therefore
- * does not need buffer space here.
- */
- buflen = 64;
- if ((ret = __os_malloc(env, buflen, &buf)) != 0)
- goto err;
- if (subname != NULL) {
- snprintf(buf, buflen, "database=");
- if ((ret = callback(handle, buf)) != 0)
- goto err;
- DB_INIT_DBT(dbt, subname, strlen(subname));
- if ((ret = __db_prdbt(&dbt, 1, NULL, handle, callback, 0)) != 0)
- goto err;
- }
- switch (dbtype) {
- case DB_BTREE:
- if ((ret = callback(handle, "type=btree\n")) != 0)
- goto err;
- if (using_vdp)
- tmp_int = F_ISSET(pip, VRFY_HAS_RECNUMS) ? 1 : 0;
- else {
- if ((ret = __db_get_flags(dbp, &flags)) != 0) {
- __db_err(env, ret, "DB->get_flags");
- goto err;
- }
- tmp_int = F_ISSET(dbp, DB_AM_RECNUM) ? 1 : 0;
- }
- if (tmp_int && (ret = callback(handle, "recnum=1\n")) != 0)
- goto err;
-
- if (using_vdp)
- tmp_u_int32 = pip->bt_minkey;
- else
- if ((ret =
- __bam_get_bt_minkey(dbp, &tmp_u_int32)) != 0) {
- __db_err(env, ret, "DB->get_bt_minkey");
- goto err;
- }
- if (tmp_u_int32 != 0 && tmp_u_int32 != DEFMINKEYPAGE) {
- snprintf(buf, buflen,
- "bt_minkey=%lu\n", (u_long)tmp_u_int32);
- if ((ret = callback(handle, buf)) != 0)
- goto err;
- }
- break;
- case DB_HASH:
-#ifdef HAVE_HASH
- if ((ret = callback(handle, "type=hash\n")) != 0)
- goto err;
- if (using_vdp)
- tmp_u_int32 = pip->h_ffactor;
- else
- if ((ret =
- __ham_get_h_ffactor(dbp, &tmp_u_int32)) != 0) {
- __db_err(env, ret, "DB->get_h_ffactor");
- goto err;
- }
- if (tmp_u_int32 != 0) {
- snprintf(buf, buflen,
- "h_ffactor=%lu\n", (u_long)tmp_u_int32);
- if ((ret = callback(handle, buf)) != 0)
- goto err;
- }
-
- if (using_vdp)
- tmp_u_int32 = pip->h_nelem;
- else
- if ((ret = __ham_get_h_nelem(dbp, &tmp_u_int32)) != 0) {
- __db_err(env, ret, "DB->get_h_nelem");
- goto err;
- }
- /*
- * Hash databases have an h_nelem field of 0 or 1, neither
- * of those values is interesting.
- */
- if (tmp_u_int32 > 1) {
- snprintf(buf, buflen,
- "h_nelem=%lu\n", (u_long)tmp_u_int32);
- if ((ret = callback(handle, buf)) != 0)
- goto err;
- }
- break;
-#else
- ret = __db_no_hash_am(env);
- goto err;
-#endif
- case DB_QUEUE:
-#ifdef HAVE_QUEUE
- if ((ret = callback(handle, "type=queue\n")) != 0)
- goto err;
- if (using_vdp)
- tmp_u_int32 = vdp->re_len;
- else
- if ((ret = __ram_get_re_len(dbp, &tmp_u_int32)) != 0) {
- __db_err(env, ret, "DB->get_re_len");
- goto err;
- }
- snprintf(buf, buflen, "re_len=%lu\n", (u_long)tmp_u_int32);
- if ((ret = callback(handle, buf)) != 0)
- goto err;
-
- if (using_vdp)
- tmp_int = (int)vdp->re_pad;
- else
- if ((ret = __ram_get_re_pad(dbp, &tmp_int)) != 0) {
- __db_err(env, ret, "DB->get_re_pad");
- goto err;
- }
- if (tmp_int != 0 && tmp_int != ' ') {
- snprintf(buf, buflen, "re_pad=%#x\n", tmp_int);
- if ((ret = callback(handle, buf)) != 0)
- goto err;
- }
-
- if (using_vdp)
- tmp_u_int32 = vdp->page_ext;
- else
- if ((ret =
- __qam_get_extentsize(dbp, &tmp_u_int32)) != 0) {
- __db_err(env, ret, "DB->get_q_extentsize");
- goto err;
- }
- if (tmp_u_int32 != 0) {
- snprintf(buf, buflen,
- "extentsize=%lu\n", (u_long)tmp_u_int32);
- if ((ret = callback(handle, buf)) != 0)
- goto err;
- }
- break;
-#else
- ret = __db_no_queue_am(env);
- goto err;
-#endif
- case DB_RECNO:
- if ((ret = callback(handle, "type=recno\n")) != 0)
- goto err;
- if (using_vdp)
- tmp_int = F_ISSET(pip, VRFY_IS_RRECNO) ? 1 : 0;
- else
- tmp_int = F_ISSET(dbp, DB_AM_RENUMBER) ? 1 : 0;
- if (tmp_int != 0 &&
- (ret = callback(handle, "renumber=1\n")) != 0)
- goto err;
-
- if (using_vdp)
- tmp_int = F_ISSET(pip, VRFY_IS_FIXEDLEN) ? 1 : 0;
- else
- tmp_int = F_ISSET(dbp, DB_AM_FIXEDLEN) ? 1 : 0;
- if (tmp_int) {
- if (using_vdp)
- tmp_u_int32 = pip->re_len;
- else
- if ((ret =
- __ram_get_re_len(dbp, &tmp_u_int32)) != 0) {
- __db_err(env, ret, "DB->get_re_len");
- goto err;
- }
- snprintf(buf, buflen,
- "re_len=%lu\n", (u_long)tmp_u_int32);
- if ((ret = callback(handle, buf)) != 0)
- goto err;
-
- if (using_vdp)
- tmp_int = (int)pip->re_pad;
- else
- if ((ret =
- __ram_get_re_pad(dbp, &tmp_int)) != 0) {
- __db_err(env, ret, "DB->get_re_pad");
- goto err;
- }
- if (tmp_int != 0 && tmp_int != ' ') {
- snprintf(buf,
- buflen, "re_pad=%#x\n", (u_int)tmp_int);
- if ((ret = callback(handle, buf)) != 0)
- goto err;
- }
- }
- break;
- case DB_UNKNOWN: /* Impossible. */
- ret = __db_unknown_path(env, "__db_prheader");
- goto err;
- }
-
- if (using_vdp) {
- if (F_ISSET(pip, VRFY_HAS_CHKSUM))
- if ((ret = callback(handle, "chksum=1\n")) != 0)
- goto err;
- if (F_ISSET(pip, VRFY_HAS_DUPS))
- if ((ret = callback(handle, "duplicates=1\n")) != 0)
- goto err;
- if (F_ISSET(pip, VRFY_HAS_DUPSORT))
- if ((ret = callback(handle, "dupsort=1\n")) != 0)
- goto err;
-#ifdef HAVE_COMPRESSION
- if (F_ISSET(pip, VRFY_HAS_COMPRESS))
- if ((ret = callback(handle, "compressed=1\n")) != 0)
- goto err;
-#endif
- /*
- * !!!
- * We don't know if the page size was the default if we're
- * salvaging. It doesn't seem that interesting to have, so
- * we ignore it for now.
- */
- } else {
- if (F_ISSET(dbp, DB_AM_CHKSUM))
- if ((ret = callback(handle, "chksum=1\n")) != 0)
- goto err;
- if (F_ISSET(dbp, DB_AM_DUP))
- if ((ret = callback(handle, "duplicates=1\n")) != 0)
- goto err;
- if (F_ISSET(dbp, DB_AM_DUPSORT))
- if ((ret = callback(handle, "dupsort=1\n")) != 0)
- goto err;
-#ifdef HAVE_COMPRESSION
- if (DB_IS_COMPRESSED(dbp))
- if ((ret = callback(handle, "compressed=1\n")) != 0)
- goto err;
-#endif
- if (!F_ISSET(dbp, DB_AM_PGDEF)) {
- snprintf(buf, buflen,
- "db_pagesize=%lu\n", (u_long)dbp->pgsize);
- if ((ret = callback(handle, buf)) != 0)
- goto err;
- }
- }
-
-#ifdef HAVE_PARTITION
- if (DB_IS_PARTITIONED(dbp) &&
- F_ISSET((DB_PARTITION *)dbp->p_internal, PART_RANGE)) {
- DBT *keys;
- u_int32_t i;
-
- if ((ret = __partition_get_keys(dbp, &tmp_u_int32, &keys)) != 0)
- goto err;
- if (tmp_u_int32 != 0) {
- snprintf(buf,
- buflen, "nparts=%lu\n", (u_long)tmp_u_int32);
- if ((ret = callback(handle, buf)) != 0)
- goto err;
- for (i = 0; i < tmp_u_int32 - 1; i++)
- if ((ret = __db_prdbt(&keys[i],
- pflag, " ", handle, callback, 0)) != 0)
- goto err;
- }
- }
-#endif
-
- if (keyflag && (ret = callback(handle, "keys=1\n")) != 0)
- goto err;
-
- ret = callback(handle, "HEADER=END\n");
-
-err: if (using_vdp &&
- (t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
- if (buf != NULL)
- __os_free(env, buf);
-
- return (ret);
-}
-
-/*
- * __db_prfooter --
- * Print the footer that marks the end of a DB dump. This is trivial,
- * but for consistency's sake we don't want to put its literal contents
- * in multiple places.
- *
- * PUBLIC: int __db_prfooter __P((void *, int (*)(void *, const void *)));
- */
-int
-__db_prfooter(handle, callback)
- void *handle;
- int (*callback) __P((void *, const void *));
-{
- return (callback(handle, "DATA=END\n"));
-}
-
-/*
- * __db_pr_callback --
- * Callback function for using pr_* functions from C.
- *
- * PUBLIC: int __db_pr_callback __P((void *, const void *));
- */
-int
-__db_pr_callback(handle, str_arg)
- void *handle;
- const void *str_arg;
-{
- char *str;
- FILE *f;
-
- str = (char *)str_arg;
- f = (FILE *)handle;
-
- if (fprintf(f, "%s", str) != (int)strlen(str))
- return (EIO);
-
- return (0);
-}
-
-/*
- * __db_dbtype_to_string --
- * Return the name of the database type.
- *
- * PUBLIC: const char * __db_dbtype_to_string __P((DBTYPE));
- */
-const char *
-__db_dbtype_to_string(type)
- DBTYPE type;
-{
- switch (type) {
- case DB_BTREE:
- return ("btree");
- case DB_HASH:
- return ("hash");
- case DB_RECNO:
- return ("recno");
- case DB_QUEUE:
- return ("queue");
- case DB_UNKNOWN:
- default:
- break;
- }
- return ("UNKNOWN TYPE");
-}
diff --git a/db/db_rec.c b/db/db_rec.c
deleted file mode 100644
index 02fe096..0000000
--- a/db/db_rec.c
+++ /dev/null
@@ -1,1859 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996, 2010 Oracle and/or its affiliates. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/log.h"
-#include "dbinc/mp.h"
-#include "dbinc/hash.h"
-
-static int __db_pg_free_recover_int __P((ENV *, DB_THREAD_INFO *,
- __db_pg_freedata_args *, DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int));
-static int __db_pg_free_recover_42_int __P((ENV *, DB_THREAD_INFO *,
- __db_pg_freedata_42_args *,
- DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int));
-
-/*
- * PUBLIC: int __db_addrem_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- *
- * This log message is generated whenever we add or remove a duplicate
- * to/from a duplicate page. On recover, we just do the opposite.
- */
-int
-__db_addrem_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_addrem_args *argp;
- DB_THREAD_INFO *ip;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- int cmp_n, cmp_p, modified, ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- pagep = NULL;
- REC_PRINT(__db_addrem_print);
- REC_INTRO(__db_addrem_read, ip, 1);
-
- REC_FGET(mpf, ip, argp->pgno, &pagep, done);
- modified = 0;
-
- cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
- cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
- CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
- if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_DUP) ||
- (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_DUP)) {
- /* Need to redo an add, or undo a delete. */
- REC_DIRTY(mpf, ip, dbc->priority, &pagep);
- if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes,
- argp->hdr.size == 0 ? NULL : &argp->hdr,
- argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0)
- goto out;
- modified = 1;
-
- } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_DUP) ||
- (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_DUP)) {
- /* Need to undo an add, or redo a delete. */
- REC_DIRTY(mpf, ip, dbc->priority, &pagep);
- if ((ret = __db_ditem(dbc,
- pagep, argp->indx, argp->nbytes)) != 0)
- goto out;
- modified = 1;
- }
-
- if (modified) {
- if (DB_REDO(op))
- LSN(pagep) = *lsnp;
- else
- LSN(pagep) = argp->pagelsn;
- }
-
- if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0)
- goto out;
- pagep = NULL;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: if (pagep != NULL)
- (void)__memp_fput(mpf, ip, pagep, dbc->priority);
- REC_CLOSE;
-}
-
-/*
- * PUBLIC: int __db_big_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_big_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_big_args *argp;
- DB_THREAD_INFO *ip;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- int cmp_n, cmp_p, modified, ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- pagep = NULL;
- REC_PRINT(__db_big_print);
- REC_INTRO(__db_big_read, ip, 0);
-
- REC_FGET(mpf, ip, argp->pgno, &pagep, ppage);
- modified = 0;
-
- /*
- * There are three pages we need to check. The one on which we are
- * adding data, the previous one whose next_pointer may have
- * been updated, and the next one whose prev_pointer may have
- * been updated.
- */
- cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
- cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn);
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn);
- CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
- if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) ||
- (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_BIG)) {
- /* We are either redo-ing an add, or undoing a delete. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno,
- argp->next_pgno, 0, P_OVERFLOW);
- OV_LEN(pagep) = argp->dbt.size;
- OV_REF(pagep) = 1;
- memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data,
- argp->dbt.size);
- PREV_PGNO(pagep) = argp->prev_pgno;
- modified = 1;
- } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_BIG) ||
- (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_BIG)) {
- /*
- * We are either undo-ing an add or redo-ing a delete.
- * The page is about to be reclaimed in either case, so
- * there really isn't anything to do here.
- */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- modified = 1;
- } else if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_APPEND_BIG) {
- /* We are redoing an append. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
- OV_LEN(pagep), argp->dbt.data, argp->dbt.size);
- OV_LEN(pagep) += argp->dbt.size;
- modified = 1;
- } else if (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_APPEND_BIG) {
- /* We are undoing an append. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- OV_LEN(pagep) -= argp->dbt.size;
- memset((u_int8_t *)pagep + P_OVERHEAD(file_dbp) +
- OV_LEN(pagep), 0, argp->dbt.size);
- modified = 1;
- }
- if (modified)
- LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn;
-
- ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
- pagep = NULL;
- if (ret != 0)
- goto out;
-
- /*
- * We only delete a whole chain of overflow items, and appends only
- * apply to a single page. Adding a page is the only case that
- * needs to update the chain.
- */
- if (argp->opcode != DB_ADD_BIG)
- goto done;
-
- /* Now check the previous page. */
-ppage: if (argp->prev_pgno != PGNO_INVALID) {
- REC_FGET(mpf, ip, argp->prev_pgno, &pagep, npage);
- modified = 0;
-
- cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
- cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
- CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
-
- if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) {
- /* Redo add, undo delete. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- NEXT_PGNO(pagep) = argp->pgno;
- modified = 1;
- } else if (cmp_n == 0 &&
- DB_UNDO(op) && argp->opcode == DB_ADD_BIG) {
- /* Redo delete, undo add. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- NEXT_PGNO(pagep) = argp->next_pgno;
- modified = 1;
- }
- if (modified)
- LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn;
- ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
- pagep = NULL;
- if (ret != 0)
- goto out;
- }
- pagep = NULL;
-
- /* Now check the next page. Can only be set on a delete. */
-npage: if (argp->next_pgno != PGNO_INVALID) {
- REC_FGET(mpf, ip, argp->next_pgno, &pagep, done);
- modified = 0;
-
- cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
- cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn);
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn);
- CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
- if (cmp_p == 0 && DB_REDO(op)) {
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- PREV_PGNO(pagep) = PGNO_INVALID;
- modified = 1;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- PREV_PGNO(pagep) = argp->pgno;
- modified = 1;
- }
- if (modified)
- LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn;
- ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
- pagep = NULL;
- if (ret != 0)
- goto out;
- }
- pagep = NULL;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: if (pagep != NULL)
- (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
- REC_CLOSE;
-}
-
-/*
- * __db_ovref_recover --
- * Recovery function for __db_ovref().
- *
- * PUBLIC: int __db_ovref_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_ovref_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_ovref_args *argp;
- DB_THREAD_INFO *ip;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- int cmp, ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- pagep = NULL;
- REC_PRINT(__db_ovref_print);
- REC_INTRO(__db_ovref_read, ip, 0);
-
- REC_FGET(mpf, ip, argp->pgno, &pagep, done);
-
- cmp = LOG_COMPARE(&LSN(pagep), &argp->lsn);
- CHECK_LSN(env, op, cmp, &LSN(pagep), &argp->lsn);
- if (cmp == 0 && DB_REDO(op)) {
- /* Need to redo update described. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- OV_REF(pagep) += argp->adjust;
- pagep->lsn = *lsnp;
- } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
- /* Need to undo update described. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- OV_REF(pagep) -= argp->adjust;
- pagep->lsn = argp->lsn;
- }
- ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
- pagep = NULL;
- if (ret != 0)
- goto out;
- pagep = NULL;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: if (pagep != NULL)
- (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
- REC_CLOSE;
-}
-
-/*
- * __db_debug_recover --
- * Recovery function for debug.
- *
- * PUBLIC: int __db_debug_recover __P((ENV *,
- * PUBLIC: DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_debug_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_debug_args *argp;
- int ret;
-
- COMPQUIET(op, DB_TXN_ABORT);
- COMPQUIET(info, NULL);
-
- REC_PRINT(__db_debug_print);
- REC_NOOP_INTRO(__db_debug_read);
-
- *lsnp = argp->prev_lsn;
- ret = 0;
-
- REC_NOOP_CLOSE;
-}
-
-/*
- * __db_noop_recover --
- * Recovery function for noop.
- *
- * PUBLIC: int __db_noop_recover __P((ENV *,
- * PUBLIC: DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_noop_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_noop_args *argp;
- DB_THREAD_INFO *ip;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- int cmp_n, cmp_p, ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- pagep = NULL;
- REC_PRINT(__db_noop_print);
- REC_INTRO(__db_noop_read, ip, 0);
-
- REC_FGET(mpf, ip, argp->pgno, &pagep, done);
-
- cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
- cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn);
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn);
- CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
- if (cmp_p == 0 && DB_REDO(op)) {
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- LSN(pagep) = *lsnp;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- LSN(pagep) = argp->prevlsn;
- }
- ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
- pagep = NULL;
-
-done: *lsnp = argp->prev_lsn;
-out: if (pagep != NULL)
- (void)__memp_fput(mpf,
- ip, pagep, file_dbp->priority);
- REC_CLOSE;
-}
-
-/*
- * __db_pg_alloc_recover --
- * Recovery function for pg_alloc.
- *
- * PUBLIC: int __db_pg_alloc_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_pg_alloc_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_pg_alloc_args *argp;
- DB_THREAD_INFO *ip;
- DB *file_dbp;
- DBC *dbc;
- DBMETA *meta;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- db_pgno_t pgno;
- int cmp_n, cmp_p, created, level, ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- meta = NULL;
- pagep = NULL;
- created = 0;
- REC_PRINT(__db_pg_alloc_print);
- REC_INTRO(__db_pg_alloc_read, ip, 0);
-
- /*
- * Fix up the metadata page. If we're redoing the operation, we have
- * to get the metadata page and update its LSN and its free pointer.
- * If we're undoing the operation and the page was ever created, we put
- * it on the freelist.
- */
- pgno = PGNO_BASE_MD;
- if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) {
- /* The metadata page must always exist on redo. */
- if (DB_REDO(op)) {
- ret = __db_pgerr(file_dbp, pgno, ret);
- goto out;
- } else
- goto done;
- }
- cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
- cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
- CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
- CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
- if (cmp_p == 0 && DB_REDO(op)) {
- /* Need to redo update described. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
- LSN(meta) = *lsnp;
- meta->free = argp->next;
- if (argp->pgno > meta->last_pgno)
- meta->last_pgno = argp->pgno;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /* Need to undo update described. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
- LSN(meta) = argp->meta_lsn;
- /*
- * If the page has a zero LSN then its newly created and
- * will be truncated rather than go on the free list.
- */
- if (!IS_ZERO_LSN(argp->page_lsn))
- meta->free = argp->pgno;
- meta->last_pgno = argp->last_pgno;
- }
-
-#ifdef HAVE_FTRUNCATE
- /*
- * check to see if we are keeping a sorted freelist, if so put
- * this back in the in memory list. It must be the first element.
- */
- if (op == DB_TXN_ABORT && !IS_ZERO_LSN(argp->page_lsn)) {
- db_pgno_t *list;
- u_int32_t nelem;
-
- if ((ret = __memp_get_freelist(mpf, &nelem, &list)) != 0)
- goto out;
- if (list != NULL && (nelem == 0 || *list != argp->pgno)) {
- if ((ret =
- __memp_extend_freelist(mpf, nelem + 1, &list)) != 0)
- goto out;
- if (nelem != 0)
- memmove(list + 1, list, nelem * sizeof(*list));
- *list = argp->pgno;
- }
- }
-#endif
-
- /*
- * Fix up the allocated page. If the page does not exist
- * and we can truncate it then don't create it.
- * Otherwise if we're redoing the operation, we have
- * to get the page (creating it if it doesn't exist), and update its
- * LSN. If we're undoing the operation, we have to reset the page's
- * LSN and put it on the free list.
- */
- if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
- /*
- * We have to be able to identify if a page was newly
- * created so we can recover it properly. We cannot simply
- * look for an empty header, because hash uses a pgin
- * function that will set the header. Instead, we explicitly
- * try for the page without CREATE and if that fails, then
- * create it.
- */
- if (DB_UNDO(op))
- goto do_truncate;
- if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL,
- DB_MPOOL_CREATE, &pagep)) != 0) {
- if (DB_UNDO(op) && ret == ENOSPC)
- goto do_truncate;
- ret = __db_pgerr(file_dbp, argp->pgno, ret);
- goto out;
- }
- created = 1;
- }
-
- /* Fix up the allocated page. */
- cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
- cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn);
-
- /*
- * If an initial allocation is aborted and then reallocated during
- * an archival restore the log record will have an LSN for the page
- * but the page will be empty.
- */
- if (IS_ZERO_LSN(LSN(pagep)))
- cmp_p = 0;
-
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn);
- /*
- * Another special case we have to handle is if we ended up with a
- * page of all 0's which can happen if we abort between allocating a
- * page in mpool and initializing it. In that case, even if we're
- * undoing, we need to re-initialize the page.
- */
- if (DB_REDO(op) && cmp_p == 0) {
- /* Need to redo update described. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- switch (argp->ptype) {
- case P_LBTREE:
- case P_LRECNO:
- case P_LDUP:
- level = LEAFLEVEL;
- break;
- default:
- level = 0;
- break;
- }
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype);
-
- pagep->lsn = *lsnp;
- } else if (DB_UNDO(op) && (cmp_n == 0 || created)) {
- /*
- * This is where we handle the case of a 0'd page (pagep->pgno
- * is equal to PGNO_INVALID).
- * Undo the allocation, reinitialize the page and
- * link its next pointer to the free list.
- */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
-
- pagep->lsn = argp->page_lsn;
- }
-
-do_truncate:
- /*
- * If the page was newly created, give it back.
- */
- if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) &&
- IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) {
- /* Discard the page. */
- if (pagep != NULL) {
- if ((ret = __memp_fput(mpf, ip,
- pagep, DB_PRIORITY_VERY_LOW)) != 0)
- goto out;
- pagep = NULL;
- }
- /* Give the page back to the OS. */
- if (meta->last_pgno <= argp->pgno && (ret = __memp_ftruncate(
- mpf, NULL, ip, argp->pgno, MP_TRUNC_RECOVER)) != 0)
- goto out;
- }
-
- if (pagep != NULL) {
- ret = __memp_fput(mpf, ip, pagep, file_dbp->priority);
- pagep = NULL;
- if (ret != 0)
- goto out;
- }
-
- ret = __memp_fput(mpf, ip, meta, file_dbp->priority);
- meta = NULL;
- if (ret != 0)
- goto out;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: if (pagep != NULL)
- (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
- if (meta != NULL)
- (void)__memp_fput(mpf, ip, meta, file_dbp->priority);
- REC_CLOSE;
-}
-
-/*
- * __db_pg_free_recover_int --
- */
-static int
-__db_pg_free_recover_int(env, ip, argp, file_dbp, lsnp, mpf, op, data)
- ENV *env;
- DB_THREAD_INFO *ip;
- __db_pg_freedata_args *argp;
- DB *file_dbp;
- DB_LSN *lsnp;
- DB_MPOOLFILE *mpf;
- db_recops op;
- int data;
-{
- DBMETA *meta;
- DB_LSN copy_lsn;
- PAGE *pagep, *prevp;
- int cmp_n, cmp_p, is_meta, ret;
-
- meta = NULL;
- pagep = prevp = NULL;
-
- /*
- * Get the "metapage". This will either be the metapage
- * or the previous page in the free list if we are doing
- * sorted allocations. If its a previous page then
- * we will not be truncating.
- */
- is_meta = argp->meta_pgno == PGNO_BASE_MD;
-
- REC_FGET(mpf, ip, argp->meta_pgno, &meta, check_meta);
-
- if (argp->meta_pgno != PGNO_BASE_MD)
- prevp = (PAGE *)meta;
-
- cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
- cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
- CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
- CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp);
-
- /*
- * Fix up the metadata page. If we're redoing or undoing the operation
- * we get the page and update its LSN, last and free pointer.
- */
- if (cmp_p == 0 && DB_REDO(op)) {
- REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
- /*
- * If we are at the end of the file truncate, otherwise
- * put on the free list.
- */
- if (argp->pgno == argp->last_pgno)
- meta->last_pgno = argp->pgno - 1;
- else if (is_meta)
- meta->free = argp->pgno;
- else
- NEXT_PGNO(prevp) = argp->pgno;
- LSN(meta) = *lsnp;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /* Need to undo the deallocation. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
- if (is_meta) {
- if (meta->last_pgno < argp->pgno)
- meta->last_pgno = argp->pgno;
- meta->free = argp->next;
- } else
- NEXT_PGNO(prevp) = argp->next;
- LSN(meta) = argp->meta_lsn;
- }
-
-check_meta:
- if (ret != 0 && is_meta) {
- /* The metadata page must always exist. */
- ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);
- goto out;
- }
-
- /*
- * Get the freed page. Don't create the page if we are going to
- * free it. If we're redoing the operation we get the page and
- * explicitly discard its contents, then update its LSN. If we're
- * undoing the operation, we get the page and restore its header.
- */
- if (DB_REDO(op) || (is_meta && meta->last_pgno < argp->pgno)) {
- if ((ret = __memp_fget(mpf, &argp->pgno,
- ip, NULL, 0, &pagep)) != 0) {
- if (ret != DB_PAGE_NOTFOUND)
- goto out;
- if (is_meta &&
- DB_REDO(op) && meta->last_pgno <= argp->pgno)
- goto trunc;
- goto done;
- }
- } else if ((ret = __memp_fget(mpf, &argp->pgno,
- ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0)
- goto out;
-
- (void)__ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
- cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep));
- cmp_p = LOG_COMPARE(&LSN(pagep), &copy_lsn);
-
- /*
- * This page got extended by a later allocation,
- * but its allocation was not in the scope of this
- * recovery pass.
- */
- if (IS_ZERO_LSN(LSN(pagep)))
- cmp_p = 0;
-
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &copy_lsn);
- if (DB_REDO(op) &&
- (cmp_p == 0 ||
- (IS_ZERO_LSN(copy_lsn) &&
- LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) {
- /* Need to redo the deallocation. */
- /*
- * The page can be truncated if it was truncated at runtime
- * and the current metapage reflects the truncation.
- */
- if (is_meta && meta->last_pgno <= argp->pgno &&
- argp->last_pgno <= argp->pgno) {
- if ((ret = __memp_fput(mpf, ip,
- pagep, DB_PRIORITY_VERY_LOW)) != 0)
- goto out;
- pagep = NULL;
-trunc: if ((ret = __memp_ftruncate(mpf, NULL, ip,
- argp->pgno, MP_TRUNC_RECOVER)) != 0)
- goto out;
- } else if (argp->last_pgno == argp->pgno) {
- /* The page was truncated at runtime, zero it out. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- P_INIT(pagep, 0, PGNO_INVALID,
- PGNO_INVALID, PGNO_INVALID, 0, P_INVALID);
- ZERO_LSN(pagep->lsn);
- } else {
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
- pagep->lsn = *lsnp;
-
- }
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /* Need to reallocate the page. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- memcpy(pagep, argp->header.data, argp->header.size);
- if (data)
- memcpy((u_int8_t*)pagep + HOFFSET(pagep),
- argp->data.data, argp->data.size);
- }
- if (pagep != NULL &&
- (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
- goto out;
-
- pagep = NULL;
-#ifdef HAVE_FTRUNCATE
- /*
- * If we are keeping an in memory free list remove this
- * element from the list.
- */
- if (op == DB_TXN_ABORT && argp->pgno != argp->last_pgno) {
- db_pgno_t *lp;
- u_int32_t nelem, pos;
-
- if ((ret = __memp_get_freelist(mpf, &nelem, &lp)) != 0)
- goto out;
- if (lp != NULL) {
- pos = 0;
- if (!is_meta) {
- __db_freelist_pos(argp->pgno, lp, nelem, &pos);
-
- /*
- * If we aborted after logging but before
- * updating the free list don't do anything.
- */
- if (argp->pgno != lp[pos]) {
- DB_ASSERT(env,
- argp->meta_pgno == lp[pos]);
- goto done;
- }
- DB_ASSERT(env,
- argp->meta_pgno == lp[pos - 1]);
- } else if (nelem != 0 && argp->pgno != lp[pos])
- goto done;
-
- if (pos < nelem)
- memmove(&lp[pos], &lp[pos + 1],
- ((nelem - pos) - 1) * sizeof(*lp));
-
- /* Shrink the list */
- if ((ret =
- __memp_extend_freelist(mpf, nelem - 1, &lp)) != 0)
- goto out;
- }
- }
-#endif
-done:
- if (meta != NULL &&
- (ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
- goto out;
- meta = NULL;
- ret = 0;
-
-out: if (pagep != NULL)
- (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
- if (meta != NULL)
- (void)__memp_fput(mpf, ip, meta, file_dbp->priority);
-
- return (ret);
-}
-
-/*
- * __db_pg_free_recover --
- * Recovery function for pg_free.
- *
- * PUBLIC: int __db_pg_free_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_pg_free_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_pg_free_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- DB_THREAD_INFO *ip;
- int ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- REC_PRINT(__db_pg_free_print);
- REC_INTRO(__db_pg_free_read, ip, 0);
-
- ret = __db_pg_free_recover_int(env, ip,
- (__db_pg_freedata_args *)argp, file_dbp, lsnp, mpf, op, 0);
-
-done: *lsnp = argp->prev_lsn;
-out:
- REC_CLOSE;
-}
-
-/*
- * __db_pg_freedata_recover --
- * Recovery function for pg_freedata.
- *
- * PUBLIC: int __db_pg_freedata_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_pg_freedata_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_pg_freedata_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- DB_THREAD_INFO *ip;
- int ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- REC_PRINT(__db_pg_freedata_print);
- REC_INTRO(__db_pg_freedata_read, ip, 0);
-
- ret = __db_pg_free_recover_int(env,
- ip, argp, file_dbp, lsnp, mpf, op, 1);
-
-done: *lsnp = argp->prev_lsn;
-out:
- REC_CLOSE;
-}
-
-/*
- * __db_cksum_recover --
- * Recovery function for checksum failure log record.
- *
- * PUBLIC: int __db_cksum_recover __P((ENV *,
- * PUBLIC: DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_cksum_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_cksum_args *argp;
- int ret;
-
- COMPQUIET(info, NULL);
- COMPQUIET(lsnp, NULL);
- COMPQUIET(op, DB_TXN_ABORT);
-
- REC_PRINT(__db_cksum_print);
-
- if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0)
- return (ret);
-
- /*
- * We had a checksum failure -- the only option is to run catastrophic
- * recovery.
- */
- if (F_ISSET(env, ENV_RECOVER_FATAL))
- ret = 0;
- else {
- __db_errx(env,
- "Checksum failure requires catastrophic recovery");
- ret = __env_panic(env, DB_RUNRECOVERY);
- }
-
- __os_free(env, argp);
- return (ret);
-}
-
-/*
- * __db_pg_init_recover --
- * Recovery function to reinit pages after truncation.
- *
- * Fetches page argp->pgno. If the fetch fails during undo, a
- * DB_PAGE_NOTFOUND result skips to "done" and any other error is
- * reported through __db_pgerr. If it fails otherwise, the page is
- * fetched again with DB_MPOOL_CREATE.
- *
- * The page LSN is compared with *lsnp (cmp_n) and with the LSN stored
- * in the logged page header (cmp_p):
- * - cmp_p == 0 on redo: the page is re-initialized with P_INIT and its
- *   LSN is set to *lsnp.
- * - cmp_n == 0 on undo: the logged header and data are copied back onto
- *   the page.
- * NOTE(review): REC_INTRO, REC_DIRTY and REC_CLOSE are macros defined
- * outside this view and may jump to the done/out labels -- confirm.
- *
- * PUBLIC: int __db_pg_init_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_pg_init_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_pg_init_args *argp;
- DB_THREAD_INFO *ip;
- DB *file_dbp;
- DBC *dbc;
- DB_LSN copy_lsn;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- int cmp_n, cmp_p, ret, type;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- REC_PRINT(__db_pg_init_print);
- REC_INTRO(__db_pg_init_read, ip, 0);
-
- mpf = file_dbp->mpf;
- if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
- if (DB_UNDO(op)) {
- if (ret == DB_PAGE_NOTFOUND)
- goto done;
- else {
- ret = __db_pgerr(file_dbp, argp->pgno, ret);
- goto out;
- }
- }
-
- /*
- * This page was truncated and may simply not have
- * had an item written to it yet. This should only
- * happen on hash databases, so confirm that.
- */
- DB_ASSERT(env, file_dbp->type == DB_HASH);
- if ((ret = __memp_fget(mpf, &argp->pgno,
- ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) {
- ret = __db_pgerr(file_dbp, argp->pgno, ret);
- goto out;
- }
- }
-
- (void)__ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN)); /* LSN from the logged page header. */
- cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
- cmp_p = LOG_COMPARE(&LSN(pagep), &copy_lsn);
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &copy_lsn);
- CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp);
-
- if (cmp_p == 0 && DB_REDO(op)) {
- if (TYPE(pagep) == P_HASH)
- type = P_HASH;
- else
- type = file_dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE;
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- P_INIT(pagep, file_dbp->pgsize, PGNO(pagep), PGNO_INVALID,
- PGNO_INVALID, TYPE(pagep) == P_HASH ? 0 : 1, type);
- pagep->lsn = *lsnp;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /* Put the data back on the page. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- memcpy(pagep, argp->header.data, argp->header.size);
- if (argp->data.size > 0)
- memcpy((u_int8_t*)pagep + HOFFSET(pagep),
- argp->data.data, argp->data.size);
- }
- if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
- goto out;
-
-done: *lsnp = argp->prev_lsn;
-out:
- REC_CLOSE;
-}
-
-/*
- * __db_pg_trunc_recover --
- * Recovery function for pg_trunc.
- *
- * The body is only compiled when HAVE_FTRUNCATE is defined; otherwise
- * the function quiets its arguments and returns EINVAL.
- *
- * Redo: calls __db_pg_truncate with the logged page list, then -- when
- * the page LSNs equal the logged last_lsn / meta_lsn -- updates the next
- * pointer of the last_free page and the free and last_pgno fields of the
- * meta page, stamping both with *lsnp.
- * Undo: re-initializes each listed page as P_INVALID with its logged
- * next_pgno and LSN (when the page LSN is zero or equals *lsnp), then
- * relinks the last_free page and the meta page and restores their
- * logged LSNs.
- * For DB_TXN_ABORT the listed page numbers are also inserted into the
- * list returned by __memp_get_freelist, unless the first listed page is
- * already present there.
- *
- * PUBLIC: int __db_pg_trunc_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_pg_trunc_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
-#ifdef HAVE_FTRUNCATE
- __db_pg_trunc_args *argp;
- DB_THREAD_INFO *ip;
- DB *file_dbp;
- DBC *dbc;
- DBMETA *meta;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- db_pglist_t *pglist, *lp;
- db_pgno_t last_pgno, *list;
- u_int32_t felem, nelem, pos;
- int ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- REC_PRINT(__db_pg_trunc_print);
- REC_INTRO(__db_pg_trunc_read, ip, 1);
-
- pglist = (db_pglist_t *) argp->list.data;
- nelem = argp->list.size / sizeof(db_pglist_t); /* Number of logged page-list entries. */
- if (DB_REDO(op)) {
- /*
- * First call __db_pg_truncate to find the truncation
- * point, truncate the file and return the new last_pgno.
- */
- last_pgno = argp->last_pgno;
- if ((ret = __db_pg_truncate(dbc, NULL, pglist,
- NULL, &nelem, argp->next_free, &last_pgno, lsnp, 1)) != 0)
- goto out;
-
- if (argp->last_free != PGNO_INVALID) {
- /*
- * Update the next pointer of the last page in
- * the freelist. If the truncation point is
- * beyond next_free then this is still in the freelist
- * otherwise the last_free page is at the end.
- */
- if ((ret = __memp_fget(mpf,
- &argp->last_free, ip, NULL, 0, &meta)) == 0) {
- if (LOG_COMPARE(&LSN(meta),
- &argp->last_lsn) == 0) {
- REC_DIRTY(mpf,
- ip, dbc->priority, &meta);
- if (pglist->pgno > last_pgno)
- NEXT_PGNO(meta) = PGNO_INVALID;
- else
- NEXT_PGNO(meta) = pglist->pgno;
- LSN(meta) = *lsnp;
- }
- if ((ret = __memp_fput(mpf, ip,
- meta, file_dbp->priority)) != 0)
- goto out;
- meta = NULL;
- } else if (ret != DB_PAGE_NOTFOUND)
- goto out;
- }
- if ((ret = __memp_fget(mpf, &argp->meta, ip, NULL,
- 0, &meta)) != 0)
- goto out;
- if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) {
- REC_DIRTY(mpf, ip, dbc->priority, &meta);
- if (argp->last_free == PGNO_INVALID) {
- if (nelem == 0)
- meta->free = PGNO_INVALID;
- else
- meta->free = pglist->pgno;
- }
- meta->last_pgno = last_pgno;
- LSN(meta) = *lsnp;
- }
- } else {
- /* Put the free list back in its original order. */
- for (lp = pglist; lp < &pglist[nelem]; lp++) {
- if ((ret = __memp_fget(mpf, &lp->pgno, ip,
- NULL, DB_MPOOL_CREATE, &pagep)) != 0)
- goto out;
- if (IS_ZERO_LSN(LSN(pagep)) ||
- LOG_COMPARE(&LSN(pagep), lsnp) == 0) {
- REC_DIRTY(mpf, ip, dbc->priority, &pagep);
- P_INIT(pagep, file_dbp->pgsize, lp->pgno,
- PGNO_INVALID, lp->next_pgno, 0, P_INVALID);
- LSN(pagep) = lp->lsn;
- }
- if ((ret = __memp_fput(mpf,
- ip, pagep, file_dbp->priority)) != 0)
- goto out;
- }
- /*
- * Link the truncated part back into the free list.
- * It's either after the last_free page or directly
- * linked to the metadata page.
- */
- if (argp->last_free != PGNO_INVALID) {
- if ((ret = __memp_fget(mpf, &argp->last_free,
- ip, NULL, DB_MPOOL_EDIT, &meta)) == 0) {
- if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
- NEXT_PGNO(meta) = argp->next_free;
- LSN(meta) = argp->last_lsn;
- }
- if ((ret = __memp_fput(mpf, ip,
- meta, file_dbp->priority)) != 0)
- goto out;
- } else if (ret != DB_PAGE_NOTFOUND)
- goto out;
- meta = NULL;
- }
- if ((ret = __memp_fget(mpf, &argp->meta,
- ip, NULL, DB_MPOOL_EDIT, &meta)) != 0)
- goto out;
- if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
- REC_DIRTY(mpf, ip, dbc->priority, &meta);
- /*
- * If we had to break up the list last_pgno
- * may only represent the end of the block.
- */
- if (meta->last_pgno < argp->last_pgno)
- meta->last_pgno = argp->last_pgno;
- if (argp->last_free == PGNO_INVALID)
- meta->free = argp->next_free;
- LSN(meta) = argp->meta_lsn;
- }
- }
-
- if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
- goto out;
-
- if (op == DB_TXN_ABORT) {
- /*
- * Put the pages back on the in memory free list.
- * If this is part of a multi-record truncate then
- * we need to find this batch, it may not be at the end.
- * If we aborted while writing one of the log records
- * then this set may still be in the list.
- */
- if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
- goto out;
- if (list != NULL) {
- if (felem != 0 && list[felem - 1] > pglist->pgno) {
- __db_freelist_pos(
- pglist->pgno, list, felem, &pos);
- DB_ASSERT(env, pos < felem);
- if (pglist->pgno == list[pos])
- goto done;
- pos++;
- } else if (felem != 0 &&
- list[felem - 1] == pglist->pgno)
- goto done;
- else
- pos = felem;
- if ((ret = __memp_extend_freelist(
- mpf, felem + nelem, &list)) != 0)
- goto out;
- if (pos != felem)
- memmove(&list[nelem + pos], &list[pos],
- sizeof(*list) * (felem - pos)); /* Open a gap of nelem entries at pos. */
- for (lp = pglist; lp < &pglist[nelem]; lp++)
- list[pos++] = lp->pgno;
- }
- }
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: REC_CLOSE;
-#else
- /*
- * If HAVE_FTRUNCATE is not defined, we'll never see pg_trunc records
- * to recover.
- */
- COMPQUIET(env, NULL);
- COMPQUIET(dbtp, NULL);
- COMPQUIET(lsnp, NULL);
- COMPQUIET(op, DB_TXN_ABORT);
- COMPQUIET(info, NULL);
- return (EINVAL);
-#endif
-}
-/*
- * __db_pg_sort_44_recover --
- * Recovery function for pg_sort.
- * This is deprecated and kept for replication upgrades.
- *
- * The body is only compiled when HAVE_FTRUNCATE is defined; otherwise
- * the function quiets its arguments and returns EINVAL.
- *
- * Redo: sorts the logged page list with __db_freelist_sort, calls
- * __db_pg_truncate, then -- when the page LSNs equal the logged
- * last_lsn / meta_lsn -- clears the next pointer of the last_free page
- * and updates the free and last_pgno fields of the meta page, stamping
- * both with *lsnp.
- * Undo: re-initializes each listed page as P_INVALID, chaining it to the
- * next list entry (PGNO_INVALID for the final entry), then relinks the
- * last_free page and the meta page to the first listed page and restores
- * their logged LSNs.
- * For DB_TXN_ABORT the listed page numbers are appended to the list
- * returned by __memp_get_freelist.
- * NOTE(review): the meta page is released only after the DB_TXN_ABORT
- * section; whether REC_CLOSE covers the earlier "goto out" exits is not
- * visible here -- confirm.
- *
- * PUBLIC: int __db_pg_sort_44_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_pg_sort_44_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
-#ifdef HAVE_FTRUNCATE
- __db_pg_sort_44_args *argp;
- DB_THREAD_INFO *ip;
- DB *file_dbp;
- DBC *dbc;
- DBMETA *meta;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- db_pglist_t *pglist, *lp;
- db_pgno_t pgno, *list;
- u_int32_t felem, nelem;
- int ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- REC_PRINT(__db_pg_sort_44_print);
- REC_INTRO(__db_pg_sort_44_read, ip, 1);
-
- pglist = (db_pglist_t *) argp->list.data;
- nelem = argp->list.size / sizeof(db_pglist_t); /* Number of logged page-list entries. */
- if (DB_REDO(op)) {
- pgno = argp->last_pgno;
- __db_freelist_sort(pglist, nelem);
- if ((ret = __db_pg_truncate(dbc, NULL,
- pglist, NULL, &nelem, PGNO_INVALID, &pgno, lsnp, 1)) != 0)
- goto out;
-
- if (argp->last_free != PGNO_INVALID) {
- if ((ret = __memp_fget(mpf,
- &argp->last_free, ip, NULL, 0, &meta)) == 0) {
- if (LOG_COMPARE(&LSN(meta),
- &argp->last_lsn) == 0) {
- REC_DIRTY(mpf,
- ip, dbc->priority, &meta);
- NEXT_PGNO(meta) = PGNO_INVALID;
- LSN(meta) = *lsnp;
- }
- if ((ret = __memp_fput(mpf, ip,
- meta, file_dbp->priority)) != 0)
- goto out;
- meta = NULL;
- } else if (ret != DB_PAGE_NOTFOUND)
- goto out;
- }
- if ((ret = __memp_fget(mpf, &argp->meta, ip, NULL,
- 0, &meta)) != 0)
- goto out;
- if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) {
- REC_DIRTY(mpf, ip, dbc->priority, &meta);
- if (argp->last_free == PGNO_INVALID) {
- if (nelem == 0)
- meta->free = PGNO_INVALID;
- else
- meta->free = pglist->pgno;
- }
- meta->last_pgno = pgno;
- LSN(meta) = *lsnp;
- }
- } else {
- /* Put the free list back in its original order. */
- for (lp = pglist; lp < &pglist[nelem]; lp++) {
- if ((ret = __memp_fget(mpf, &lp->pgno, ip,
- NULL, DB_MPOOL_CREATE, &pagep)) != 0)
- goto out;
- if (IS_ZERO_LSN(LSN(pagep)) ||
- LOG_COMPARE(&LSN(pagep), lsnp) == 0) {
- REC_DIRTY(mpf, ip, dbc->priority, &pagep);
- if (lp == &pglist[nelem - 1])
- pgno = PGNO_INVALID;
- else
- pgno = lp[1].pgno;
-
- P_INIT(pagep, file_dbp->pgsize,
- lp->pgno, PGNO_INVALID, pgno, 0, P_INVALID);
- LSN(pagep) = lp->lsn;
- }
- if ((ret = __memp_fput(mpf,
- ip, pagep, file_dbp->priority)) != 0)
- goto out;
- }
- if (argp->last_free != PGNO_INVALID) {
- if ((ret = __memp_fget(mpf, &argp->last_free,
- ip, NULL, DB_MPOOL_EDIT, &meta)) == 0) {
- if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
- NEXT_PGNO(meta) = pglist->pgno;
- LSN(meta) = argp->last_lsn;
- }
- if ((ret = __memp_fput(mpf, ip,
- meta, file_dbp->priority)) != 0)
- goto out;
- } else if (ret != DB_PAGE_NOTFOUND)
- goto out;
- meta = NULL;
- }
- if ((ret = __memp_fget(mpf, &argp->meta,
- ip, NULL, DB_MPOOL_EDIT, &meta)) != 0)
- goto out;
- if (LOG_COMPARE(&LSN(meta), lsnp) == 0) {
- REC_DIRTY(mpf, ip, dbc->priority, &meta);
- meta->last_pgno = argp->last_pgno;
- if (argp->last_free == PGNO_INVALID)
- meta->free = pglist->pgno;
- LSN(meta) = argp->meta_lsn;
- }
- }
- if (op == DB_TXN_ABORT) {
- if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0)
- goto out;
- if (list != NULL) {
- DB_ASSERT(env, felem == 0 ||
- argp->last_free == list[felem - 1]);
- if ((ret = __memp_extend_freelist(
- mpf, felem + nelem, &list)) != 0)
- goto out;
- for (lp = pglist; lp < &pglist[nelem]; lp++)
- list[felem++] = lp->pgno;
- }
- }
-
- if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
- goto out;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: REC_CLOSE;
-#else
- /*
- * If HAVE_FTRUNCATE is not defined, we'll never see pg_sort records
- * to recover.
- */
- COMPQUIET(env, NULL);
- COMPQUIET(dbtp, NULL);
- COMPQUIET(lsnp, NULL);
- COMPQUIET(op, DB_TXN_ABORT);
- COMPQUIET(info, NULL);
- return (EINVAL);
-#endif
-}
-
-/*
- * __db_pg_alloc_42_recover --
- * Recovery function for pg_alloc.
- *
- * Works on two pages: the meta page PGNO_BASE_MD and the allocated page
- * argp->pgno.
- * Meta page: on redo with the page LSN equal to argp->meta_lsn, the LSN
- * is set to *lsnp, free is set to argp->next and last_pgno is raised to
- * argp->pgno if needed. On undo with the page LSN equal to *lsnp,
- * control jumps to no_rollback, which reports an error and calls
- * __env_panic(env, EINVAL).
- * Allocated page: fetched without and then with DB_MPOOL_CREATE; on redo
- * it is initialized with the logged page type, on undo it is
- * re-initialized as P_INVALID linked to argp->next.
- * NOTE(review): the value assigned to ret from __env_panic is later
- * overwritten by the __memp_fput result for the meta page and by
- * "ret = 0" at done -- confirm this is intended.
- *
- * PUBLIC: int __db_pg_alloc_42_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_pg_alloc_42_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_pg_alloc_42_args *argp;
- DB_THREAD_INFO *ip;
- DB *file_dbp;
- DBC *dbc;
- DBMETA *meta;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- db_pgno_t pgno;
- int cmp_n, cmp_p, created, level, ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- meta = NULL;
- pagep = NULL;
- created = 0;
- REC_PRINT(__db_pg_alloc_42_print);
- REC_INTRO(__db_pg_alloc_42_read, ip, 0);
-
- /*
- * Fix up the metadata page. If we're redoing the operation, we have
- * to get the metadata page and update its LSN and its free pointer.
- * If we're undoing the operation and the page was ever created, we put
- * it on the freelist.
- */
- pgno = PGNO_BASE_MD;
- if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) {
- /* The metadata page must always exist on redo. */
- if (DB_REDO(op)) {
- ret = __db_pgerr(file_dbp, pgno, ret);
- goto out;
- } else
- goto done;
- }
- cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
- cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
- CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
- if (cmp_p == 0 && DB_REDO(op)) {
- /* Need to redo update described. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
- LSN(meta) = *lsnp;
- meta->free = argp->next;
- if (argp->pgno > meta->last_pgno)
- meta->last_pgno = argp->pgno;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- goto no_rollback;
- }
-
- /*
- * Fix up the allocated page. If the page does not exist
- * and we can truncate it then don't create it.
- * Otherwise if we're redoing the operation, we have
- * to get the page (creating it if it doesn't exist), and update its
- * LSN. If we're undoing the operation, we have to reset the page's
- * LSN and put it on the free list, or truncate it.
- */
- if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
- /*
- * We have to be able to identify if a page was newly
- * created so we can recover it properly. We cannot simply
- * look for an empty header, because hash uses a pgin
- * function that will set the header. Instead, we explicitly
- * try for the page without CREATE and if that fails, then
- * create it.
- */
- if ((ret = __memp_fget(mpf, &argp->pgno,
- ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) {
- if (DB_UNDO(op) && ret == ENOSPC)
- goto do_truncate;
- ret = __db_pgerr(file_dbp, argp->pgno, ret);
- goto out;
- }
- created = 1;
- }
-
- /* Fix up the allocated page. */
- cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
- cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn);
-
- /*
- * If an initial allocation is aborted and then reallocated during
- * an archival restore the log record will have an LSN for the page
- * but the page will be empty.
- */
- if (IS_ZERO_LSN(LSN(pagep)) ||
- (IS_ZERO_LSN(argp->page_lsn) && IS_INIT_LSN(LSN(pagep))))
- cmp_p = 0;
-
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn);
- /*
- * Another special case we have to handle is if we ended up with a
- * page of all 0's which can happen if we abort between allocating a
- * page in mpool and initializing it. In that case, even if we're
- * undoing, we need to re-initialize the page.
- */
- if (DB_REDO(op) && cmp_p == 0) {
- /* Need to redo update described. */
- switch (argp->ptype) {
- case P_LBTREE:
- case P_LRECNO:
- case P_LDUP:
- level = LEAFLEVEL;
- break;
- default:
- level = 0;
- break;
- }
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype);
-
- pagep->lsn = *lsnp;
- } else if (DB_UNDO(op) && (cmp_n == 0 || created)) {
- /*
- * This is where we handle the case of a 0'd page (pagep->pgno
- * is equal to PGNO_INVALID).
- * Undo the allocation, reinitialize the page and
- * link its next pointer to the free list.
- */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
-
- pagep->lsn = argp->page_lsn;
- }
-
-do_truncate:
- /*
- * We cannot undo things from 4.2 land, because we no longer
- * have limbo processing.
- */
- if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) &&
- IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) {
-no_rollback: __db_errx(env,
-"Cannot replicate prepared transactions from master running release 4.2 ");
- ret = __env_panic(env, EINVAL);
- }
-
- if (pagep != NULL &&
- (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
- goto out;
- pagep = NULL;
-
- if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
- goto out;
- meta = NULL;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: if (pagep != NULL)
- (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
- if (meta != NULL)
- (void)__memp_fput(mpf, ip, meta, file_dbp->priority);
- REC_CLOSE;
-}
-
-/*
- * __db_pg_free_recover_42_int --
- * Shared worker called by __db_pg_free_42_recover (data == 0) and
- * __db_pg_freedata_42_recover (data == 1).
- *
- * Step 1 fixes up page argp->meta_pgno, treated as the meta page when
- * it equals PGNO_BASE_MD and as the previous free-list page otherwise:
- * on redo its free/next pointer is set to argp->pgno, on undo it is set
- * back to argp->next.
- * Step 2 fixes up the freed page argp->pgno: on redo it is
- * re-initialized as P_INVALID linked to argp->next, on undo the logged
- * header is copied back and, when data is non-zero, the logged data too.
- *
- * Returns 0 on success or the error from the failing call; both pages
- * are released on every exit through "out".
- * NOTE(review): REC_FGET is a macro defined outside this view;
- * presumably it sets ret and jumps to check_meta when the fetch fails --
- * confirm.
- */
-static int
-__db_pg_free_recover_42_int(env, ip, argp, file_dbp, lsnp, mpf, op, data)
- ENV *env;
- DB_THREAD_INFO *ip;
- __db_pg_freedata_42_args *argp;
- DB *file_dbp;
- DB_LSN *lsnp;
- DB_MPOOLFILE *mpf;
- db_recops op;
- int data;
-{
- DBMETA *meta;
- DB_LSN copy_lsn;
- PAGE *pagep, *prevp;
- int cmp_n, cmp_p, is_meta, ret;
-
- meta = NULL;
- pagep = NULL;
- prevp = NULL;
-
- /*
- * Get the "metapage". This will either be the metapage
- * or the previous page in the free list if we are doing
- * sorted allocations. If it's a previous page then
- * we will not be truncating.
- */
- is_meta = argp->meta_pgno == PGNO_BASE_MD;
-
- REC_FGET(mpf, ip, argp->meta_pgno, &meta, check_meta);
-
- if (argp->meta_pgno != PGNO_BASE_MD)
- prevp = (PAGE *)meta;
-
- cmp_n = LOG_COMPARE(lsnp, &LSN(meta));
- cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn);
- CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn);
-
- /*
- * Fix up the metadata page. If we're redoing or undoing the operation
- * we get the page and update its LSN, last and free pointer.
- */
- if (cmp_p == 0 && DB_REDO(op)) {
- /* Need to redo the deallocation. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
- if (prevp == NULL)
- meta->free = argp->pgno;
- else
- NEXT_PGNO(prevp) = argp->pgno;
- /*
- * If this was a compensating transaction and
- * we are a replica, then we never executed the
- * original allocation which incremented meta->free.
- */
- if (prevp == NULL && meta->last_pgno < meta->free)
- meta->last_pgno = meta->free;
- LSN(meta) = *lsnp;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /* Need to undo the deallocation. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &meta);
- if (prevp == NULL)
- meta->free = argp->next;
- else
- NEXT_PGNO(prevp) = argp->next;
- LSN(meta) = argp->meta_lsn;
- if (prevp == NULL && meta->last_pgno < argp->pgno)
- meta->last_pgno = argp->pgno;
- }
-
-check_meta:
- if (ret != 0 && is_meta) {
- /* The metadata page must always exist. */
- ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);
- goto out;
- }
-
- /*
- * Get the freed page. If we support truncate then don't
- * create the page if we are going to free it. If we're
- * redoing the operation we get the page and explicitly discard
- * its contents, then update its LSN. If we're undoing the
- * operation, we get the page and restore its header.
- * If we don't support truncate, then we must create the page
- * and roll it back.
- */
- if ((ret = __memp_fget(mpf, &argp->pgno,
- ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0)
- goto out;
-
- (void)__ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN)); /* LSN from the logged page header. */
- cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep));
- cmp_p = LOG_COMPARE(&LSN(pagep), &copy_lsn);
-
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &copy_lsn);
- if (DB_REDO(op) &&
- (cmp_p == 0 ||
- (IS_ZERO_LSN(copy_lsn) &&
- LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) {
- /* Need to redo the deallocation. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
- pagep->lsn = *lsnp;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /* Need to reallocate the page. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- memcpy(pagep, argp->header.data, argp->header.size);
- if (data)
- memcpy((u_int8_t*)pagep + HOFFSET(pagep),
- argp->data.data, argp->data.size);
- }
- if (pagep != NULL &&
- (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
- goto out;
-
- pagep = NULL;
- if (meta != NULL &&
- (ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0)
- goto out;
- meta = NULL;
-
- ret = 0;
-
-out: if (pagep != NULL)
- (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
- if (meta != NULL)
- (void)__memp_fput(mpf, ip, meta, file_dbp->priority);
-
- return (ret);
-}
-
-/*
- * __db_pg_free_42_recover --
- * Recovery function for pg_free.
- *
- * Runs the REC_PRINT/REC_INTRO preamble for the pg_free_42 record and
- * hands the work to __db_pg_free_recover_42_int, passing argp cast to
- * __db_pg_freedata_42_args * and a final argument of 0. After that call
- * control falls into the "done" label, which replaces *lsnp with
- * argp->prev_lsn.
- * NOTE(review): the cast presumably relies on the two argument structs
- * sharing a common leading layout -- confirm against their definitions.
- *
- * PUBLIC: int __db_pg_free_42_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_pg_free_42_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_pg_free_42_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- DB_THREAD_INFO *ip;
- int ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- REC_PRINT(__db_pg_free_42_print);
- REC_INTRO(__db_pg_free_42_read, ip, 0);
-
- ret = __db_pg_free_recover_42_int(env, ip,
- (__db_pg_freedata_42_args *)argp, file_dbp, lsnp, mpf, op, 0);
-
-done: *lsnp = argp->prev_lsn;
-out:
- REC_CLOSE;
-}
-
-/*
- * __db_pg_freedata_42_recover --
- * Recovery function for pg_freedata.
- *
- * Runs the REC_PRINT/REC_INTRO preamble for the pg_freedata_42 record
- * and hands the work to __db_pg_free_recover_42_int with a final
- * argument of 1, which makes that function also restore the logged page
- * data on undo. After that call control falls into the "done" label,
- * which replaces *lsnp with argp->prev_lsn.
- *
- * PUBLIC: int __db_pg_freedata_42_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_pg_freedata_42_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_pg_freedata_42_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- DB_THREAD_INFO *ip;
- int ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- REC_PRINT(__db_pg_freedata_42_print);
- REC_INTRO(__db_pg_freedata_42_read, ip, 0);
-
- ret = __db_pg_free_recover_42_int(
- env, ip, argp, file_dbp, lsnp, mpf, op, 1);
-
-done: *lsnp = argp->prev_lsn;
-out:
- REC_CLOSE;
-}
-
-/*
- * __db_relink_42_recover --
- * Recovery function for relink.
- *
- * Visits up to three pages in order: argp->pgno, argp->next and
- * argp->prev. For each page a failed fetch is an error on redo and is
- * skipped otherwise.
- * - argp->pgno: skipped for DB_ADD_PAGE_COMPAT; otherwise redo stamps
- *   the page with *lsnp and undo restores next_pgno, prev_pgno and lsn
- *   from the log record.
- * - argp->next: prev_pgno is set to argp->prev (redo of a remove, undo
- *   of an add) or to argp->pgno (undo of a remove, redo of an add).
- * - argp->prev: not visited for DB_ADD_PAGE_COMPAT; next_pgno is set to
- *   argp->next on redo or to argp->pgno on undo.
- * A modified neighbour page gets the logged LSN on undo and *lsnp on
- * redo.
- *
- * PUBLIC: int __db_relink_42_recover
- * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__db_relink_42_recover(env, dbtp, lsnp, op, info)
- ENV *env;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __db_relink_42_args *argp;
- DB_THREAD_INFO *ip;
- DB *file_dbp;
- DBC *dbc;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- int cmp_n, cmp_p, modified, ret;
-
- ip = ((DB_TXNHEAD *)info)->thread_info;
- pagep = NULL;
- REC_PRINT(__db_relink_42_print);
- REC_INTRO(__db_relink_42_read, ip, 0);
-
- /*
- * There are up to three pages we need to check -- the page, and the
- * previous and next pages, if they existed. For a page add operation,
- * the current page is the result of a split and is being recovered
- * elsewhere, so all we need do is recover the next page.
- */
- if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) {
- if (DB_REDO(op)) {
- ret = __db_pgerr(file_dbp, argp->pgno, ret);
- goto out;
- }
- goto next2;
- }
- if (argp->opcode == DB_ADD_PAGE_COMPAT)
- goto next1;
-
- cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn);
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn);
- if (cmp_p == 0 && DB_REDO(op)) {
- /* Redo the relink. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- pagep->lsn = *lsnp;
- } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
- /* Undo the relink. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- pagep->next_pgno = argp->next;
- pagep->prev_pgno = argp->prev;
- pagep->lsn = argp->lsn;
- }
-next1: if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
- goto out;
- pagep = NULL;
-
-next2: if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) {
- if (DB_REDO(op)) {
- ret = __db_pgerr(file_dbp, argp->next, ret);
- goto out;
- }
- goto prev;
- }
- modified = 0;
- cmp_n = LOG_COMPARE(lsnp, &LSN(pagep));
- cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next);
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next);
- if ((argp->opcode == DB_REM_PAGE_COMPAT && cmp_p == 0 && DB_REDO(op)) ||
- (argp->opcode == DB_ADD_PAGE_COMPAT && cmp_n == 0 && DB_UNDO(op))) {
- /* Redo the remove or undo the add. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- pagep->prev_pgno = argp->prev;
- modified = 1;
- } else if ((argp->opcode == DB_REM_PAGE_COMPAT &&
- cmp_n == 0 && DB_UNDO(op)) ||
- (argp->opcode == DB_ADD_PAGE_COMPAT && cmp_p == 0 && DB_REDO(op))) {
- /* Undo the remove or redo the add. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- pagep->prev_pgno = argp->pgno;
- modified = 1;
- }
- if (modified) {
- if (DB_UNDO(op))
- pagep->lsn = argp->lsn_next;
- else
- pagep->lsn = *lsnp;
- }
- if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
- goto out;
- pagep = NULL;
- if (argp->opcode == DB_ADD_PAGE_COMPAT)
- goto done;
-
-prev: if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) {
- if (DB_REDO(op)) {
- ret = __db_pgerr(file_dbp, argp->prev, ret);
- goto out;
- }
- goto done;
- }
- modified = 0;
- cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev);
- CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev);
- if (cmp_p == 0 && DB_REDO(op)) {
- /* Redo the relink. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- pagep->next_pgno = argp->next;
- modified = 1;
- } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) {
- /* Undo the relink. */
- REC_DIRTY(mpf, ip, file_dbp->priority, &pagep);
- pagep->next_pgno = argp->pgno;
- modified = 1;
- }
- if (modified) {
- if (DB_UNDO(op))
- pagep->lsn = argp->lsn_prev;
- else
- pagep->lsn = *lsnp;
- }
- if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0)
- goto out;
- pagep = NULL;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: if (pagep != NULL)
- (void)__memp_fput(mpf, ip, pagep, file_dbp->priority);
- REC_CLOSE;
-}
diff --git a/db/db_reclaim.c b/db/db_reclaim.c
deleted file mode 100644
index a44d054..0000000
--- a/db/db_reclaim.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/btree.h"
-#include "dbinc/mp.h"
-
-/*
- * __db_traverse_big
- * Traverse a chain of overflow pages and call the callback routine
- * on each one. The calling convention for the callback is:
- * callback(dbc, page, cookie, did_put),
- * where did_put is a return value indicating if the page in question has
- * already been returned to the mpool.
- *
- * Starting at pgno, each page is fetched and its NEXT_PGNO is read
- * before the callback runs. If the callback returns 0 without setting
- * did_put, the page is released here with __memp_fput. The walk stops at
- * PGNO_INVALID or on the first non-zero return, which is what the
- * function returns; a failed fetch returns its error immediately.
- *
- * PUBLIC: int __db_traverse_big __P((DBC *, db_pgno_t,
- * PUBLIC: int (*)(DBC *, PAGE *, void *, int *), void *));
- */
-int
-__db_traverse_big(dbc, pgno, callback, cookie)
- DBC *dbc;
- db_pgno_t pgno;
- int (*callback) __P((DBC *, PAGE *, void *, int *));
- void *cookie;
-{
- DB_MPOOLFILE *mpf;
- PAGE *p;
- int did_put, ret;
-
- mpf = dbc->dbp->mpf;
-
- do {
- did_put = 0;
- if ((ret = __memp_fget(mpf,
- &pgno, dbc->thread_info, dbc->txn, 0, &p)) != 0)
- return (ret);
- /*
- * If we are freeing pages only process the overflow
- * chain if the head of the chain has a refcount of 1.
- */
- pgno = NEXT_PGNO(p); /* Read the link now; the callback may give the page up. */
- if (callback == __db_truncate_callback && OV_REF(p) != 1)
- pgno = PGNO_INVALID;
- if ((ret = callback(dbc, p, cookie, &did_put)) == 0 &&
- !did_put)
- ret = __memp_fput(mpf,
- dbc->thread_info, p, dbc->priority);
- } while (ret == 0 && pgno != PGNO_INVALID);
-
- return (ret);
-}
-
-/*
- * __db_reclaim_callback
- * This is the callback routine used during a delete of a subdatabase.
- * We are traversing a btree or hash table and trying to free all the
- * pages. Since they share common code for duplicates and overflow
- * items, we traverse them identically and use this routine to do the
- * actual free. The reason that this is a callback is because hash uses
- * the same traversal code for statistics gathering.
- *
- * Returns 0 without touching *putp when p is the root page of a
- * DB_BTREE or DB_RECNO database. Otherwise frees p with __db_free,
- * returning its error on failure, or setting *putp to 1 and returning 0
- * on success. The cookie argument is unused.
- *
- * PUBLIC: int __db_reclaim_callback __P((DBC *, PAGE *, void *, int *));
- */
-int
-__db_reclaim_callback(dbc, p, cookie, putp)
- DBC *dbc;
- PAGE *p;
- void *cookie;
- int *putp;
-{
- DB *dbp;
- int ret;
-
- COMPQUIET(cookie, NULL);
- dbp = dbc->dbp;
-
- /*
- * We don't want to log the free of the root with the subdb.
- * If we abort then the subdb may not be openable to undo
- * the free.
- */
- if ((dbp->type == DB_BTREE || dbp->type == DB_RECNO) &&
- PGNO(p) == ((BTREE *)dbp->bt_internal)->bt_root)
- return (0);
- if ((ret = __db_free(dbc, p)) != 0)
- return (ret);
- *putp = 1;
-
- return (0);
-}
-
-/*
- * __db_truncate_callback
- * This is the callback routine used during a truncate.
- * We are traversing a btree or hash table and trying to free all the
- * pages.
- *
- * cookie points to a u_int32_t counter that is incremented for each
- * counted entry on the page; which entries count depends on the page
- * type handled in the switch below.
- * The page is then disposed of in one of two ways:
- * - freed with __db_free (the default), or
- * - kept and released with __memp_fput. This happens for an overflow
- *   page whose reference count is still non-zero after the decrement,
- *   and for a btree/recno root page or the head page of a hash bucket,
- *   which are re-initialized in place at the "reinit" label.
- * *putp is used internally to choose between the two and is 1 on every
- * successful return. An unrecognized page or hash item type returns the
- * result of __db_pgfmt.
- *
- * PUBLIC: int __db_truncate_callback __P((DBC *, PAGE *, void *, int *));
- */
-int
-__db_truncate_callback(dbc, p, cookie, putp)
- DBC *dbc;
- PAGE *p;
- void *cookie;
- int *putp;
-{
- DB *dbp;
- DBT ddbt, ldbt;
- DB_MPOOLFILE *mpf;
- db_indx_t indx, len, off, tlen, top;
- u_int8_t *hk, type;
- u_int32_t *countp;
- int ret;
-
- top = NUM_ENT(p);
- dbp = dbc->dbp;
- mpf = dbp->mpf;
- countp = cookie;
- *putp = 1;
-
- switch (TYPE(p)) {
- case P_LBTREE:
- /* Skip for off-page duplicates and deleted items. */
- for (indx = 0; indx < top; indx += P_INDX) {
- type = GET_BKEYDATA(dbp, p, indx + O_INDX)->type;
- if (!B_DISSET(type) && B_TYPE(type) != B_DUPLICATE)
- ++*countp;
- }
- /* FALLTHROUGH */
- case P_IBTREE:
- case P_IRECNO:
- case P_INVALID:
- if (dbp->type != DB_HASH &&
- ((BTREE *)dbp->bt_internal)->bt_root == PGNO(p)) {
- type = dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE;
- goto reinit;
- }
- break;
- case P_OVERFLOW:
- if ((ret = __memp_dirty(mpf,
- &p, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
- return (ret);
- if (DBC_LOGGING(dbc)) {
- if ((ret = __db_ovref_log(dbp, dbc->txn,
- &LSN(p), 0, p->pgno, -1, &LSN(p))) != 0)
- return (ret);
- } else
- LSN_NOT_LOGGED(LSN(p));
- if (--OV_REF(p) != 0)
- *putp = 0; /* Still referenced: release the page instead of freeing it. */
- break;
- case P_LRECNO:
- for (indx = 0; indx < top; indx += O_INDX) {
- type = GET_BKEYDATA(dbp, p, indx)->type;
- if (!B_DISSET(type))
- ++*countp;
- }
-
- if (((BTREE *)dbp->bt_internal)->bt_root == PGNO(p)) {
- type = P_LRECNO;
- goto reinit;
- }
- break;
- case P_LDUP:
- /* Correct for deleted items. */
- for (indx = 0; indx < top; indx += O_INDX)
- if (!B_DISSET(GET_BKEYDATA(dbp, p, indx)->type))
- ++*countp;
-
- break;
- case P_HASH:
- /* Correct for on-page duplicates and deleted items. */
- for (indx = 0; indx < top; indx += P_INDX) {
- switch (*H_PAIRDATA(dbp, p, indx)) {
- case H_OFFDUP:
- break;
- case H_OFFPAGE:
- case H_KEYDATA:
- ++*countp;
- break;
- case H_DUPLICATE:
- tlen = LEN_HDATA(dbp, p, 0, indx);
- hk = H_PAIRDATA(dbp, p, indx);
- for (off = 0; off < tlen;
- off += len + 2 * sizeof(db_indx_t)) {
- ++*countp;
- memcpy(&len,
- HKEYDATA_DATA(hk)
- + off, sizeof(db_indx_t));
- }
- break;
- default:
- return (__db_pgfmt(dbp->env, p->pgno));
- }
- }
- /* Don't free the head of the bucket. */
- if (PREV_PGNO(p) == PGNO_INVALID) {
- type = P_HASH;
-
-reinit: if ((ret = __memp_dirty(mpf, &p,
- dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
- return (ret);
- *putp = 0; /* Keep this page: it is re-initialized below, not freed. */
- if (DBC_LOGGING(dbc)) {
- memset(&ldbt, 0, sizeof(ldbt));
- memset(&ddbt, 0, sizeof(ddbt));
- ldbt.data = p;
- ldbt.size = P_OVERHEAD(dbp);
- ldbt.size += p->entries * sizeof(db_indx_t);
- ddbt.data = (u_int8_t *)p + HOFFSET(p);
- ddbt.size = dbp->pgsize - HOFFSET(p);
- if ((ret = __db_pg_init_log(dbp,
- dbc->txn, &LSN(p), 0,
- p->pgno, &ldbt, &ddbt)) != 0)
- return (ret);
- } else
- LSN_NOT_LOGGED(LSN(p));
-
- P_INIT(p, dbp->pgsize, PGNO(p), PGNO_INVALID,
- PGNO_INVALID, type == P_HASH ? 0 : 1, type);
- }
- break;
- default:
- return (__db_pgfmt(dbp->env, p->pgno));
- }
-
- if (*putp == 1) {
- if ((ret = __db_free(dbc, p)) != 0)
- return (ret);
- } else {
- if ((ret = __memp_fput(mpf, dbc->thread_info, p,
- dbc->priority)) != 0)
- return (ret);
- *putp = 1;
- }
-
- return (0);
-}
diff --git a/db/db_remove.c b/db/db_remove.c
deleted file mode 100644
index 6b59ec3..0000000
--- a/db/db_remove.c
+++ /dev/null
@@ -1,492 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 2001, 2010 Oracle and/or its affiliates. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/fop.h"
-#include "dbinc/btree.h"
-#include "dbinc/hash.h"
-#include "dbinc/lock.h"
-#include "dbinc/mp.h"
-#include "dbinc/txn.h"
-
-static int __db_dbtxn_remove __P((DB *,
- DB_THREAD_INFO *, DB_TXN *, const char *, const char *));
-static int __db_subdb_remove __P((DB *,
- DB_THREAD_INFO *, DB_TXN *, const char *, const char *));
-
-/*
- * __env_dbremove_pp
- * ENV->dbremove pre/post processing.
- *
- * PUBLIC: int __env_dbremove_pp __P((DB_ENV *,
- * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t));
- */
-int
-__env_dbremove_pp(dbenv, txn, name, subdb, flags)
- DB_ENV *dbenv;
- DB_TXN *txn;
- const char *name, *subdb;
- u_int32_t flags;
-{
- DB *dbp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret, txn_local;
-
- dbp = NULL;
- env = dbenv->env;
- txn_local = 0;
-
- ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->dbremove");
-
- /*
- * The actual argument checking is simple, do it inline, outside of
- * the replication block.
- */
- if ((ret = __db_fchk(env,
- "DB->remove", flags, DB_AUTO_COMMIT | DB_TXN_NOT_DURABLE)) != 0)
- return (ret);
-
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check && (ret = __env_rep_enter(env, 1)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- /*
- * Create local transaction as necessary, check for consistent
- * transaction usage.
- */
- if (IS_ENV_AUTO_COMMIT(env, txn, flags)) {
- if ((ret = __db_txn_auto_init(env, ip, &txn)) != 0)
- goto err;
- txn_local = 1;
- } else
- if (txn != NULL && !TXN_ON(env) &&
- (!CDB_LOCKING(env) || !F_ISSET(txn, TXN_CDSGROUP))) {
- ret = __db_not_txn_env(env);
- goto err;
- }
- LF_CLR(DB_AUTO_COMMIT);
-
- if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
- goto err;
- if (LF_ISSET(DB_TXN_NOT_DURABLE) &&
- (ret = __db_set_flags(dbp, DB_TXN_NOT_DURABLE)) != 0)
- goto err;
- LF_CLR(DB_TXN_NOT_DURABLE);
-
- ret = __db_remove_int(dbp, ip, txn, name, subdb, flags);
-
- if (txn_local) {
- /*
- * We created the DBP here and when we commit/abort, we'll
- * release all the transactional locks, including the handle
- * lock; mark the handle cleared explicitly.
- */
- LOCK_INIT(dbp->handle_lock);
- dbp->locker = NULL;
- } else if (txn != NULL) {
- /*
- * We created this handle locally so we need to close it
- * and clean it up. Unfortunately, it's holding transactional
- * locks that need to persist until the end of transaction.
- * If we invalidate the locker id (dbp->locker), then the close
- * won't free these locks prematurely.
- */
- dbp->locker = NULL;
- }
-
-err: if (txn_local && (t_ret =
- __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && ret == 0)
- ret = t_ret;
-
- /*
- * We never opened this dbp for real, so don't include a transaction
- * handle, and use NOSYNC to avoid calling into mpool.
- *
- * !!!
- * Note we're reversing the order of operations: we started the txn and
- * then opened the DB handle; we're resolving the txn and then closing
- * closing the DB handle -- a DB handle cannot be closed before
- * resolving the txn.
- */
- if (dbp != NULL &&
- (t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0)
- ret = t_ret;
-
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_remove_pp
- * DB->remove pre/post processing.
- *
- * PUBLIC: int __db_remove_pp
- * PUBLIC: __P((DB *, const char *, const char *, u_int32_t));
- */
-int
-__db_remove_pp(dbp, name, subdb, flags)
- DB *dbp;
- const char *name, *subdb;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- env = dbp->env;
-
- /*
- * Validate arguments, continuing to destroy the handle on failure.
- *
- * Cannot use DB_ILLEGAL_AFTER_OPEN directly because it returns.
- *
- * !!!
- * We have a serious problem if we're here with a handle used to open
- * a database -- we'll destroy the handle, and the application won't
- * ever be able to close the database.
- */
- if (F_ISSET(dbp, DB_AM_OPEN_CALLED))
- return (__db_mi_open(env, "DB->remove", 1));
-
- /* Validate arguments. */
- if ((ret = __db_fchk(env, "DB->remove", flags, 0)) != 0)
- return (ret);
-
- /* Check for consistent transaction usage. */
- if ((ret = __db_check_txn(dbp, NULL, DB_LOCK_INVALIDID, 0)) != 0)
- return (ret);
-
- ENV_ENTER(env, ip);
-
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check && (ret = __db_rep_enter(dbp, 1, 1, 0)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- /* Remove the file. */
- ret = __db_remove(dbp, ip, NULL, name, subdb, flags);
-
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
-err: ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_remove
- * DB->remove method.
- *
- * PUBLIC: int __db_remove __P((DB *, DB_THREAD_INFO *,
- * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t));
- */
-int
-__db_remove(dbp, ip, txn, name, subdb, flags)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- const char *name, *subdb;
- u_int32_t flags;
-{
- int ret, t_ret;
-
- ret = __db_remove_int(dbp, ip, txn, name, subdb, flags);
-
- if ((t_ret = __db_close(dbp, txn, DB_NOSYNC)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __db_remove_int
- * Worker function for the DB->remove method.
- *
- * PUBLIC: int __db_remove_int __P((DB *, DB_THREAD_INFO *,
- * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t));
- */
-int
-__db_remove_int(dbp, ip, txn, name, subdb, flags)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- const char *name, *subdb;
- u_int32_t flags;
-{
- ENV *env;
- int ret;
- char *real_name, *tmpname;
-
- env = dbp->env;
- real_name = tmpname = NULL;
-
- if (name == NULL && subdb == NULL) {
- __db_errx(env, "Remove on temporary files invalid");
- ret = EINVAL;
- goto err;
- }
-
- if (name == NULL) {
- MAKE_INMEM(dbp);
- real_name = (char *)subdb;
- } else if (subdb != NULL) {
- ret = __db_subdb_remove(dbp, ip, txn, name, subdb);
- goto err;
- }
-
- /* Handle transactional file removes separately. */
- if (IS_REAL_TXN(txn)) {
- ret = __db_dbtxn_remove(dbp, ip, txn, name, subdb);
- goto err;
- }
-
- /*
- * The remaining case is a non-transactional file remove.
- *
- * Find the real name of the file.
- */
- if (!F_ISSET(dbp, DB_AM_INMEM) && (ret = __db_appname(env,
- DB_APP_DATA, name, &dbp->dirname, &real_name)) != 0)
- goto err;
-
- /*
- * If this is a file and force is set, remove the temporary file, which
- * may have been left around. Ignore errors because the temporary file
- * might not exist.
- */
- if (!F_ISSET(dbp, DB_AM_INMEM) && LF_ISSET(DB_FORCE) &&
- (ret = __db_backup_name(env, real_name, NULL, &tmpname)) == 0)
- (void)__os_unlink(env, tmpname, 0);
-
- if ((ret = __fop_remove_setup(dbp, NULL, real_name, 0)) != 0)
- goto err;
-
- if (dbp->db_am_remove != NULL &&
- (ret = dbp->db_am_remove(dbp, ip, NULL, name, subdb, flags)) != 0)
- goto err;
-
- ret = F_ISSET(dbp, DB_AM_INMEM) ?
- __db_inmem_remove(dbp, NULL, real_name) :
- __fop_remove(env,
- NULL, dbp->fileid, name, &dbp->dirname, DB_APP_DATA,
- F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0);
-
-err: if (!F_ISSET(dbp, DB_AM_INMEM) && real_name != NULL)
- __os_free(env, real_name);
- if (tmpname != NULL)
- __os_free(env, tmpname);
-
- return (ret);
-}
-
-/*
- * __db_inmem_remove --
- * Removal of a named in-memory database.
- *
- * PUBLIC: int __db_inmem_remove __P((DB *, DB_TXN *, const char *));
- */
-int
-__db_inmem_remove(dbp, txn, name)
- DB *dbp;
- DB_TXN *txn;
- const char *name;
-{
- DBT fid_dbt, name_dbt;
- DB_LOCKER *locker;
- DB_LSN lsn;
- ENV *env;
- int ret;
-
- env = dbp->env;
- locker = NULL;
-
- DB_ASSERT(env, name != NULL);
-
- /* This had better exist if we are trying to do a remove. */
- (void)__memp_set_flags(dbp->mpf, DB_MPOOL_NOFILE, 1);
- if ((ret = __memp_fopen(dbp->mpf, NULL,
- name, &dbp->dirname, 0, 0, 0)) != 0)
- return (ret);
- if ((ret = __memp_get_fileid(dbp->mpf, dbp->fileid)) != 0)
- return (ret);
- dbp->preserve_fid = 1;
-
- if (LOCKING_ON(env)) {
- if (dbp->locker == NULL &&
- (ret = __lock_id(env, NULL, &dbp->locker)) != 0)
- return (ret);
- locker = txn == NULL ? dbp->locker : txn->locker;
- }
-
- /*
- * In a transactional environment, we'll play the same game we play
- * for databases in the file system -- create a temporary database
- * and put it in with the current name and then rename this one to
- * another name. We'll then use a commit-time event to remove the
- * entry.
- */
- if ((ret =
- __fop_lock_handle(env, dbp, locker, DB_LOCK_WRITE, NULL, 0)) != 0)
- return (ret);
-
- if (!IS_REAL_TXN(txn))
- ret = __memp_nameop(env, dbp->fileid, NULL, name, NULL, 1);
- else if (LOGGING_ON(env)) {
- if (txn != NULL && (ret =
- __txn_remevent(env, txn, name, dbp->fileid, 1)) != 0)
- return (ret);
-
- DB_INIT_DBT(name_dbt, name, strlen(name) + 1);
- DB_INIT_DBT(fid_dbt, dbp->fileid, DB_FILE_ID_LEN);
- ret = __crdel_inmem_remove_log(
- env, txn, &lsn, 0, &name_dbt, &fid_dbt);
- }
-
- return (ret);
-}
-
-/*
- * __db_subdb_remove --
- * Remove a subdatabase.
- */
-static int
-__db_subdb_remove(dbp, ip, txn, name, subdb)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- const char *name, *subdb;
-{
- DB *mdbp, *sdbp;
- int ret, t_ret;
-
- mdbp = sdbp = NULL;
-
- /* Open the subdatabase. */
- if ((ret = __db_create_internal(&sdbp, dbp->env, 0)) != 0)
- goto err;
- if (F_ISSET(dbp, DB_AM_NOT_DURABLE) &&
- (ret = __db_set_flags(sdbp, DB_TXN_NOT_DURABLE)) != 0)
- goto err;
- if ((ret = __db_open(sdbp, ip,
- txn, name, subdb, DB_UNKNOWN, DB_WRITEOPEN, 0, PGNO_BASE_MD)) != 0)
- goto err;
-
- DB_TEST_RECOVERY(sdbp, DB_TEST_PREDESTROY, ret, name);
-
- /* Free up the pages in the subdatabase. */
- switch (sdbp->type) {
- case DB_BTREE:
- case DB_RECNO:
- if ((ret = __bam_reclaim(sdbp, ip, txn)) != 0)
- goto err;
- break;
- case DB_HASH:
- if ((ret = __ham_reclaim(sdbp, ip, txn)) != 0)
- goto err;
- break;
- case DB_QUEUE:
- case DB_UNKNOWN:
- default:
- ret = __db_unknown_type(
- sdbp->env, "__db_subdb_remove", sdbp->type);
- goto err;
- }
-
- /*
- * Remove the entry from the main database and free the subdatabase
- * metadata page.
- */
- if ((ret = __db_master_open(sdbp, ip, txn, name, 0, 0, &mdbp)) != 0)
- goto err;
-
- if ((ret = __db_master_update(mdbp,
- sdbp, ip, txn, subdb, sdbp->type, MU_REMOVE, NULL, 0)) != 0)
- goto err;
-
- DB_TEST_RECOVERY(sdbp, DB_TEST_POSTDESTROY, ret, name);
-
-DB_TEST_RECOVERY_LABEL
-err:
- /* Close the main and subdatabases. */
- if ((t_ret = __db_close(sdbp, txn, DB_NOSYNC)) != 0 && ret == 0)
- ret = t_ret;
-
- if (mdbp != NULL &&
- (t_ret = __db_close(mdbp, txn, DB_NOSYNC)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-static int
-__db_dbtxn_remove(dbp, ip, txn, name, subdb)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- const char *name, *subdb;
-{
- ENV *env;
- int ret;
- char *tmpname;
-
- env = dbp->env;
- tmpname = NULL;
-
- /*
- * This is a transactional remove, so we have to keep the name
- * of the file locked until the transaction commits. As a result,
- * we implement remove by renaming the file to some other name
- * (which creates a dummy named file as a placeholder for the
- * file being rename/dremoved) and then deleting that file as
- * a delayed remove at commit.
- */
- if ((ret = __db_backup_name(env,
- F_ISSET(dbp, DB_AM_INMEM) ? subdb : name, txn, &tmpname)) != 0)
- return (ret);
-
- DB_TEST_RECOVERY(dbp, DB_TEST_PREDESTROY, ret, name);
-
- if ((ret = __db_rename_int(dbp,
- txn->thread_info, txn, name, subdb, tmpname)) != 0)
- goto err;
-
- /*
- * The internal removes will also translate into delayed removes.
- */
- if (dbp->db_am_remove != NULL &&
- (ret = dbp->db_am_remove(dbp, ip, txn, tmpname, NULL, 0)) != 0)
- goto err;
-
- ret = F_ISSET(dbp, DB_AM_INMEM) ?
- __db_inmem_remove(dbp, txn, tmpname) :
- __fop_remove(env,
- txn, dbp->fileid, tmpname, &dbp->dirname, DB_APP_DATA,
- F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0);
-
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, name);
-
-err:
-DB_TEST_RECOVERY_LABEL
- if (tmpname != NULL)
- __os_free(env, tmpname);
-
- return (ret);
-}
diff --git a/db/db_rename.c b/db/db_rename.c
deleted file mode 100644
index 1fdf721..0000000
--- a/db/db_rename.c
+++ /dev/null
@@ -1,372 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 2001, 2010 Oracle and/or its affiliates. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_am.h"
-#include "dbinc/fop.h"
-#include "dbinc/lock.h"
-#include "dbinc/log.h"
-#include "dbinc/mp.h"
-#include "dbinc/txn.h"
-
-static int __db_rename __P((DB *, DB_THREAD_INFO *,
- DB_TXN *, const char *, const char *, const char *));
-static int __db_subdb_rename __P((DB *, DB_THREAD_INFO *,
- DB_TXN *, const char *, const char *, const char *));
-
-/*
- * __env_dbrename_pp
- * ENV->dbrename pre/post processing.
- *
- * PUBLIC: int __env_dbrename_pp __P((DB_ENV *, DB_TXN *,
- * PUBLIC: const char *, const char *, const char *, u_int32_t));
- */
-int
-__env_dbrename_pp(dbenv, txn, name, subdb, newname, flags)
- DB_ENV *dbenv;
- DB_TXN *txn;
- const char *name, *subdb, *newname;
- u_int32_t flags;
-{
- DB *dbp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret, txn_local;
-
- env = dbenv->env;
- dbp = NULL;
- txn_local = 0;
-
- ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->dbrename");
-
- /*
- * The actual argument checking is simple, do it inline, outside of
- * the replication block.
- */
- if ((ret = __db_fchk(env, "DB->rename", flags, DB_AUTO_COMMIT)) != 0)
- return (ret);
-
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check && (ret = __env_rep_enter(env, 1)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- /*
- * Create local transaction as necessary, check for consistent
- * transaction usage.
- */
- if (IS_ENV_AUTO_COMMIT(env, txn, flags)) {
- if ((ret = __db_txn_auto_init(env, ip, &txn)) != 0)
- goto err;
- txn_local = 1;
- } else
- if (txn != NULL && !TXN_ON(env) &&
- (!CDB_LOCKING(env) || !F_ISSET(txn, TXN_CDSGROUP))) {
- ret = __db_not_txn_env(env);
- goto err;
- }
-
- LF_CLR(DB_AUTO_COMMIT);
-
- if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
- goto err;
-
- ret = __db_rename_int(dbp, ip, txn, name, subdb, newname);
-
- if (txn_local) {
- /*
- * We created the DBP here and when we commit/abort, we'll
- * release all the transactional locks, including the handle
- * lock; mark the handle cleared explicitly.
- */
- LOCK_INIT(dbp->handle_lock);
- dbp->locker = NULL;
- } else if (txn != NULL) {
- /*
- * We created this handle locally so we need to close it and
- * clean it up. Unfortunately, it's holding transactional
- * or CDS group locks that need to persist until the end of
- * transaction. If we invalidate the locker (dbp->locker),
- * then the close won't free these locks prematurely.
- */
- dbp->locker = NULL;
- }
-
-err: if (txn_local && (t_ret =
- __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && ret == 0)
- ret = t_ret;
-
- /*
- * We never opened this dbp for real, so don't include a transaction
- * handle, and use NOSYNC to avoid calling into mpool.
- *
- * !!!
- * Note we're reversing the order of operations: we started the txn and
- * then opened the DB handle; we're resolving the txn and then closing
- * closing the DB handle -- it's safer.
- */
- if (dbp != NULL &&
- (t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0)
- ret = t_ret;
-
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_rename_pp
- * DB->rename pre/post processing.
- *
- * PUBLIC: int __db_rename_pp __P((DB *,
- * PUBLIC: const char *, const char *, const char *, u_int32_t));
- */
-int
-__db_rename_pp(dbp, name, subdb, newname, flags)
- DB *dbp;
- const char *name, *subdb, *newname;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- env = dbp->env;
- handle_check = 0;
-
- /*
- * Validate arguments, continuing to destroy the handle on failure.
- *
- * Cannot use DB_ILLEGAL_AFTER_OPEN directly because it returns.
- *
- * !!!
- * We have a serious problem if we're here with a handle used to open
- * a database -- we'll destroy the handle, and the application won't
- * ever be able to close the database.
- */
- if (F_ISSET(dbp, DB_AM_OPEN_CALLED))
- return (__db_mi_open(env, "DB->rename", 1));
-
- /* Validate arguments. */
- if ((ret = __db_fchk(env, "DB->rename", flags, 0)) != 0)
- return (ret);
-
- /* Check for consistent transaction usage. */
- if ((ret = __db_check_txn(dbp, NULL, DB_LOCK_INVALIDID, 0)) != 0)
- return (ret);
-
- ENV_ENTER(env, ip);
-
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check && (ret = __db_rep_enter(dbp, 1, 1, 0)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- /* Rename the file. */
- ret = __db_rename(dbp, ip, NULL, name, subdb, newname);
-
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-err: ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_rename
- * DB->rename method.
- *
- */
-static int
-__db_rename(dbp, ip, txn, name, subdb, newname)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- const char *name, *subdb, *newname;
-{
- int ret, t_ret;
-
- ret = __db_rename_int(dbp, ip, txn, name, subdb, newname);
-
- if ((t_ret = __db_close(dbp, txn, DB_NOSYNC)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __db_rename_int
- * Worker function for DB->rename method; the close of the dbp is
- * left in the wrapper routine.
- *
- * PUBLIC: int __db_rename_int __P((DB *, DB_THREAD_INFO *,
- * PUBLIC: DB_TXN *, const char *, const char *, const char *));
- */
-int
-__db_rename_int(dbp, ip, txn, name, subdb, newname)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- const char *name, *subdb, *newname;
-{
- ENV *env;
- int ret;
- char *old, *real_name;
-
- env = dbp->env;
- real_name = NULL;
-
- DB_TEST_RECOVERY(dbp, DB_TEST_PREDESTROY, ret, name);
-
- if (name == NULL && subdb == NULL) {
- __db_errx(env, "Rename on temporary files invalid");
- ret = EINVAL;
- goto err;
- }
-
- if (name == NULL)
- MAKE_INMEM(dbp);
- else if (subdb != NULL) {
- ret = __db_subdb_rename(dbp, ip, txn, name, subdb, newname);
- goto err;
- }
-
- /*
- * From here on down, this pertains to files or in-memory databases.
- *
- * Find the real name of the file.
- */
- if (F_ISSET(dbp, DB_AM_INMEM)) {
- old = (char *)subdb;
- real_name = (char *)subdb;
- } else {
- if ((ret = __db_appname(env, DB_APP_DATA,
- name, &dbp->dirname, &real_name)) != 0)
- goto err;
- old = (char *)name;
- }
- DB_ASSERT(env, old != NULL);
-
- if ((ret = __fop_remove_setup(dbp, txn, real_name, 0)) != 0)
- goto err;
-
- if (dbp->db_am_rename != NULL &&
- (ret = dbp->db_am_rename(dbp, ip, txn, name, subdb, newname)) != 0)
- goto err;
-
- /*
- * The transactional case and non-transactional case are
- * quite different. In the non-transactional case, we simply
- * do the rename. In the transactional case, since we need
- * the ability to back out and maintain locking, we have to
- * create a temporary object as a placeholder. This is all
- * taken care of in the fop layer.
- */
- if (IS_REAL_TXN(txn)) {
- if ((ret = __fop_dummy(dbp, txn, old, newname)) != 0)
- goto err;
- } else {
- if ((ret = __fop_dbrename(dbp, old, newname)) != 0)
- goto err;
- }
-
- /*
- * I am pretty sure that we haven't gotten a dbreg id, so calling
- * dbreg_filelist_update is not necessary.
- */
- DB_ASSERT(env, dbp->log_filename == NULL ||
- dbp->log_filename->id == DB_LOGFILEID_INVALID);
-
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, newname);
-
-DB_TEST_RECOVERY_LABEL
-err: if (!F_ISSET(dbp, DB_AM_INMEM) && real_name != NULL)
- __os_free(env, real_name);
-
- return (ret);
-}
-
-/*
- * __db_subdb_rename --
- * Rename a subdatabase.
- */
-static int
-__db_subdb_rename(dbp, ip, txn, name, subdb, newname)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- const char *name, *subdb, *newname;
-{
- DB *mdbp;
- ENV *env;
- PAGE *meta;
- int ret, t_ret;
-
- mdbp = NULL;
- meta = NULL;
- env = dbp->env;
-
- /*
- * We have not opened this dbp so it isn't marked as a subdb,
- * but it ought to be.
- */
- F_SET(dbp, DB_AM_SUBDB);
-
- /*
- * Rename the entry in the main database. We need to first
- * get the meta-data page number (via MU_OPEN) so that we can
- * read the meta-data page and obtain a handle lock. Once we've
- * done that, we can proceed to do the rename in the master.
- */
- if ((ret = __db_master_open(dbp, ip, txn, name, 0, 0, &mdbp)) != 0)
- goto err;
-
- if ((ret = __db_master_update(mdbp, dbp, ip, txn, subdb, dbp->type,
- MU_OPEN, NULL, 0)) != 0)
- goto err;
-
- if ((ret = __memp_fget(mdbp->mpf, &dbp->meta_pgno,
- ip, txn, 0, &meta)) != 0)
- goto err;
- memcpy(dbp->fileid, ((DBMETA *)meta)->uid, DB_FILE_ID_LEN);
- if ((ret = __fop_lock_handle(env,
- dbp, mdbp->locker, DB_LOCK_WRITE, NULL, NOWAIT_FLAG(txn))) != 0)
- goto err;
-
- ret = __memp_fput(mdbp->mpf, ip, meta, dbp->priority);
- meta = NULL;
- if (ret != 0)
- goto err;
-
- if ((ret = __db_master_update(mdbp, dbp, ip, txn,
- subdb, dbp->type, MU_RENAME, newname, 0)) != 0)
- goto err;
-
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, name);
-
-DB_TEST_RECOVERY_LABEL
-err:
- if (meta != NULL && (t_ret =
- __memp_fput(mdbp->mpf, ip, meta, dbp->priority)) != 0 && ret == 0)
- ret = t_ret;
-
- if (mdbp != NULL &&
- (t_ret = __db_close(mdbp, txn, DB_NOSYNC)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
diff --git a/db/db_ret.c b/db/db_ret.c
deleted file mode 100644
index 5ff60d1..0000000
--- a/db/db_ret.c
+++ /dev/null
@@ -1,156 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_am.h"
-
-/*
- * __db_ret --
- * Build return DBT.
- *
- * PUBLIC: int __db_ret __P((DBC *,
- * PUBLIC: PAGE *, u_int32_t, DBT *, void **, u_int32_t *));
- */
-int
-__db_ret(dbc, h, indx, dbt, memp, memsize)
- DBC *dbc;
- PAGE *h;
- u_int32_t indx;
- DBT *dbt;
- void **memp;
- u_int32_t *memsize;
-{
- BKEYDATA *bk;
- BOVERFLOW *bo;
- DB *dbp;
- HOFFPAGE ho;
- u_int32_t len;
- u_int8_t *hk;
- void *data;
-
- dbp = dbc->dbp;
-
- switch (TYPE(h)) {
- case P_HASH_UNSORTED:
- case P_HASH:
- hk = P_ENTRY(dbp, h, indx);
- if (HPAGE_PTYPE(hk) == H_OFFPAGE) {
- memcpy(&ho, hk, sizeof(HOFFPAGE));
- return (__db_goff(dbc, dbt,
- ho.tlen, ho.pgno, memp, memsize));
- }
- len = LEN_HKEYDATA(dbp, h, dbp->pgsize, indx);
- data = HKEYDATA_DATA(hk);
- break;
- case P_LBTREE:
- case P_LDUP:
- case P_LRECNO:
- bk = GET_BKEYDATA(dbp, h, indx);
- if (B_TYPE(bk->type) == B_OVERFLOW) {
- bo = (BOVERFLOW *)bk;
- return (__db_goff(dbc, dbt,
- bo->tlen, bo->pgno, memp, memsize));
- }
- len = bk->len;
- data = bk->data;
- break;
- default:
- return (__db_pgfmt(dbp->env, h->pgno));
- }
-
- return (__db_retcopy(dbp->env, dbt, data, len, memp, memsize));
-}
-
-/*
- * __db_retcopy --
- * Copy the returned data into the user's DBT, handling special flags.
- *
- * PUBLIC: int __db_retcopy __P((ENV *, DBT *,
- * PUBLIC: void *, u_int32_t, void **, u_int32_t *));
- */
-int
-__db_retcopy(env, dbt, data, len, memp, memsize)
- ENV *env;
- DBT *dbt;
- void *data;
- u_int32_t len;
- void **memp;
- u_int32_t *memsize;
-{
- int ret;
-
- ret = 0;
-
- /* If returning a partial record, reset the length. */
- if (F_ISSET(dbt, DB_DBT_PARTIAL)) {
- data = (u_int8_t *)data + dbt->doff;
- if (len > dbt->doff) {
- len -= dbt->doff;
- if (len > dbt->dlen)
- len = dbt->dlen;
- } else
- len = 0;
- }
-
- /*
- * Allocate memory to be owned by the application: DB_DBT_MALLOC,
- * DB_DBT_REALLOC.
- *
- * !!!
- * We always allocate memory, even if we're copying out 0 bytes. This
- * guarantees consistency, i.e., the application can always free memory
- * without concern as to how many bytes of the record were requested.
- *
- * Use the memory specified by the application: DB_DBT_USERMEM.
- *
- * !!!
- * If the length we're going to copy is 0, the application-supplied
- * memory pointer is allowed to be NULL.
- */
- if (F_ISSET(dbt, DB_DBT_USERCOPY)) {
- dbt->size = len;
- return (len == 0 ? 0 : env->dbt_usercopy(dbt, 0, data,
- len, DB_USERCOPY_SETDATA));
-
- } else if (F_ISSET(dbt, DB_DBT_MALLOC))
- ret = __os_umalloc(env, len, &dbt->data);
- else if (F_ISSET(dbt, DB_DBT_REALLOC)) {
- if (dbt->data == NULL || dbt->size == 0 || dbt->size < len)
- ret = __os_urealloc(env, len, &dbt->data);
- } else if (F_ISSET(dbt, DB_DBT_USERMEM)) {
- if (len != 0 && (dbt->data == NULL || dbt->ulen < len))
- ret = DB_BUFFER_SMALL;
- } else if (memp == NULL || memsize == NULL)
- ret = EINVAL;
- else {
- if (len != 0 && (*memsize == 0 || *memsize < len)) {
- if ((ret = __os_realloc(env, len, memp)) == 0)
- *memsize = len;
- else
- *memsize = 0;
- }
- if (ret == 0)
- dbt->data = *memp;
- }
-
- if (ret == 0 && len != 0)
- memcpy(dbt->data, data, len);
-
- /*
- * Return the length of the returned record in the DBT size field.
- * This satisfies the requirement that if we're using user memory
- * and insufficient memory was provided, return the amount necessary
- * in the size field.
- */
- dbt->size = len;
-
- return (ret);
-}
diff --git a/db/db_setid.c b/db/db_setid.c
deleted file mode 100644
index a78977e..0000000
--- a/db/db_setid.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 2000-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_swap.h"
-#include "dbinc/db_am.h"
-#include "dbinc/mp.h"
-
-/*
- * __env_fileid_reset_pp --
- * ENV->fileid_reset pre/post processing.
- *
- * PUBLIC: int __env_fileid_reset_pp __P((DB_ENV *, const char *, u_int32_t));
- */
-int
-__env_fileid_reset_pp(dbenv, name, flags)
- DB_ENV *dbenv;
- const char *name;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int ret;
-
- env = dbenv->env;
-
- ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->fileid_reset");
-
- /*
- * !!!
- * The actual argument checking is simple, do it inline, outside of
- * the replication block.
- */
- if (flags != 0 && flags != DB_ENCRYPT)
- return (__db_ferr(env, "DB_ENV->fileid_reset", 0));
-
- ENV_ENTER(env, ip);
- REPLICATION_WRAP(env,
- (__env_fileid_reset(env, ip, name, LF_ISSET(DB_ENCRYPT) ? 1 : 0)),
- 1, ret);
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __env_fileid_reset --
- * Reset the file IDs for every database in the file.
- * PUBLIC: int __env_fileid_reset
- * PUBLIC: __P((ENV *, DB_THREAD_INFO *, const char *, int));
- */
-int
-__env_fileid_reset(env, ip, name, encrypted)
- ENV *env;
- DB_THREAD_INFO *ip;
- const char *name;
- int encrypted;
-{
- DB *dbp;
- DBC *dbcp;
- DBMETA *meta;
- DBT key, data;
- DB_FH *fhp;
- DB_MPOOLFILE *mpf;
- DB_PGINFO cookie;
- db_pgno_t pgno;
- int t_ret, ret;
- size_t n;
- char *real_name;
- u_int8_t fileid[DB_FILE_ID_LEN], mbuf[DBMETASIZE];
- void *pagep;
-
- dbp = NULL;
- dbcp = NULL;
- fhp = NULL;
- real_name = NULL;
-
- /* Get the real backing file name. */
- if ((ret = __db_appname(env,
- DB_APP_DATA, name, NULL, &real_name)) != 0)
- return (ret);
-
- /* Get a new file ID. */
- if ((ret = __os_fileid(env, real_name, 1, fileid)) != 0)
- goto err;
-
- /*
- * The user may have physically copied a file currently open in the
- * cache, which means if we open this file through the cache before
- * updating the file ID on page 0, we might connect to the file from
- * which the copy was made.
- */
- if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0) {
- __db_err(env, ret, "%s", real_name);
- goto err;
- }
- if ((ret = __os_read(env, fhp, mbuf, sizeof(mbuf), &n)) != 0)
- goto err;
-
- if (n != sizeof(mbuf)) {
- ret = EINVAL;
- __db_errx(env,
- "__env_fileid_reset: %s: unexpected file type or format",
- real_name);
- goto err;
- }
-
- /*
- * Create the DB object.
- */
- if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
- goto err;
-
- /* If configured with a password, the databases are encrypted. */
- if (encrypted && (ret = __db_set_flags(dbp, DB_ENCRYPT)) != 0)
- goto err;
-
- if ((ret = __db_meta_setup(env,
- dbp, real_name, (DBMETA *)mbuf, 0, DB_CHK_META)) != 0)
- goto err;
-
- meta = (DBMETA *)mbuf;
- if (FLD_ISSET(meta->metaflags,
- DBMETA_PART_RANGE | DBMETA_PART_CALLBACK) && (ret =
- __part_fileid_reset(env, ip, name, meta->nparts, encrypted)) != 0)
- goto err;
-
- memcpy(meta->uid, fileid, DB_FILE_ID_LEN);
- cookie.db_pagesize = sizeof(mbuf);
- cookie.flags = dbp->flags;
- cookie.type = dbp->type;
- key.data = &cookie;
-
- if ((ret = __db_pgout(env->dbenv, 0, mbuf, &key)) != 0)
- goto err;
- if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
- goto err;
- if ((ret = __os_write(env, fhp, mbuf, sizeof(mbuf), &n)) != 0)
- goto err;
- if ((ret = __os_fsync(env, fhp)) != 0)
- goto err;
-
- /*
- * Page 0 of the file has an updated file ID, and we can open it in
- * the cache without connecting to a different, existing file. Open
- * the file in the cache, and update the file IDs for subdatabases.
- * (No existing code, as far as I know, actually uses the file ID of
- * a subdatabase, but it's cleaner to get them all.)
- */
-
- /*
- * If the database file doesn't support subdatabases, we only have
- * to update a single metadata page. Otherwise, we have to open a
- * cursor and step through the master database, and update all of
- * the subdatabases' metadata pages.
- */
- if (meta->type != P_BTREEMETA || !F_ISSET(meta, BTM_SUBDB))
- goto err;
-
- /*
- * Open the DB file.
- *
- * !!!
- * Note DB_RDWRMASTER flag, we need to open the master database file
- * for writing in this case.
- */
- if ((ret = __db_open(dbp, ip, NULL,
- name, NULL, DB_UNKNOWN, DB_RDWRMASTER, 0, PGNO_BASE_MD)) != 0)
- goto err;
-
- mpf = dbp->mpf;
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
- if ((ret = __db_cursor(dbp, ip, NULL, &dbcp, 0)) != 0)
- goto err;
- while ((ret = __dbc_get(dbcp, &key, &data, DB_NEXT)) == 0) {
- /*
- * XXX
- * We're handling actual data, not on-page meta-data, so it
- * hasn't been converted to/from opposite endian architectures.
- * Do it explicitly, now.
- */
- memcpy(&pgno, data.data, sizeof(db_pgno_t));
- DB_NTOHL_SWAP(env, &pgno);
- if ((ret = __memp_fget(mpf, &pgno, ip, NULL,
- DB_MPOOL_DIRTY, &pagep)) != 0)
- goto err;
- memcpy(((DBMETA *)pagep)->uid, fileid, DB_FILE_ID_LEN);
- if ((ret = __memp_fput(mpf, ip, pagep, dbcp->priority)) != 0)
- goto err;
- }
- if (ret == DB_NOTFOUND)
- ret = 0;
-
-err: if (dbcp != NULL && (t_ret = __dbc_close(dbcp)) != 0 && ret == 0)
- ret = t_ret;
- if (dbp != NULL && (t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0)
- ret = t_ret;
- if (fhp != NULL &&
- (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
- ret = t_ret;
- if (real_name != NULL)
- __os_free(env, real_name);
-
- return (ret);
-}
diff --git a/db/db_setlsn.c b/db/db_setlsn.c
deleted file mode 100644
index 51ee7d3..0000000
--- a/db/db_setlsn.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 2000-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_am.h"
-#include "dbinc/mp.h"
-#include "dbinc/partition.h"
-#include "dbinc/qam.h"
-
-static int __env_lsn_reset __P((ENV *, DB_THREAD_INFO *, const char *, int));
-
-/*
- * __env_lsn_reset_pp --
- * DB_ENV->lsn_reset pre/post processing.
- *
- * Only flags of 0 or DB_ENCRYPT are accepted; any other value is reported
- * through __db_ferr() before the environment is entered. Otherwise
- * __env_lsn_reset() is run on the file "name" between ENV_ENTER and
- * ENV_LEAVE, inside REPLICATION_WRAP, with DB_ENCRYPT translated into a
- * 0/1 "encrypted" argument. The value REPLICATION_WRAP leaves in ret is
- * what gets returned.
- *
- * PUBLIC: int __env_lsn_reset_pp __P((DB_ENV *, const char *, u_int32_t));
- */
-int
-__env_lsn_reset_pp(dbenv, name, flags)
- DB_ENV *dbenv;
- const char *name;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int ret;
-
- env = dbenv->env;
-
- ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->lsn_reset");
-
- /*
- * !!!
- * The actual argument checking is simple, do it inline, outside of
- * the replication block.
- */
- if (flags != 0 && flags != DB_ENCRYPT)
- return (__db_ferr(env, "DB_ENV->lsn_reset", 0));
-
- ENV_ENTER(env, ip);
- REPLICATION_WRAP(env,
- (__env_lsn_reset(env, ip, name, LF_ISSET(DB_ENCRYPT) ? 1 : 0)),
- 1, ret);
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __env_lsn_reset --
- * Reset the LSNs for every page in the file.
- *
- * Creates a private DB handle, sets DB_ENCRYPT on it when "encrypted" is
- * non-zero, opens "name" with DB_RDWRMASTER and runs __db_lsn_reset() on
- * the handle's mpool file. If that succeeds, a partitioned database
- * (HAVE_PARTITION builds) additionally gets __part_lsn_reset(); otherwise
- * a Queue database gets __qam_lsn_reset(), or __db_no_queue_am() when the
- * library was built without HAVE_QUEUE.
- *
- * Once the handle has been created it is closed on every path. The first
- * error seen is the one returned; a close failure is reported only when
- * everything before it succeeded.
- */
-static int
-__env_lsn_reset(env, ip, name, encrypted)
- ENV *env;
- DB_THREAD_INFO *ip;
- const char *name;
- int encrypted;
-{
- DB *dbp;
- int t_ret, ret;
-
- /* Create the DB object. */
- if ((ret = __db_create_internal(&dbp, env, 0)) != 0)
- return (ret);
-
- /* If configured with a password, the databases are encrypted. */
- if (encrypted && (ret = __db_set_flags(dbp, DB_ENCRYPT)) != 0)
- goto err;
-
- /*
- * Open the DB file.
- *
- * !!!
- * Note DB_RDWRMASTER flag, we need to open the master database file
- * for writing in this case.
- */
- if ((ret = __db_open(dbp, ip, NULL,
- name, NULL, DB_UNKNOWN, DB_RDWRMASTER, 0, PGNO_BASE_MD)) != 0) {
- __db_err(env, ret, "%s", name);
- goto err;
- }
-
- ret = __db_lsn_reset(dbp->mpf, ip);
-#ifdef HAVE_PARTITION
- if (ret == 0 && DB_IS_PARTITIONED(dbp))
- ret = __part_lsn_reset(dbp, ip);
- else
-#endif
- if (ret == 0 && dbp->type == DB_QUEUE)
-#ifdef HAVE_QUEUE
- ret = __qam_lsn_reset(dbp, ip);
-#else
- ret = __db_no_queue_am(env);
-#endif
-
-err: if ((t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
-}
-
-/*
- * __db_lsn_reset --
- *	Mark the LSN of every page reachable through an mpool file handle
- *	as "not logged".
- *
- * Pages are fetched dirty one at a time, starting at page 0, until
- * __memp_fget() fails.  Running off the end of the file
- * (DB_PAGE_NOTFOUND) is the normal way out and is turned into success;
- * any other fetch or put failure is returned unchanged.
- *
- * PUBLIC: int __db_lsn_reset __P((DB_MPOOLFILE *, DB_THREAD_INFO *));
- */
-int
-__db_lsn_reset(mpf, ip)
-	DB_MPOOLFILE *mpf;
-	DB_THREAD_INFO *ip;
-{
-	PAGE *page;
-	db_pgno_t next;
-	int ret;
-
-	next = 0;
-	for (;;) {
-		ret = __memp_fget(mpf,
-		    &next, ip, NULL, DB_MPOOL_DIRTY, &page);
-		if (ret != 0)
-			break;
-
-		LSN_NOT_LOGGED(page->lsn);
-
-		ret = __memp_fput(mpf, ip, page, DB_PRIORITY_UNCHANGED);
-		if (ret != 0)
-			break;
-
-		++next;
-	}
-
-	/* Reaching the end of the file is not an error. */
-	return (ret == DB_PAGE_NOTFOUND ? 0 : ret);
-}
diff --git a/db/db_sort_multiple.c b/db/db_sort_multiple.c
deleted file mode 100644
index 32ae2df..0000000
--- a/db/db_sort_multiple.c
+++ /dev/null
@@ -1,287 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/btree.h"
-
-static int __db_quicksort __P((DB *, DBT *, DBT *, u_int32_t *, u_int32_t *,
- u_int32_t *, u_int32_t *, u_int32_t));
-
-/*
- * __db_compare_both --
- *	Order two key/data pairs the way the database would: by key using
- *	the btree comparison function and, when the handle has
- *	DB_AM_DUPSORT set and the keys tie, by data using the duplicate
- *	comparison function (the compressed variant for compressed
- *	databases).  A missing data item sorts before a present one; two
- *	missing data items are equal.
- *
- * PUBLIC: int __db_compare_both __P((DB *, const DBT *, const DBT *,
- * PUBLIC:   const DBT *, const DBT *));
- */
-int
-__db_compare_both(db, akey, adata, bkey, bdata)
-	DB *db;
-	const DBT *akey;
-	const DBT *adata;
-	const DBT *bkey;
-	const DBT *bdata;
-{
-	BTREE *bt;
-	int order;
-
-	bt = (BTREE *)db->bt_internal;
-
-	/* The keys decide unless they tie and duplicates are sorted. */
-	order = bt->bt_compare(db, akey, bkey);
-	if (order != 0 || !F_ISSET(db, DB_AM_DUPSORT))
-		return (order);
-
-	/* Equal keys: handle absent data items before comparing. */
-	if (adata == NULL || bdata == NULL) {
-		if (adata != NULL)
-			return (1);
-		return (bdata == NULL ? 0 : -1);
-	}
-
-#ifdef HAVE_COMPRESSION
-	if (DB_IS_COMPRESSED(db))
-		return (bt->compress_dup_compare(db, adata, bdata));
-#endif
-	return (db->dup_compare(db, adata, bdata));
-}
-
-#define DB_SORT_SWAP(a, ad, b, bd) \
-do { \
- tmp = (a)[0]; (a)[0] = (b)[0]; (b)[0] = tmp; \
- tmp = (a)[-1]; (a)[-1] = (b)[-1]; (b)[-1] = tmp; \
- if (data != NULL) { \
- tmp = (ad)[0]; (ad)[0] = (bd)[0]; (bd)[0] = tmp; \
- tmp = (ad)[-1]; (ad)[-1] = (bd)[-1]; (bd)[-1] = tmp; \
- } \
-} while (0)
-
-#define DB_SORT_LOAD_DBT(a, ad, aptr, adptr) \
-do { \
- (a).data = (u_int8_t*)key->data + (aptr)[0]; \
- (a).size = (aptr)[-1]; \
- if (data != NULL) { \
- (ad).data = (u_int8_t*)data->data + (adptr)[0]; \
- (ad).size = (adptr)[-1]; \
- } \
-} while (0)
-
-#define DB_SORT_COMPARE(a, ad, b, bd) (data != NULL ? \
- __db_compare_both(db, &(a), &(ad), &(b), &(bd)) : \
- __db_compare_both(db, &(a), 0, &(b), 0))
-
-#define DB_SORT_STACKSIZE 32
-
-/*
- * __db_quicksort --
- * The quicksort implementation for __db_sort_multiple() and
- * __db_sort_multiple_key().
- *
- * Sorts in place by permuting the u_int32_t index words of the buffer;
- * the item bytes themselves are never moved. An entry is addressed by a
- * pointer p where p[0] is a byte offset into key->data (or data->data)
- * and p[-1] is the item length (see DB_SORT_LOAD_DBT). Entries are
- * walked from kstart downwards in steps of "size" words. kend is
- * exclusive, so the last entry lives at kend + size, and that slot is
- * where the pivot is parked. dstart/dend are the parallel pointers for
- * the data items and are only dereferenced when data != NULL.
- *
- * The DB_SORT_* macros defined just above are not self-contained: they
- * use this function's locals db, key, data and tmp. Note that
- * DB_SORT_SWAP overwrites tmp, so the element count kept in tmp is only
- * valid until the first swap.
- *
- * Recursion is replaced by an explicit stack of pending ranges. It
- * starts in stackbuf (DB_SORT_STACKSIZE entries) and is doubled on the
- * heap whenever it fills up. Returns 0, or the error from __os_malloc()
- * or __os_realloc() if the stack cannot be grown.
- */
-static int
-__db_quicksort(db, key, data, kstart, kend, dstart, dend, size)
- DB *db;
- DBT *key, *data;
- u_int32_t *kstart, *kend, *dstart, *dend;
- u_int32_t size;
-{
- int ret;
- u_int32_t tmp;
- u_int32_t *kmiddle, *dmiddle, *kptr, *dptr;
- DBT a, ad, b, bd, m, md;
- ENV *env;
-
- struct DB_SORT_quicksort_stack {
- u_int32_t *kstart;
- u_int32_t *kend;
- u_int32_t *dstart;
- u_int32_t *dend;
- } stackbuf[DB_SORT_STACKSIZE], *stack;
- u_int32_t soff, slen;
-
- ret = 0;
- env = db->env;
-
- memset(&a, 0, sizeof(DBT));
- memset(&ad, 0, sizeof(DBT));
- memset(&b, 0, sizeof(DBT));
- memset(&bd, 0, sizeof(DBT));
- memset(&m, 0, sizeof(DBT));
- memset(&md, 0, sizeof(DBT));
-
- /* NB end is smaller than start */
-
- stack = stackbuf;
- soff = 0;
- slen = DB_SORT_STACKSIZE;
-
- start:
- if (kend >= kstart) goto pop;
-
- /* If there's only one value, it's already sorted */
- tmp = (u_int32_t)(kstart - kend) / size;
- if (tmp == 1) goto pop;
-
- DB_SORT_LOAD_DBT(a, ad, kstart, dstart);
- DB_SORT_LOAD_DBT(b, bd, kend + size, dend + size);
-
- if (tmp == 2) {
- /* Special case the sorting of two value sequences */
- if (DB_SORT_COMPARE(a, ad, b, bd) > 0) {
- DB_SORT_SWAP(kstart, dstart, kend + size, dend + size);
- }
- goto pop;
- }
-
- kmiddle = kstart - (tmp / 2) * size;
- dmiddle = dstart - (tmp / 2) * size;
- DB_SORT_LOAD_DBT(m, md, kmiddle, dmiddle);
-
- /*
- * Find the median of three (first, middle, last) and move it into
- * the last slot, where the partition step expects the pivot.
- */
- if (DB_SORT_COMPARE(a, ad, b, bd) < 0) {
- if (DB_SORT_COMPARE(m, md, a, ad) < 0) {
- /* m < a < b */
- DB_SORT_SWAP(kstart, dstart, kend + size, dend + size);
- } else if (DB_SORT_COMPARE(m, md, b, bd) < 0) {
- /* a < m < b */
- DB_SORT_SWAP(kmiddle,
- dmiddle, kend + size, dend + size);
- } else {
- /* a < b < m */
- /* Do nothing */
- }
- } else {
- if (DB_SORT_COMPARE(a, ad, m, md) < 0) {
- /* b < a < m */
- DB_SORT_SWAP(kstart, dstart, kend + size, dend + size);
- } else if (DB_SORT_COMPARE(b, bd, m, md) < 0) {
- /* b < m < a */
- DB_SORT_SWAP(kmiddle,
- dmiddle, kend + size, dend + size);
- } else {
- /* m < b < a */
- /* Do nothing */
- }
- }
-
- /*
- * partition: reload the pivot from the last slot, then sweep the
- * range moving every entry that compares below the pivot to the
- * front; kmiddle ends up at the first entry not below the pivot.
- */
- DB_SORT_LOAD_DBT(b, bd, kend + size, dend + size);
- kmiddle = kstart;
- dmiddle = dstart;
- for (kptr = kstart, dptr = dstart; kptr > kend;
- kptr -= size, dptr -= size) {
- DB_SORT_LOAD_DBT(a, ad, kptr, dptr);
- if (DB_SORT_COMPARE(a, ad, b, bd) < 0) {
- DB_SORT_SWAP(kmiddle, dmiddle, kptr, dptr);
- kmiddle -= size;
- dmiddle -= size;
- }
- }
-
- DB_SORT_SWAP(kmiddle, dmiddle, kend + size, dend + size);
-
- if (soff == slen) {
- /* Grow the stack */
- slen = slen * 2;
- if (stack == stackbuf) {
- ret = __os_malloc(env, slen *
- sizeof(struct DB_SORT_quicksort_stack), &stack);
- if (ret != 0) goto error;
- memcpy(stack, stackbuf, soff *
- sizeof(struct DB_SORT_quicksort_stack));
- } else {
- ret = __os_realloc(env, slen *
- sizeof(struct DB_SORT_quicksort_stack), &stack);
- if (ret != 0) goto error;
- }
- }
-
- /*
- * divide and conquer: push the range after the pivot for later and
- * keep looping on the range before it (kend is exclusive, so setting
- * it to kmiddle leaves the pivot out).
- */
- stack[soff].kstart = kmiddle - size;
- stack[soff].kend = kend;
- stack[soff].dstart = dmiddle - size;
- stack[soff].dend = dend;
- ++soff;
-
- kend = kmiddle;
- dend = dmiddle;
-
- goto start;
-
- pop:
- if (soff != 0) {
- --soff;
- kstart = stack[soff].kstart;
- kend = stack[soff].kend;
- dstart = stack[soff].dstart;
- dend = stack[soff].dend;
- goto start;
- }
-
- error:
- if (stack != stackbuf)
- __os_free(env, stack);
-
- return ret;
-}
-
-#undef DB_SORT_SWAP
-#undef DB_SORT_LOAD_DBT
-
-/*
- * __db_sort_multiple --
- *	Sort the contents of bulk buffers in place.
- *
- *	flags == DB_MULTIPLE_KEY: "key" is a DB_MULTIPLE_KEY format DBT; its
- *	key/data pairs are sorted with the BTree comparison function and the
- *	duplicate comparison function.  "data" is not used.
- *
- *	flags == DB_MULTIPLE: "key" (and "data", when not NULL) are
- *	DB_MULTIPLE format DBTs whose items are taken pairwise and sorted
- *	together.  With data == NULL only the keys are sorted, using the
- *	btree comparison function.
- *
- *	Any other flags value is reported through __db_ferr().
- *
- *	The index words are read backwards from the end of each buffer
- *	(data + ulen) until a (u_int32_t)-1 terminator is found; the actual
- *	work is done by __db_quicksort(), an in-place quicksort using
- *	median of three for the pivot point.
- *
- * PUBLIC: int __db_sort_multiple __P((DB *, DBT *, DBT *, u_int32_t));
- */
-int
-__db_sort_multiple(db, key, data, flags)
-	DB *db;
-	DBT *key, *data;
-	u_int32_t flags;
-{
-	u_int32_t *key_top, *key_end, *data_top, *data_end;
-
-	/* TODO: sanity checks on the DBTs */
-	/* DB_ILLEGAL_METHOD(db, DB_OK_BTREE); */
-
-	/* The index of a bulk buffer starts at its last u_int32_t word. */
-	key_top = (u_int32_t *)((u_int8_t *)key->data + key->ulen) - 1;
-
-	if (flags == DB_MULTIPLE) {
-		data_top = data == NULL ? key_top :
-		    (u_int32_t *)((u_int8_t *)data->data + data->ulen) - 1;
-
-		/* Find the end: each item takes two index words. */
-		key_end = key_top;
-		data_end = data_top;
-		while (*key_end != (u_int32_t)-1 &&
-		    *data_end != (u_int32_t)-1) {
-			key_end -= 2;
-			data_end -= 2;
-		}
-
-		return (__db_quicksort(db,
-		    key, data, key_top, key_end, data_top, data_end, 2));
-	}
-
-	if (flags == DB_MULTIPLE_KEY) {
-		/* Find the end: each key/data pair takes four index words. */
-		key_end = key_top;
-		while (*key_end != (u_int32_t)-1)
-			key_end -= 4;
-
-		return (__db_quicksort(db,
-		    key, key, key_top, key_end, key_top - 2, key_end - 2, 4));
-	}
-
-	return (__db_ferr(db->env, "DB->sort_multiple", 0));
-}
diff --git a/db/db_stati.c b/db/db_stati.c
deleted file mode 100644
index b8d3a3f..0000000
--- a/db/db_stati.c
+++ /dev/null
@@ -1,494 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/btree.h"
-#include "dbinc/hash.h"
-#include "dbinc/qam.h"
-#include "dbinc/lock.h"
-#include "dbinc/log.h"
-#include "dbinc/mp.h"
-#include "dbinc/partition.h"
-
-#ifdef HAVE_STATISTICS
-static int __db_print_all __P((DB *, u_int32_t));
-static int __db_print_citem __P((DBC *));
-static int __db_print_cursor __P((DB *));
-static int __db_print_stats __P((DB *, DB_THREAD_INFO *, u_int32_t));
-static int __db_stat __P((DB *, DB_THREAD_INFO *, DB_TXN *, void *, u_int32_t));
-static int __db_stat_arg __P((DB *, u_int32_t));
-
-/*
- * __db_stat_pp --
- * DB->stat pre/post processing.
- *
- * Runs the DB_ILLEGAL_BEFORE_OPEN check, validates flags with
- * __db_stat_arg(), and then calls __db_stat() between ENV_ENTER and
- * ENV_LEAVE. In a replicated environment the call is bracketed by
- * __db_rep_enter() and __env_db_rep_exit(); if entering fails the exit
- * call is skipped, and an exit failure is reported only when __db_stat()
- * itself succeeded.
- *
- * PUBLIC: int __db_stat_pp __P((DB *, DB_TXN *, void *, u_int32_t));
- */
-int
-__db_stat_pp(dbp, txn, spp, flags)
- DB *dbp;
- DB_TXN *txn;
- void *spp;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- env = dbp->env;
-
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat");
-
- if ((ret = __db_stat_arg(dbp, flags)) != 0)
- return (ret);
-
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check && (ret = __db_rep_enter(dbp, 1, 0,
- txn != NULL)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- ret = __db_stat(dbp, ip, txn, spp, flags);
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
-err: ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_stat --
- * DB->stat.
- *
- * Opens a cursor on dbp, passing through only the DB_READ_COMMITTED and
- * DB_READ_UNCOMMITTED bits of flags, then clears those bits so the
- * access-method routine sees the remaining flags only. Partitioned
- * databases (HAVE_PARTITION builds) go to __partition_stat(); otherwise
- * the call is dispatched on dbp->type to __bam_stat(), __ham_stat() or
- * __qam_stat(), and any other type is reported through
- * __db_unknown_type(). spp is handed to the chosen routine untouched.
- *
- * The cursor is always closed; a close failure is returned only if the
- * stat call itself succeeded.
- */
-static int
-__db_stat(dbp, ip, txn, spp, flags)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- void *spp;
- u_int32_t flags;
-{
- DBC *dbc;
- ENV *env;
- int ret, t_ret;
-
- env = dbp->env;
-
- /* Acquire a cursor. */
- if ((ret = __db_cursor(dbp, ip, txn,
- &dbc, LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED))) != 0)
- return (ret);
-
- DEBUG_LWRITE(dbc, NULL, "DB->stat", NULL, NULL, flags);
- LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED);
-#ifdef HAVE_PARTITION
- if (DB_IS_PARTITIONED(dbp))
- ret = __partition_stat(dbc, spp, flags);
- else
-#endif
- switch (dbp->type) {
- case DB_BTREE:
- case DB_RECNO:
- ret = __bam_stat(dbc, spp, flags);
- break;
- case DB_HASH:
- ret = __ham_stat(dbc, spp, flags);
- break;
- case DB_QUEUE:
- ret = __qam_stat(dbc, spp, flags);
- break;
- case DB_UNKNOWN:
- default:
- ret = (__db_unknown_type(env, "DB->stat", dbp->type));
- break;
- }
-
- if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __db_stat_arg --
- *	Validate the flags passed to DB->stat.
- *
- * The isolation bits (DB_READ_COMMITTED, DB_READ_UNCOMMITTED) are masked
- * off first; what is left must be exactly 0 or DB_FAST_STAT.  Returns 0
- * for acceptable flags, otherwise whatever __db_ferr() returns.
- */
-static int
-__db_stat_arg(dbp, flags)
-	DB *dbp;
-	u_int32_t flags;
-{
-	ENV *env;
-
-	env = dbp->env;
-
-	/* The isolation bits are always allowed; ignore them here. */
-	LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED);
-
-	if (flags == 0 || flags == DB_FAST_STAT)
-		return (0);
-
-	return (__db_ferr(env, "DB->stat", 0));
-}
-
-/*
- * __db_stat_print_pp --
- * DB->stat_print pre/post processing.
- *
- * Runs the DB_ILLEGAL_BEFORE_OPEN check, checks flags against
- * DB_FAST_STAT | DB_STAT_ALL with __db_fchk(), and then calls
- * __db_stat_print() between ENV_ENTER and ENV_LEAVE. In a replicated
- * environment the call is bracketed by __db_rep_enter() and
- * __env_db_rep_exit(); an exit failure is reported only when
- * __db_stat_print() itself succeeded.
- *
- * PUBLIC: int __db_stat_print_pp __P((DB *, u_int32_t));
- */
-int
-__db_stat_print_pp(dbp, flags)
- DB *dbp;
- u_int32_t flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret;
-
- env = dbp->env;
-
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat_print");
-
- /*
- * !!!
- * The actual argument checking is simple, do it inline.
- */
- if ((ret = __db_fchk(env,
- "DB->stat_print", flags, DB_FAST_STAT | DB_STAT_ALL)) != 0)
- return (ret);
-
- ENV_ENTER(env, ip);
-
- /* Check for replication block. */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- ret = __db_stat_print(dbp, ip, flags);
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
-err: ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_stat_print --
- *	DB->stat_print.
- *
- * Prints a "Local time" line, then the debugging handle dump when
- * DB_STAT_ALL is set, then the default statistics for the handle.
- * Stops at, and returns, the first error.
- *
- * PUBLIC: int __db_stat_print __P((DB *, DB_THREAD_INFO *, u_int32_t));
- */
-int
-__db_stat_print(dbp, ip, flags)
-	DB *dbp;
-	DB_THREAD_INFO *ip;
-	u_int32_t flags;
-{
-	char stamp[CTIME_BUFLEN];
-	time_t current;
-	int ret;
-
-	(void)time(&current);
-	__db_msg(dbp->env,
-	    "%.24s\tLocal time", __os_ctime(&current, stamp));
-
-	/* The full handle dump is only produced on request. */
-	if (LF_ISSET(DB_STAT_ALL)) {
-		ret = __db_print_all(dbp, flags);
-		if (ret != 0)
-			return (ret);
-	}
-
-	return (__db_print_stats(dbp, ip, flags));
-}
-
-/*
- * __db_print_stats --
- * Display default DB handle statistics.
- *
- * Opens a non-transactional cursor on dbp and dispatches on dbp->type to
- * __bam_stat_print(), __ham_stat_print() or __qam_stat_print(); any
- * other type is reported through __db_unknown_type(). The cursor is
- * always closed, and a close failure is returned only if the print call
- * itself succeeded.
- */
-static int
-__db_print_stats(dbp, ip, flags)
- DB *dbp;
- DB_THREAD_INFO *ip;
- u_int32_t flags;
-{
- DBC *dbc;
- ENV *env;
- int ret, t_ret;
-
- env = dbp->env;
-
- /* Acquire a cursor. */
- if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0)
- return (ret);
-
- DEBUG_LWRITE(dbc, NULL, "DB->stat_print", NULL, NULL, 0);
-
- switch (dbp->type) {
- case DB_BTREE:
- case DB_RECNO:
- ret = __bam_stat_print(dbc, flags);
- break;
- case DB_HASH:
- ret = __ham_stat_print(dbc, flags);
- break;
- case DB_QUEUE:
- ret = __qam_stat_print(dbc, flags);
- break;
- case DB_UNKNOWN:
- default:
- ret = (__db_unknown_type(env, "DB->stat_print", dbp->type));
- break;
- }
-
- if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __db_print_all --
- * Display debugging DB handle statistics.
- *
- * Prints the fields of the DB handle one per line through __db_msg() and
- * the STAT_* macros, the handle's flag word decoded with the fn[] table
- * below (DB_AM_* bit to name), the file naming information, and finally
- * the handle's cursor queues via __db_print_cursor(). The result of
- * __db_print_cursor() is deliberately discarded, so this function always
- * returns 0.
- *
- * NOTE(review): the STAT_* macros are defined elsewhere; they appear to
- * rely on the local variable being named "env" -- confirm before
- * renaming it.
- */
-static int
-__db_print_all(dbp, flags)
- DB *dbp;
- u_int32_t flags;
-{
- static const FN fn[] = {
- { DB_AM_CHKSUM, "DB_AM_CHKSUM" },
- { DB_AM_COMPENSATE, "DB_AM_COMPENSATE" },
- { DB_AM_CREATED, "DB_AM_CREATED" },
- { DB_AM_CREATED_MSTR, "DB_AM_CREATED_MSTR" },
- { DB_AM_DBM_ERROR, "DB_AM_DBM_ERROR" },
- { DB_AM_DELIMITER, "DB_AM_DELIMITER" },
- { DB_AM_DISCARD, "DB_AM_DISCARD" },
- { DB_AM_DUP, "DB_AM_DUP" },
- { DB_AM_DUPSORT, "DB_AM_DUPSORT" },
- { DB_AM_ENCRYPT, "DB_AM_ENCRYPT" },
- { DB_AM_FIXEDLEN, "DB_AM_FIXEDLEN" },
- { DB_AM_INMEM, "DB_AM_INMEM" },
- { DB_AM_IN_RENAME, "DB_AM_IN_RENAME" },
- { DB_AM_NOT_DURABLE, "DB_AM_NOT_DURABLE" },
- { DB_AM_OPEN_CALLED, "DB_AM_OPEN_CALLED" },
- { DB_AM_PAD, "DB_AM_PAD" },
- { DB_AM_PGDEF, "DB_AM_PGDEF" },
- { DB_AM_RDONLY, "DB_AM_RDONLY" },
- { DB_AM_READ_UNCOMMITTED, "DB_AM_READ_UNCOMMITTED" },
- { DB_AM_RECNUM, "DB_AM_RECNUM" },
- { DB_AM_RECOVER, "DB_AM_RECOVER" },
- { DB_AM_RENUMBER, "DB_AM_RENUMBER" },
- { DB_AM_REVSPLITOFF, "DB_AM_REVSPLITOFF" },
- { DB_AM_SECONDARY, "DB_AM_SECONDARY" },
- { DB_AM_SNAPSHOT, "DB_AM_SNAPSHOT" },
- { DB_AM_SUBDB, "DB_AM_SUBDB" },
- { DB_AM_SWAP, "DB_AM_SWAP" },
- { DB_AM_TXN, "DB_AM_TXN" },
- { DB_AM_VERIFYING, "DB_AM_VERIFYING" },
- { 0, NULL }
- };
- ENV *env;
- char time_buf[CTIME_BUFLEN];
-
- env = dbp->env;
-
- __db_msg(env, "%s", DB_GLOBAL(db_line));
- __db_msg(env, "DB handle information:");
- STAT_ULONG("Page size", dbp->pgsize);
- STAT_ISSET("Append recno", dbp->db_append_recno);
- STAT_ISSET("Feedback", dbp->db_feedback);
- STAT_ISSET("Dup compare", dbp->dup_compare);
- STAT_ISSET("App private", dbp->app_private);
- STAT_ISSET("DbEnv", dbp->env);
- STAT_STRING("Type", __db_dbtype_to_string(dbp->type));
-
- __mutex_print_debug_single(env, "Thread mutex", dbp->mutex, flags);
-
- STAT_STRING("File", dbp->fname);
- STAT_STRING("Database", dbp->dname);
- STAT_HEX("Open flags", dbp->open_flags);
-
- __db_print_fileid(env, dbp->fileid, "\tFile ID");
-
- STAT_ULONG("Cursor adjust ID", dbp->adj_fileid);
- STAT_ULONG("Meta pgno", dbp->meta_pgno);
- if (dbp->locker != NULL)
- STAT_ULONG("Locker ID", dbp->locker->id);
- if (dbp->cur_locker != NULL)
- STAT_ULONG("Handle lock", dbp->cur_locker->id);
- if (dbp->associate_locker != NULL)
- STAT_ULONG("Associate lock", dbp->associate_locker->id);
- STAT_ULONG("RPC remote ID", dbp->cl_id);
-
- __db_msg(env,
- "%.24s\tReplication handle timestamp",
- dbp->timestamp == 0 ? "0" : __os_ctime(&dbp->timestamp, time_buf));
-
- STAT_ISSET("Secondary callback", dbp->s_callback);
- STAT_ISSET("Primary handle", dbp->s_primary);
-
- STAT_ISSET("api internal", dbp->api_internal);
- STAT_ISSET("Btree/Recno internal", dbp->bt_internal);
- STAT_ISSET("Hash internal", dbp->h_internal);
- STAT_ISSET("Queue internal", dbp->q_internal);
-
- __db_prflags(env, NULL, dbp->flags, fn, NULL, "\tFlags");
-
- if (dbp->log_filename == NULL)
- STAT_ISSET("File naming information", dbp->log_filename);
- else
- __dbreg_print_fname(env, dbp->log_filename);
-
- (void)__db_print_cursor(dbp);
-
- return (0);
-}
-
-/*
- * __db_print_cursor --
- * Display the cursor active and free queues.
- *
- * Holds dbp->mutex while walking the handle's active, join and free
- * cursor queues, printing each cursor with __db_print_citem(). A failure
- * on one cursor does not stop the walk; the first non-zero result is
- * remembered and returned once all three queues have been printed.
- */
-static int
-__db_print_cursor(dbp)
- DB *dbp;
-{
- DBC *dbc;
- ENV *env;
- int ret, t_ret;
-
- env = dbp->env;
-
- __db_msg(env, "%s", DB_GLOBAL(db_line));
- __db_msg(env, "DB handle cursors:");
-
- ret = 0;
- MUTEX_LOCK(dbp->env, dbp->mutex);
- __db_msg(env, "Active queue:");
- TAILQ_FOREACH(dbc, &dbp->active_queue, links)
- if ((t_ret = __db_print_citem(dbc)) != 0 && ret == 0)
- ret = t_ret;
- __db_msg(env, "Join queue:");
- TAILQ_FOREACH(dbc, &dbp->join_queue, links)
- if ((t_ret = __db_print_citem(dbc)) != 0 && ret == 0)
- ret = t_ret;
- __db_msg(env, "Free queue:");
- TAILQ_FOREACH(dbc, &dbp->free_queue, links)
- if ((t_ret = __db_print_citem(dbc)) != 0 && ret == 0)
- ret = t_ret;
- MUTEX_UNLOCK(dbp->env, dbp->mutex);
-
- return (ret);
-}
-
-/*
- * __db_print_citem --
- *	Display a single cursor: the generic DBC fields, the fields of its
- *	internal structure, its decoded DBC_* flags and, for Btree/Recno
- *	and Hash cursors, the access-method specific state.  Always
- *	returns 0.
- */
-static int
-__db_print_citem(dbc)
-	DBC *dbc;
-{
-	static const FN cursor_flag_names[] = {
-		{ DBC_ACTIVE,		"DBC_ACTIVE" },
-		{ DBC_DONTLOCK,		"DBC_DONTLOCK" },
-		{ DBC_MULTIPLE,		"DBC_MULTIPLE" },
-		{ DBC_MULTIPLE_KEY,	"DBC_MULTIPLE_KEY" },
-		{ DBC_OPD,		"DBC_OPD" },
-		{ DBC_OWN_LID,		"DBC_OWN_LID" },
-		{ DBC_READ_COMMITTED,	"DBC_READ_COMMITTED" },
-		{ DBC_READ_UNCOMMITTED,	"DBC_READ_UNCOMMITTED" },
-		{ DBC_RECOVER,		"DBC_RECOVER" },
-		{ DBC_RMW,		"DBC_RMW" },
-		{ DBC_TRANSIENT,	"DBC_TRANSIENT" },
-		{ DBC_WAS_READ_COMMITTED,"DBC_WAS_READ_COMMITTED" },
-		{ DBC_WRITECURSOR,	"DBC_WRITECURSOR" },
-		{ DBC_WRITER,		"DBC_WRITER" },
-		{ 0,			NULL }
-	};
-	DB *dbp;
-	DBC_INTERNAL *icp;
-	ENV *env;
-
-	dbp = dbc->dbp;
-	env = dbp->env;
-	icp = dbc->internal;
-
-	/* Generic cursor fields. */
-	STAT_POINTER("DBC", dbc);
-	STAT_POINTER("Associated dbp", dbc->dbp);
-	STAT_POINTER("Associated txn", dbc->txn);
-	STAT_POINTER("Internal", icp);
-	STAT_HEX("Default locker ID", dbc->lref == NULL ? 0 : dbc->lref->id);
-	STAT_HEX("Locker", P_TO_ULONG(dbc->locker));
-	STAT_STRING("Type", __db_dbtype_to_string(dbc->dbtype));
-
-	/* Fields of the internal cursor structure. */
-	STAT_POINTER("Off-page duplicate cursor", icp->opd);
-	STAT_POINTER("Referenced page", icp->page);
-	STAT_ULONG("Root", icp->root);
-	STAT_ULONG("Page number", icp->pgno);
-	STAT_ULONG("Page index", icp->indx);
-	STAT_STRING("Lock mode", __db_lockmode_to_string(icp->lock_mode));
-	__db_prflags(env,
-	    NULL, dbc->flags, cursor_flag_names, NULL, "\tFlags");
-
-	/* Access-method specific state; Queue cursors have none. */
-	if (dbc->dbtype == DB_BTREE || dbc->dbtype == DB_RECNO) {
-		__bam_print_cursor(dbc);
-	} else if (dbc->dbtype == DB_HASH) {
-		__ham_print_cursor(dbc);
-	} else if (dbc->dbtype == DB_UNKNOWN) {
-		DB_ASSERT(env, dbp->type != DB_UNKNOWN);
-	}
-
-	return (0);
-}
-
-#else /* !HAVE_STATISTICS */
-
-/*
- * __db_stat_pp --
- *	DB->stat replacement for builds without HAVE_STATISTICS: ignores
- *	every argument except the handle and returns the result of
- *	__db_stat_not_built().
- */
-int
-__db_stat_pp(dbp, txn, spp, flags)
-	DB *dbp;
-	DB_TXN *txn;
-	void *spp;
-	u_int32_t flags;
-{
-	ENV *env;
-
-	env = dbp->env;
-
-	/* Only the handle is used; quiet the compiler about the rest. */
-	COMPQUIET(txn, NULL);
-	COMPQUIET(spp, NULL);
-	COMPQUIET(flags, 0);
-
-	return (__db_stat_not_built(env));
-}
-
-/*
- * __db_stat_print_pp --
- *	DB->stat_print replacement for builds without HAVE_STATISTICS:
- *	ignores flags and returns the result of __db_stat_not_built().
- */
-int
-__db_stat_print_pp(dbp, flags)
-	DB *dbp;
-	u_int32_t flags;
-{
-	ENV *env;
-
-	env = dbp->env;
-
-	/* Only the handle is used; quiet the compiler about flags. */
-	COMPQUIET(flags, 0);
-
-	return (__db_stat_not_built(env));
-}
-#endif
diff --git a/db/db_truncate.c b/db/db_truncate.c
deleted file mode 100644
index 66f4180..0000000
--- a/db/db_truncate.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 2001-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/btree.h"
-#include "dbinc/hash.h"
-#include "dbinc/qam.h"
-#include "dbinc/lock.h"
-#include "dbinc/log.h"
-#include "dbinc/partition.h"
-#include "dbinc/txn.h"
-
-static int __db_cursor_check __P((DB *));
-
-/*
- * __db_truncate_pp
- * DB->truncate pre/post processing.
- *
- * STRIP_AUTO_COMMIT is applied to flags first. Secondary indices are
- * refused with EINVAL, as is any remaining flag (checked by __db_fchk()
- * against an empty mask). The call is also refused while an initialized
- * cursor exists on the file (__db_cursor_check()).
- *
- * After the replication and read-only checks, a local transaction is
- * begun when IS_DB_AUTO_COMMIT() says so. It is resolved on the way out
- * by __db_txn_auto_resolve(), which is handed the result so far. Errors
- * from the cleanup steps are reported only if nothing failed earlier.
- *
- * PUBLIC: int __db_truncate_pp __P((DB *, DB_TXN *, u_int32_t *, u_int32_t));
- */
-int
-__db_truncate_pp(dbp, txn, countp, flags)
- DB *dbp;
- DB_TXN *txn;
- u_int32_t *countp, flags;
-{
- DB_THREAD_INFO *ip;
- ENV *env;
- int handle_check, ret, t_ret, txn_local;
-
- env = dbp->env;
- handle_check = txn_local = 0;
-
- STRIP_AUTO_COMMIT(flags);
-
- /* Check for invalid flags. */
- if (F_ISSET(dbp, DB_AM_SECONDARY)) {
- __db_errx(env, "DB->truncate forbidden on secondary indices");
- return (EINVAL);
- }
- if ((ret = __db_fchk(env, "DB->truncate", flags, 0)) != 0)
- return (ret);
-
- ENV_ENTER(env, ip);
-
- /*
- * Make sure there are no active cursors on this db. Since we drop
- * pages we cannot really adjust cursors.
- */
- if ((ret = __db_cursor_check(dbp)) != 0) {
- __db_errx(env,
- "DB->truncate not permitted with active cursors");
- goto err;
- }
-
-#ifdef CONFIG_TEST
- if (IS_REP_MASTER(env))
- DB_TEST_WAIT(env, env->test_check);
-#endif
- /* Check for replication block. */
- handle_check = IS_ENV_REPLICATED(env);
- if (handle_check &&
- (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) {
- handle_check = 0;
- goto err;
- }
-
- /*
- * Check for changes to a read-only database. This must be after the
- * replication block so that we cannot race master/client state changes.
- */
- if (DB_IS_READONLY(dbp)) {
- ret = __db_rdonly(env, "DB->truncate");
- goto err;
- }
-
- /*
- * Create local transaction as necessary, check for consistent
- * transaction usage.
- */
- if (IS_DB_AUTO_COMMIT(dbp, txn)) {
- if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0)
- goto err;
- txn_local = 1;
- }
-
- /* Check for consistent transaction usage. */
- if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0)
- goto err;
-
- ret = __db_truncate(dbp, ip, txn, countp);
-
-err: if (txn_local &&
- (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0)
- ret = t_ret;
-
- /* Release replication block. */
- if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0)
- ret = t_ret;
-
- ENV_LEAVE(env, ip);
- return (ret);
-}
-
-/*
- * __db_truncate
- * DB->truncate.
- *
- * For a primary database that is not a Queue, every secondary is
- * truncated first by a recursive call; the secondaries' counts go to a
- * scratch variable and are discarded. The database itself is then
- * truncated through a cursor by the partition or access-method routine
- * selected on dbp->type, which is given countp. The cursor is closed
- * before returning, and a close failure is reported only if the truncate
- * itself succeeded.
- *
- * NOTE(review): DB_TEST_RECOVERY and DB_TEST_RECOVERY_LABEL are defined
- * elsewhere; the former appears to be able to jump to the label supplied
- * by the latter, which is why dbc is initialized to NULL and re-checked
- * before the close -- confirm against the macro definitions.
- *
- * PUBLIC: int __db_truncate __P((DB *, DB_THREAD_INFO *, DB_TXN *,
- * PUBLIC: u_int32_t *));
- */
-int
-__db_truncate(dbp, ip, txn, countp)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- u_int32_t *countp;
-{
- DB *sdbp;
- DBC *dbc;
- ENV *env;
- u_int32_t scount;
- int ret, t_ret;
-
- env = dbp->env;
- dbc = NULL;
- ret = 0;
-
- /*
- * Run through all secondaries and truncate them first. The count
- * returned is the count of the primary only. QUEUE uses normal
- * processing to truncate so it will update the secondaries normally.
- */
- if (dbp->type != DB_QUEUE && DB_IS_PRIMARY(dbp)) {
- if ((ret = __db_s_first(dbp, &sdbp)) != 0)
- return (ret);
- for (; sdbp != NULL && ret == 0; ret = __db_s_next(&sdbp, txn))
- if ((ret = __db_truncate(sdbp, ip, txn, &scount)) != 0)
- break;
- if (sdbp != NULL)
- (void)__db_s_done(sdbp, txn);
- if (ret != 0)
- return (ret);
- }
-
- DB_TEST_RECOVERY(dbp, DB_TEST_PREDESTROY, ret, NULL);
-
- /* Acquire a cursor. */
- if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0)
- return (ret);
-
- DEBUG_LWRITE(dbc, txn, "DB->truncate", NULL, NULL, 0);
-#ifdef HAVE_PARTITION
- if (DB_IS_PARTITIONED(dbp))
- ret = __part_truncate(dbc, countp);
- else
-#endif
- switch (dbp->type) {
- case DB_BTREE:
- case DB_RECNO:
- ret = __bam_truncate(dbc, countp);
- break;
- case DB_HASH:
- ret = __ham_truncate(dbc, countp);
- break;
- case DB_QUEUE:
- ret = __qam_truncate(dbc, countp);
- break;
- case DB_UNKNOWN:
- default:
- ret = __db_unknown_type(env, "DB->truncate", dbp->type);
- break;
- }
-
- /* Discard the cursor. */
- if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, NULL);
-
-DB_TEST_RECOVERY_LABEL
-
- return (ret);
-}
-
-/*
- * __db_cursor_check --
- *	See if there are any active cursors on this db.
- *
- * Walks every DB handle on the environment's handle list that refers to
- * the same file as dbp (same adj_fileid) and looks for an initialized
- * cursor on its active queue.  env->mtx_dblist is held for the whole walk,
- * and each handle's queue is examined under that handle's own mutex
- * (ldbp->mutex), not the caller's: the queue being read belongs to ldbp,
- * so holding dbp->mutex would not keep another thread from changing it.
- *
- * NOTE(review): this assumes a handle's cursor queues are guarded by that
- * handle's mutex, as they are where a handle prints its own queues --
- * confirm against the cursor open/close paths.
- *
- * Returns EINVAL if an initialized cursor is found, 0 otherwise.
- */
-static int
-__db_cursor_check(dbp)
-	DB *dbp;
-{
-	DB *ldbp;
-	DBC *dbc;
-	ENV *env;
-	int found;
-
-	env = dbp->env;
-
-	MUTEX_LOCK(env, env->mtx_dblist);
-	FIND_FIRST_DB_MATCH(env, dbp, ldbp);
-	for (found = 0;
-	    !found && ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
-	    ldbp = TAILQ_NEXT(ldbp, dblistlinks)) {
-		/* Lock the handle whose queue is about to be walked. */
-		MUTEX_LOCK(env, ldbp->mutex);
-		TAILQ_FOREACH(dbc, &ldbp->active_queue, links)
-			if (IS_INITIALIZED(dbc)) {
-				found = 1;
-				break;
-			}
-		MUTEX_UNLOCK(env, ldbp->mutex);
-	}
-	MUTEX_UNLOCK(env, env->mtx_dblist);
-
-	return (found ? EINVAL : 0);
-}
diff --git a/db/db_upg.c b/db/db_upg.c
deleted file mode 100644
index 5a6db94..0000000
--- a/db/db_upg.c
+++ /dev/null
@@ -1,510 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_swap.h"
-#include "dbinc/btree.h"
-#include "dbinc/hash.h"
-#include "dbinc/qam.h"
-
-/*
- * __db_upgrade_pp --
- *	DB->upgrade pre/post processing.
- *
- *	When upgrade support is compiled in, validates that flags contains
- *	nothing other than DB_DUPSORT and then runs __db_upgrade between
- *	ENV_ENTER and ENV_LEAVE.  Otherwise reports "upgrade not supported"
- *	and returns EINVAL.
- *
- * PUBLIC: int __db_upgrade_pp __P((DB *, const char *, u_int32_t));
- */
-int
-__db_upgrade_pp(dbp, fname, flags)
-	DB *dbp;
-	const char *fname;
-	u_int32_t flags;
-{
-#ifdef HAVE_UPGRADE_SUPPORT
-	DB_THREAD_INFO *ip;
-	ENV *env;
-	int ret;
-
-	env = dbp->env;
-
-	/*
-	 * !!!
-	 * The actual argument checking is simple, do it inline.
-	 */
-	if ((ret = __db_fchk(env, "DB->upgrade", flags, DB_DUPSORT)) != 0)
-		return (ret);
-
-	ENV_ENTER(env, ip);
-	ret = __db_upgrade(dbp, fname, flags);
-	ENV_LEAVE(env, ip);
-	return (ret);
-#else
-	/*
-	 * Only the genuinely unused arguments are quieted.  dbp is read
-	 * below, so it must not be handed to COMPQUIET with NULL: if the
-	 * macro stores its second argument into the first, dbp->env would
-	 * then be a NULL dereference.
-	 */
-	COMPQUIET(fname, NULL);
-	COMPQUIET(flags, 0);
-
-	__db_errx(dbp->env, "upgrade not supported");
-	return (EINVAL);
-#endif
-}
-
-#ifdef HAVE_UPGRADE_SUPPORT
-/*
- * func_31_list --
- * Table of per-page conversion routines, one slot per page type.
- * __db_upgrade hands this table to __db_page_pass for Btree files at
- * version 7 (and earlier versions that fall through to it) and for Hash
- * files at version 6 (likewise). __db_page_pass indexes the table with
- * TYPE(page) and calls the entry if it is not NULL; a NULL slot means
- * pages of that type are left untouched by this pass.
- */
-static int (* const func_31_list[P_PAGETYPE_MAX])
- __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)) = {
- NULL, /* P_INVALID */
- NULL, /* __P_DUPLICATE */
- __ham_31_hash, /* P_HASH_UNSORTED */
- NULL, /* P_IBTREE */
- NULL, /* P_IRECNO */
- __bam_31_lbtree, /* P_LBTREE */
- NULL, /* P_LRECNO */
- NULL, /* P_OVERFLOW */
- __ham_31_hashmeta, /* P_HASHMETA */
- __bam_31_btreemeta, /* P_BTREEMETA */
- NULL, /* P_QAMMETA */
- NULL, /* P_QAMDATA */
- NULL, /* P_LDUP */
- NULL, /* P_HASH */
-};
-
-/*
- * func_46_list --
- * Table of per-page conversion routines, one slot per page type, in the
- * same layout as func_31_list. __db_upgrade hands this table to
- * __db_page_pass in the Hash version 8 step (also reached by fall-through
- * from older Hash versions). Only the P_HASH_UNSORTED and P_HASHMETA
- * slots are populated; every other page type is skipped by the pass.
- */
-static int (* const func_46_list[P_PAGETYPE_MAX])
- __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)) = {
- NULL, /* P_INVALID */
- NULL, /* __P_DUPLICATE */
- __ham_46_hash, /* P_HASH_UNSORTED */
- NULL, /* P_IBTREE */
- NULL, /* P_IRECNO */
- NULL, /* P_LBTREE */
- NULL, /* P_LRECNO */
- NULL, /* P_OVERFLOW */
- __ham_46_hashmeta, /* P_HASHMETA */
- NULL, /* P_BTREEMETA */
- NULL, /* P_QAMMETA */
- NULL, /* P_QAMDATA */
- NULL, /* P_LDUP */
- NULL, /* P_HASH */
-};
-
-static int __db_page_pass __P((DB *, char *, u_int32_t, int (* const [])
- (DB *, char *, u_int32_t, DB_FH *, PAGE *, int *), DB_FH *));
-static int __db_set_lastpgno __P((DB *, char *, DB_FH *));
-
-/*
- * __db_upgrade --
- *	Upgrade an existing database.
- *
- *	Resolves fname to the backing file, reads the first 256 bytes as the
- *	metadata page and dispatches on its magic number and version.  Each
- *	version case performs one upgrade step and falls through to the
- *	next, so an old file is carried forward step by step.  The file is
- *	fsync'ed on success.
- *
- *	Every failure after the file has been opened leaves through the
- *	"err" label so that real_name and the file handle are always
- *	released and the final feedback call is always made.
- *
- *	Returns 0 on success; DB_OLD_VERSION for an unsupported version;
- *	EINVAL for a foreign byte order, an unrecognized file, or an
- *	encrypted hash database without crypto configured; otherwise the
- *	error returned by the failing helper.
- *
- * PUBLIC: int __db_upgrade __P((DB *, const char *, u_int32_t));
- */
-int
-__db_upgrade(dbp, fname, flags)
-	DB *dbp;
-	const char *fname;
-	u_int32_t flags;
-{
-	DBMETA *meta;
-	DB_FH *fhp;
-	ENV *env;
-	size_t n;
-	int ret, t_ret, use_mp_open;
-	u_int8_t mbuf[256], tmpflags;
-	char *real_name;
-
-	use_mp_open = 0;
-	env = dbp->env;
-	fhp = NULL;
-
-	/* Get the real backing file name. */
-	if ((ret = __db_appname(env,
-	    DB_APP_DATA, fname, NULL, &real_name)) != 0)
-		return (ret);
-
-	/* Open the file. */
-	if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0) {
-		__db_err(env, ret, "%s", real_name);
-		/* Nothing else is held yet; release the name and leave. */
-		__os_free(env, real_name);
-		return (ret);
-	}
-
-	/* Initialize the feedback. */
-	if (dbp->db_feedback != NULL)
-		dbp->db_feedback(dbp, DB_UPGRADE, 0);
-
-	/*
-	 * Read the metadata page.  We read 256 bytes, which is larger than
-	 * any access method's metadata page and smaller than any disk sector.
-	 *
-	 * The buffer is cleared first so that a file shorter than 256 bytes
-	 * is examined deterministically instead of through stale stack
-	 * contents.
-	 */
-	memset(mbuf, 0, sizeof(mbuf));
-	if ((ret = __os_read(env, fhp, mbuf, sizeof(mbuf), &n)) != 0)
-		goto err;
-
-	switch (((DBMETA *)mbuf)->magic) {
-	case DB_BTREEMAGIC:
-		switch (((DBMETA *)mbuf)->version) {
-		case 6:
-			/*
-			 * Before V7 not all pages had page types, so we do the
-			 * single meta-data page by hand.
-			 */
-			if ((ret =
-			    __bam_30_btreemeta(dbp, real_name, mbuf)) != 0)
-				goto err;
-			if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
-				goto err;
-			if ((ret = __os_write(env,
-			    fhp, mbuf, sizeof(mbuf), &n)) != 0)
-				goto err;
-			/* FALLTHROUGH */
-		case 7:
-			/*
-			 * We need the page size to do more.  Rip it out of
-			 * the meta-data page.
-			 */
-			memcpy(&dbp->pgsize, mbuf + 20, sizeof(u_int32_t));
-
-			if ((ret = __db_page_pass(
-			    dbp, real_name, flags, func_31_list, fhp)) != 0)
-				goto err;
-			/* FALLTHROUGH */
-		case 8:
-			if ((ret =
-			    __db_set_lastpgno(dbp, real_name, fhp)) != 0)
-				goto err;
-			/* FALLTHROUGH */
-		case 9:
-			break;
-		default:
-			__db_errx(env, "%s: unsupported btree version: %lu",
-			    real_name, (u_long)((DBMETA *)mbuf)->version);
-			ret = DB_OLD_VERSION;
-			goto err;
-		}
-		break;
-	case DB_HASHMAGIC:
-		switch (((DBMETA *)mbuf)->version) {
-		case 4:
-		case 5:
-			/*
-			 * Before V6 not all pages had page types, so we do the
-			 * single meta-data page by hand.
-			 */
-			if ((ret =
-			    __ham_30_hashmeta(dbp, real_name, mbuf)) != 0)
-				goto err;
-			if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
-				goto err;
-			if ((ret = __os_write(env,
-			    fhp, mbuf, sizeof(mbuf), &n)) != 0)
-				goto err;
-
-			/*
-			 * Before V6, we created hash pages one by one as they
-			 * were needed, using hashhdr.ovfl_point to reserve
-			 * a block of page numbers for them.  A consequence
-			 * of this was that, if no overflow pages had been
-			 * created, the current doubling might extend past
-			 * the end of the database file.
-			 *
-			 * In DB 3.X, we now create all the hash pages
-			 * belonging to a doubling atomically; it's not
-			 * safe to just save them for later, because when
-			 * we create an overflow page we'll just create
-			 * a new last page (whatever that may be).  Grow
-			 * the database to the end of the current doubling.
-			 */
-			if ((ret =
-			    __ham_30_sizefix(dbp, fhp, real_name, mbuf)) != 0)
-				goto err;
-			/* FALLTHROUGH */
-		case 6:
-			/*
-			 * We need the page size to do more.  Rip it out of
-			 * the meta-data page.
-			 */
-			memcpy(&dbp->pgsize, mbuf + 20, sizeof(u_int32_t));
-
-			if ((ret = __db_page_pass(
-			    dbp, real_name, flags, func_31_list, fhp)) != 0)
-				goto err;
-			/* FALLTHROUGH */
-		case 7:
-			if ((ret =
-			    __db_set_lastpgno(dbp, real_name, fhp)) != 0)
-				goto err;
-			/* FALLTHROUGH */
-		case 8:
-			/*
-			 * Any upgrade that has proceeded this far has metadata
-			 * pages compatible with hash version 8 metadata pages,
-			 * so casting mbuf to a dbmeta is safe.
-			 * If a newer revision moves the pagesize, checksum or
-			 * encrypt_alg flags in the metadata, then the
-			 * extraction of the fields will need to use hard coded
-			 * offsets.
-			 */
-			meta = (DBMETA*)mbuf;
-			/*
-			 * We need the page size to do more.  Extract it from
-			 * the meta-data page.
-			 */
-			memcpy(&dbp->pgsize, &meta->pagesize,
-			    sizeof(u_int32_t));
-			/*
-			 * Rip out metadata and encrypt_alg fields from the
-			 * metadata page.  So the upgrade can know how big
-			 * the page metadata pre-amble is.  Any upgrade that has
-			 * proceeded this far has metadata pages compatible
-			 * with hash version 8 metadata pages, so extracting
-			 * the fields is safe.
-			 */
-			memcpy(&tmpflags, &meta->metaflags, sizeof(u_int8_t));
-			if (FLD_ISSET(tmpflags, DBMETA_CHKSUM))
-				F_SET(dbp, DB_AM_CHKSUM);
-			memcpy(&tmpflags, &meta->encrypt_alg, sizeof(u_int8_t));
-			if (tmpflags != 0) {
-				if (!CRYPTO_ON(dbp->env)) {
-					__db_errx(env,
-"Attempt to upgrade an encrypted database without providing a password.");
-					ret = EINVAL;
-					goto err;
-				}
-				F_SET(dbp, DB_AM_ENCRYPT);
-			}
-
-			/*
-			 * This is ugly.  It is necessary to have a usable
-			 * mpool in the dbp to upgrade from an unsorted
-			 * to a sorted hash database.  The mpool file is used
-			 * to resolve offpage key items, which are needed to
-			 * determine sort order.  Having mpool open and access
-			 * the file does not affect the page pass, since the
-			 * page pass only updates DB_HASH_UNSORTED pages
-			 * in-place, and the mpool file is only used to read
-			 * OFFPAGE items.
-			 *
-			 * From here on our own handle is gone: use_mp_open
-			 * tells the cleanup code not to close fhp, and fhp
-			 * is cleared until the mpool's handle is available.
-			 */
-			use_mp_open = 1;
-			ret = __os_closehandle(env, fhp);
-			fhp = NULL;
-			if (ret != 0)
-				goto err;
-			dbp->type = DB_HASH;
-			if ((ret = __env_mpool(dbp, fname,
-			    DB_AM_NOT_DURABLE | DB_AM_VERIFYING)) != 0)
-				goto err;
-			fhp = dbp->mpf->fhp;
-
-			/* Do the actual conversion pass. */
-			if ((ret = __db_page_pass(
-			    dbp, real_name, flags, func_46_list, fhp)) != 0)
-				goto err;
-
-			/* FALLTHROUGH */
-		case 9:
-			break;
-		default:
-			__db_errx(env, "%s: unsupported hash version: %lu",
-			    real_name, (u_long)((DBMETA *)mbuf)->version);
-			ret = DB_OLD_VERSION;
-			goto err;
-		}
-		break;
-	case DB_QAMMAGIC:
-		switch (((DBMETA *)mbuf)->version) {
-		case 1:
-			/*
-			 * If we're in a Queue database, the only page that
-			 * needs upgrading is the meta-database page, don't
-			 * bother with a full pass.
-			 */
-			if ((ret = __qam_31_qammeta(dbp, real_name, mbuf)) != 0)
-				goto err;
-			/* FALLTHROUGH */
-		case 2:
-			if ((ret = __qam_32_qammeta(dbp, real_name, mbuf)) != 0)
-				goto err;
-			if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
-				goto err;
-			if ((ret = __os_write(env,
-			    fhp, mbuf, sizeof(mbuf), &n)) != 0)
-				goto err;
-			/* FALLTHROUGH */
-		case 3:
-		case 4:
-			break;
-		default:
-			__db_errx(env, "%s: unsupported queue version: %lu",
-			    real_name, (u_long)((DBMETA *)mbuf)->version);
-			ret = DB_OLD_VERSION;
-			goto err;
-		}
-		break;
-	default:
-		/*
-		 * Swap the magic number and look again, purely to choose
-		 * between the two diagnostics below.
-		 */
-		M_32_SWAP(((DBMETA *)mbuf)->magic);
-		switch (((DBMETA *)mbuf)->magic) {
-		case DB_BTREEMAGIC:
-		case DB_HASHMAGIC:
-		case DB_QAMMAGIC:
-			__db_errx(env,
-		"%s: DB->upgrade only supported on native byte-order systems",
-			    real_name);
-			break;
-		default:
-			__db_errx(env,
-			    "%s: unrecognized file type", real_name);
-			break;
-		}
-		ret = EINVAL;
-		goto err;
-	}
-
-	ret = __os_fsync(env, fhp);
-
-	/*
-	 * If mp_open was used, then rely on the database close to clean up
-	 * any file handles.
-	 */
-err:	if (use_mp_open == 0 && fhp != NULL &&
-	    (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
-		ret = t_ret;
-	__os_free(env, real_name);
-
-	/* We're done. */
-	if (dbp->db_feedback != NULL)
-		dbp->db_feedback(dbp, DB_UPGRADE, 100);
-
-	return (ret);
-}
-
-/*
- * __db_page_pass --
- *	Walk the pages of the database, upgrading whatever needs it.
- *
- *	dbp		handle; dbp->pgsize must already hold the page size.
- *	real_name	resolved file name, passed through to the converters.
- *	flags		caller's DB->upgrade flags, passed through.
- *	fl		table of P_PAGETYPE_MAX converters indexed by page
- *			type; a NULL slot means "leave this page type alone".
- *	fhp		open handle on the file being upgraded.
- *
- *	Each page is read, decrypted and offered to the converter for its
- *	type.  A page is re-encrypted/checksummed and written back only if
- *	the converter reported it dirty.  Returns 0 or the first error.
- */
-static int
-__db_page_pass(dbp, real_name, flags, fl, fhp)
-	DB *dbp;
-	char *real_name;
-	u_int32_t flags;
-	int (* const fl[P_PAGETYPE_MAX])
-	    __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
-	DB_FH *fhp;
-{
-	ENV *env;
-	PAGE *page;
-	db_pgno_t i, pgno_last;
-	size_t n;
-	u_int32_t type;
-	int dirty, ret;
-
-	env = dbp->env;
-
-	/* Determine the last page of the file. */
-	if ((ret = __db_lastpgno(dbp, real_name, fhp, &pgno_last)) != 0)
-		return (ret);
-
-	/* Allocate memory for a single page. */
-	if ((ret = __os_malloc(env, dbp->pgsize, &page)) != 0)
-		return (ret);
-
-	/* Walk the file, calling the underlying conversion functions. */
-	for (i = 0; i < pgno_last; ++i) {
-		if (dbp->db_feedback != NULL)
-			dbp->db_feedback(
-			    dbp, DB_UPGRADE, (int)((i * 100)/pgno_last));
-		if ((ret = __os_seek(env, fhp, i, dbp->pgsize, 0)) != 0)
-			break;
-		if ((ret = __os_read(env, fhp, page, dbp->pgsize, &n)) != 0)
-			break;
-		dirty = 0;
-		/* Always decrypt the page. */
-		if ((ret = __db_decrypt_pg(env, dbp, page)) != 0)
-			break;
-
-		/*
-		 * The page type comes straight off disk.  Only use it as a
-		 * table index when it is inside the table; a page carrying
-		 * an out-of-range type has no converter and is left as is.
-		 */
-		type = TYPE(page);
-		if (type < P_PAGETYPE_MAX && fl[type] != NULL &&
-		    (ret = fl[type](
-		    dbp, real_name, flags, fhp, page, &dirty)) != 0)
-			break;
-		if (dirty) {
-			if ((ret = __db_encrypt_and_checksum_pg(
-			    env, dbp, page)) != 0)
-				break;
-			if ((ret =
-			    __os_seek(env, fhp, i, dbp->pgsize, 0)) != 0)
-				break;
-			if ((ret = __os_write(env,
-			    fhp, page, dbp->pgsize, &n)) != 0)
-				break;
-		}
-	}
-
-	__os_free(env, page);
-	return (ret);
-}
-
-/*
- * __db_lastpgno --
- *	Return the current last page number of the file.
- *
- *	The value stored through pgno_lastp is the file size reported by
- *	__os_ioinfo divided by dbp->pgsize.  Returns EINVAL if the page size
- *	is zero or the file size is not a multiple of it, or the error from
- *	__os_ioinfo.
- *
- * PUBLIC: int __db_lastpgno __P((DB *, char *, DB_FH *, db_pgno_t *));
- */
-int
-__db_lastpgno(dbp, real_name, fhp, pgno_lastp)
-	DB *dbp;
-	char *real_name;
-	DB_FH *fhp;
-	db_pgno_t *pgno_lastp;
-{
-	ENV *env;
-	db_pgno_t pgno_last;
-	u_int32_t mbytes, bytes;
-	int ret;
-
-	env = dbp->env;
-
-	if ((ret = __os_ioinfo(env,
-	    real_name, fhp, &mbytes, &bytes, NULL)) != 0) {
-		__db_err(env, ret, "%s", real_name);
-		return (ret);
-	}
-
-	/*
-	 * Callers load dbp->pgsize from the file's own metadata page, so it
-	 * cannot be trusted to be non-zero.  Reject zero before dividing.
-	 */
-	if (dbp->pgsize == 0) {
-		__db_errx(env, "%s: page size is zero", real_name);
-		return (EINVAL);
-	}
-
-	/* The file must hold a whole number of pages. */
-	if (bytes % dbp->pgsize != 0) {
-		__db_errx(env,
-		    "%s: file size not a multiple of the pagesize", real_name);
-		return (EINVAL);
-	}
-	pgno_last = mbytes * (MEGABYTE / dbp->pgsize);
-	pgno_last += bytes / dbp->pgsize;
-
-	*pgno_lastp = pgno_last;
-	return (0);
-}
-
-/*
- * __db_set_lastpgno --
- *	Refresh the last_pgno field of the metadata header at the start of
- *	the file: read the header, recompute the last page number from the
- *	file size, and write the header back.  dbp->pgsize is set from the
- *	header's pagesize field along the way.
- *
- *	Code assumes that we do not have checksums/crypto on the page.
- */
-static int
-__db_set_lastpgno(dbp, real_name, fhp)
-	DB *dbp;
-	char *real_name;
-	DB_FH *fhp;
-{
-	DBMETA meta;
-	ENV *env;
-	size_t nio;
-	int ret;
-
-	env = dbp->env;
-
-	/* Fetch the header from offset zero. */
-	if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0 ||
-	    (ret = __os_read(env, fhp, &meta, sizeof(meta), &nio)) != 0)
-		return (ret);
-
-	/* __db_lastpgno divides by dbp->pgsize, so set it first. */
-	dbp->pgsize = meta.pagesize;
-	ret = __db_lastpgno(dbp, real_name, fhp, &meta.last_pgno);
-	if (ret != 0)
-		return (ret);
-
-	/* Store the updated header back where it came from. */
-	if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
-		return (ret);
-	return (__os_write(env, fhp, &meta, sizeof(meta), &nio));
-}
-#endif /* HAVE_UPGRADE_SUPPORT */
diff --git a/db/db_upg_opd.c b/db/db_upg_opd.c
deleted file mode 100644
index ea143cf..0000000
--- a/db/db_upg_opd.c
+++ /dev/null
@@ -1,343 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/btree.h"
-
-static int __db_build_bi __P((DB *, DB_FH *, PAGE *, PAGE *, u_int32_t, int *));
-static int __db_build_ri __P((DB *, DB_FH *, PAGE *, PAGE *, u_int32_t, int *));
-static int __db_up_ovref __P((DB *, DB_FH *, db_pgno_t));
-
-#define GET_PAGE(dbp, fhp, pgno, page) { \
- if ((ret = __os_seek( \
- dbp->env, fhp, pgno, (dbp)->pgsize, 0)) != 0) \
- goto err; \
- if ((ret = __os_read(dbp->env, \
- fhp, page, (dbp)->pgsize, &n)) != 0) \
- goto err; \
-}
-#define PUT_PAGE(dbp, fhp, pgno, page) { \
- if ((ret = __os_seek( \
- dbp->env, fhp, pgno, (dbp)->pgsize, 0)) != 0) \
- goto err; \
- if ((ret = __os_write(dbp->env, \
- fhp, page, (dbp)->pgsize, &n)) != 0) \
- goto err; \
-}
-
-/*
- * __db_31_offdup --
- * Convert 3.0 off-page duplicates to 3.1 off-page duplicates.
- *
- * dbp handle; dbp->pgsize gives the page size used for all I/O.
- * real_name file name, used to find the current end of the file.
- * fhp open handle through which pages are read and written.
- * sorted non-zero: build a Btree (P_LDUP leaves, P_IBTREE internal
- * pages); zero: build a Recno tree (P_LRECNO / P_IRECNO).
- * pgnop in: first page of the duplicate chain;
- * out: page number of the root of the converted tree.
- *
- * Works in two phases: first every page in the chain is rewritten in
- * place as a leaf page, then internal pages are appended to the end of
- * the file, level by level, until a level consists of a single page.
- *
- * Returns 0 on success, otherwise the error from the failing helper.
- *
- * PUBLIC: int __db_31_offdup __P((DB *, char *, DB_FH *, int, db_pgno_t *));
- */
-int
-__db_31_offdup(dbp, real_name, fhp, sorted, pgnop)
- DB *dbp;
- char *real_name;
- DB_FH *fhp;
- int sorted;
- db_pgno_t *pgnop;
-{
- PAGE *ipage, *page;
- db_indx_t indx;
- db_pgno_t cur_cnt, i, next_cnt, pgno, *pgno_cur, pgno_last;
- db_pgno_t *pgno_next, pgno_max, *tmp;
- db_recno_t nrecs;
- size_t n;
- int level, nomem, ret;
-
- /* Everything freed at "err" starts out NULL so cleanup is uniform. */
- ipage = page = NULL;
- pgno_cur = pgno_next = NULL;
-
- /* Allocate room to hold a page. */
- if ((ret = __os_malloc(dbp->env, dbp->pgsize, &page)) != 0)
- goto err;
-
- /*
- * Walk the chain of 3.0 off-page duplicates. Each one is converted
- * in place to a 3.1 off-page duplicate page. If the duplicates are
- * sorted, they are converted to a Btree leaf page, otherwise to a
- * Recno leaf page.
- */
- for (nrecs = 0, cur_cnt = pgno_max = 0,
- pgno = *pgnop; pgno != PGNO_INVALID;) {
- /* Grow the page-number list 20 slots at a time. */
- if (pgno_max == cur_cnt) {
- pgno_max += 20;
- if ((ret = __os_realloc(dbp->env, pgno_max *
- sizeof(db_pgno_t), &pgno_cur)) != 0)
- goto err;
- }
- pgno_cur[cur_cnt++] = pgno;
-
- GET_PAGE(dbp, fhp, pgno, page);
- /* Accumulate the total entry count for the root page. */
- nrecs += NUM_ENT(page);
- LEVEL(page) = LEAFLEVEL;
- TYPE(page) = sorted ? P_LDUP : P_LRECNO;
- /*
- * !!!
- * DB didn't zero the LSNs on off-page duplicates pages.
- */
- ZERO_LSN(LSN(page));
- PUT_PAGE(dbp, fhp, pgno, page);
-
- pgno = NEXT_PGNO(page);
- }
-
- /* If we only have a single page, it's easy. */
- if (cur_cnt <= 1)
- goto done;
-
- /*
- * pgno_cur is the list of pages we just converted. We're
- * going to walk that list, but we'll need to create a new
- * list while we do so.
- */
- if ((ret = __os_malloc(dbp->env,
- cur_cnt * sizeof(db_pgno_t), &pgno_next)) != 0)
- goto err;
-
- /* Figure out where we can start allocating new pages. */
- if ((ret = __db_lastpgno(dbp, real_name, fhp, &pgno_last)) != 0)
- goto err;
-
- /* Allocate room for an internal page. */
- if ((ret = __os_malloc(dbp->env, dbp->pgsize, &ipage)) != 0)
- goto err;
- PGNO(ipage) = PGNO_INVALID;
-
- /*
- * Repeatedly walk the list of pages, building internal pages, until
- * there's only one page at a level.
- */
- for (level = LEAFLEVEL + 1; cur_cnt > 1; ++level) {
- for (indx = 0, i = next_cnt = 0; i < cur_cnt;) {
- /*
- * indx == 0 means a fresh internal page is needed: take
- * the next page number past the end of the file and
- * record it as a member of the next level up.
- */
- if (indx == 0) {
- P_INIT(ipage, dbp->pgsize, pgno_last,
- PGNO_INVALID, PGNO_INVALID,
- level, sorted ? P_IBTREE : P_IRECNO);
- ZERO_LSN(LSN(ipage));
-
- pgno_next[next_cnt++] = pgno_last++;
- }
-
- GET_PAGE(dbp, fhp, pgno_cur[i], page);
-
- /*
- * If the duplicates are sorted, put the first item on
- * the lower-level page onto a Btree internal page. If
- * the duplicates are not sorted, create an internal
- * Recno structure on the page. If either case doesn't
- * fit, push out the current page and start a new one.
- */
- nomem = 0;
- if (sorted) {
- if ((ret = __db_build_bi(
- dbp, fhp, ipage, page, indx, &nomem)) != 0)
- goto err;
- } else
- if ((ret = __db_build_ri(
- dbp, fhp, ipage, page, indx, &nomem)) != 0)
- goto err;
- /*
- * On nomem the full internal page is flushed and i is not
- * advanced, so the same child is retried on a new page.
- */
- if (nomem) {
- indx = 0;
- PUT_PAGE(dbp, fhp, PGNO(ipage), ipage);
- } else {
- ++indx;
- ++NUM_ENT(ipage);
- ++i;
- }
- }
-
- /*
- * Push out the last internal page. Set the top-level record
- * count if we've reached the top.
- */
- if (next_cnt == 1)
- RE_NREC_SET(ipage, nrecs);
- PUT_PAGE(dbp, fhp, PGNO(ipage), ipage);
-
- /* Swap the current and next page number arrays. */
- cur_cnt = next_cnt;
- tmp = pgno_cur;
- pgno_cur = pgno_next;
- pgno_next = tmp;
- }
-
- /*
- * NOTE(review): if *pgnop is PGNO_INVALID on entry the chain loop never
- * runs, pgno_cur is still NULL and the store below dereferences it.
- * Confirm that callers never pass an empty chain.
- */
-done: *pgnop = pgno_cur[0];
-
-err: if (pgno_cur != NULL)
- __os_free(dbp->env, pgno_cur);
- if (pgno_next != NULL)
- __os_free(dbp->env, pgno_next);
- if (ipage != NULL)
- __os_free(dbp->env, ipage);
- if (page != NULL)
- __os_free(dbp->env, page);
-
- return (ret);
-}
-
-/*
- * __db_build_bi --
- * Build a BINTERNAL entry for a parent page.
- *
- * dbp handle, used by the page-layout macros.
- * fhp file handle, needed only to bump overflow reference counts.
- * ipage internal (parent) page being filled in.
- * page child page whose first item supplies the entry's key.
- * indx slot on ipage in which to place the new entry.
- * nomemp set to 1 (with a return of 0) when ipage has no room for
- * the entry; nothing is added to ipage in that case.
- *
- * The caller is responsible for incrementing NUM_ENT(ipage); this
- * function only fills in the slot and its offset.
- *
- * Returns 0 on success or when out of room, the error from
- * __db_up_ovref, or the result of __db_pgfmt for an unexpected page or
- * item type.
- */
-static int
-__db_build_bi(dbp, fhp, ipage, page, indx, nomemp)
- DB *dbp;
- DB_FH *fhp;
- PAGE *ipage, *page;
- u_int32_t indx;
- int *nomemp;
-{
- BINTERNAL bi, *child_bi;
- BKEYDATA *child_bk;
- u_int8_t *p;
- int ret;
- db_indx_t *inp;
-
- inp = P_INP(dbp, ipage);
- switch (TYPE(page)) {
- case P_IBTREE:
- /* Child is itself an internal page: copy its first key. */
- child_bi = GET_BINTERNAL(dbp, page, 0);
- if (P_FREESPACE(dbp, ipage) < BINTERNAL_PSIZE(child_bi->len)) {
- *nomemp = 1;
- return (0);
- }
- /* Carve the entry off the page's HOFFSET and record its offset. */
- inp[indx] =
- HOFFSET(ipage) -= BINTERNAL_SIZE(child_bi->len);
- p = P_ENTRY(dbp, ipage, indx);
-
- bi.len = child_bi->len;
- B_TSET(bi.type, child_bi->type);
- bi.pgno = PGNO(page);
- bi.nrecs = __bam_total(dbp, page);
- /* Fixed-size header first, then the key bytes right after it. */
- memcpy(p, &bi, SSZA(BINTERNAL, data));
- p += SSZA(BINTERNAL, data);
- memcpy(p, child_bi->data, child_bi->len);
-
- /* Increment the overflow ref count. */
- if (B_TYPE(child_bi->type) == B_OVERFLOW)
- if ((ret = __db_up_ovref(dbp, fhp,
- ((BOVERFLOW *)(child_bi->data))->pgno)) != 0)
- return (ret);
- break;
- case P_LDUP:
- /* Child is a leaf: the key comes from its first item. */
- child_bk = GET_BKEYDATA(dbp, page, 0);
- switch (B_TYPE(child_bk->type)) {
- case B_KEYDATA:
- if (P_FREESPACE(dbp, ipage) <
- BINTERNAL_PSIZE(child_bk->len)) {
- *nomemp = 1;
- return (0);
- }
- inp[indx] =
- HOFFSET(ipage) -= BINTERNAL_SIZE(child_bk->len);
- p = P_ENTRY(dbp, ipage, indx);
-
- bi.len = child_bk->len;
- B_TSET(bi.type, child_bk->type);
- bi.pgno = PGNO(page);
- bi.nrecs = __bam_total(dbp, page);
- memcpy(p, &bi, SSZA(BINTERNAL, data));
- p += SSZA(BINTERNAL, data);
- memcpy(p, child_bk->data, child_bk->len);
- break;
- case B_OVERFLOW:
- /*
- * The key lives on an overflow page: the entry's data is the
- * BOVERFLOW structure itself, not the key bytes.
- */
- if (P_FREESPACE(dbp, ipage) <
- BINTERNAL_PSIZE(BOVERFLOW_SIZE)) {
- *nomemp = 1;
- return (0);
- }
- inp[indx] =
- HOFFSET(ipage) -= BINTERNAL_SIZE(BOVERFLOW_SIZE);
- p = P_ENTRY(dbp, ipage, indx);
-
- bi.len = BOVERFLOW_SIZE;
- B_TSET(bi.type, child_bk->type);
- bi.pgno = PGNO(page);
- bi.nrecs = __bam_total(dbp, page);
- memcpy(p, &bi, SSZA(BINTERNAL, data));
- p += SSZA(BINTERNAL, data);
- memcpy(p, child_bk, BOVERFLOW_SIZE);
-
- /* Increment the overflow ref count. */
- if ((ret = __db_up_ovref(dbp, fhp,
- ((BOVERFLOW *)child_bk)->pgno)) != 0)
- return (ret);
- break;
- default:
- return (__db_pgfmt(dbp->env, PGNO(page)));
- }
- break;
- default:
- return (__db_pgfmt(dbp->env, PGNO(page)));
- }
-
- return (0);
-}
-
-/*
- * __db_build_ri --
- *	Add a RINTERNAL entry describing the child "page" at slot indx of
- *	the internal page "ipage".
- *
- *	If ipage lacks room for another entry, *nomemp is set to 1 and the
- *	page is left unchanged.  Always returns 0.  fhp is unused.
- */
-static int
-__db_build_ri(dbp, fhp, ipage, page, indx, nomemp)
-	DB *dbp;
-	DB_FH *fhp;
-	PAGE *ipage, *page;
-	u_int32_t indx;
-	int *nomemp;
-{
-	RINTERNAL entry;
-	db_indx_t *offsets;
-
-	COMPQUIET(fhp, NULL);
-
-	/* No room: tell the caller to start another internal page. */
-	if (P_FREESPACE(dbp, ipage) < RINTERNAL_PSIZE) {
-		*nomemp = 1;
-		return (0);
-	}
-
-	entry.pgno = PGNO(page);
-	entry.nrecs = __bam_total(dbp, page);
-
-	/* Reserve the space, record its offset, then copy the entry in. */
-	offsets = P_INP(dbp, ipage);
-	HOFFSET(ipage) -= RINTERNAL_SIZE;
-	offsets[indx] = HOFFSET(ipage);
-	memcpy(P_ENTRY(dbp, ipage, indx), &entry, RINTERNAL_SIZE);
-
-	return (0);
-}
-
-/*
- * __db_up_ovref --
- *	Increment the reference count on the overflow page pgno: read the
- *	page into a scratch buffer, bump OV_REF, and write it back to the
- *	same location.
- *
- *	Returns 0 on success, otherwise the error from the allocation or
- *	the failing seek/read/write.
- */
-static int
-__db_up_ovref(dbp, fhp, pgno)
-	DB *dbp;
-	DB_FH *fhp;
-	db_pgno_t pgno;
-{
-	PAGE *ovpage;
-	size_t nio;
-	int ret;
-
-	/* Scratch buffer for exactly one page. */
-	if ((ret = __os_malloc(dbp->env, dbp->pgsize, &ovpage)) != 0)
-		return (ret);
-
-	/* Read, modify and write; stop at the first step that fails. */
-	if ((ret = __os_seek(
-	    dbp->env, fhp, pgno, dbp->pgsize, 0)) == 0 &&
-	    (ret = __os_read(
-	    dbp->env, fhp, ovpage, dbp->pgsize, &nio)) == 0) {
-		++OV_REF(ovpage);
-		if ((ret = __os_seek(
-		    dbp->env, fhp, pgno, dbp->pgsize, 0)) == 0)
-			ret = __os_write(
-			    dbp->env, fhp, ovpage, dbp->pgsize, &nio);
-	}
-
-	__os_free(dbp->env, ovpage);
-	return (ret);
-}
diff --git a/db/db_vrfy.c b/db/db_vrfy.c
deleted file mode 100644
index 7ea9c62..0000000
--- a/db/db_vrfy.c
+++ /dev/null
@@ -1,2894 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 2000-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_swap.h"
-#include "dbinc/db_verify.h"
-#include "dbinc/btree.h"
-#include "dbinc/hash.h"
-#include "dbinc/lock.h"
-#include "dbinc/mp.h"
-#include "dbinc/qam.h"
-#include "dbinc/txn.h"
-
-/*
- * This is the code for DB->verify, the DB database consistency checker.
- * For now, it checks all subdatabases in a database, and verifies
- * everything it knows how to (i.e. it's all-or-nothing, and one can't
- * check only for a subset of possible problems).
- */
-
-static u_int __db_guesspgsize __P((ENV *, DB_FH *));
-static int __db_is_valid_magicno __P((u_int32_t, DBTYPE *));
-static int __db_meta2pgset
- __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, DB *));
-static int __db_salvage __P((DB *, VRFY_DBINFO *,
- db_pgno_t, void *, int (*)(void *, const void *), u_int32_t));
-static int __db_salvage_subdbpg __P((DB *, VRFY_DBINFO *,
- PAGE *, void *, int (*)(void *, const void *), u_int32_t));
-static int __db_salvage_all __P((DB *, VRFY_DBINFO *, void *,
- int(*)(void *, const void *), u_int32_t, int *));
-static int __db_salvage_unknowns __P((DB *, VRFY_DBINFO *, void *,
- int (*)(void *, const void *), u_int32_t));
-static int __db_verify_arg __P((DB *, const char *, void *, u_int32_t));
-static int __db_vrfy_freelist
- __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
-static int __db_vrfy_invalid
- __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
-static int __db_vrfy_orderchkonly __P((DB *,
- VRFY_DBINFO *, const char *, const char *, u_int32_t));
-static int __db_vrfy_pagezero __P((DB *, VRFY_DBINFO *, DB_FH *, u_int32_t));
-static int __db_vrfy_subdbs
- __P((DB *, VRFY_DBINFO *, const char *, u_int32_t));
-static int __db_vrfy_structure __P((DB *, VRFY_DBINFO *,
- const char *, db_pgno_t, void *, void *, u_int32_t));
-static int __db_vrfy_walkpages __P((DB *, VRFY_DBINFO *,
- void *, int (*)(void *, const void *), u_int32_t));
-
-/*
- * VERIFY_FLAGS --
- * The complete set of flag bits DB->verify accepts; __db_verify_arg
- * passes it to __db_fchk as the mask of permitted flags.
- */
-#define VERIFY_FLAGS \
- (DB_AGGRESSIVE | \
- DB_NOORDERCHK | DB_ORDERCHKONLY | DB_PRINTABLE | DB_SALVAGE | DB_UNREF)
-
-/*
- * __db_verify_pp --
- *	DB->verify public interface.
- *
- *	Forwards to __db_verify_internal with outfile as the output handle
- *	and __db_pr_callback as the output callback.
- *
- * PUBLIC: int __db_verify_pp
- * PUBLIC:     __P((DB *, const char *, const char *, FILE *, u_int32_t));
- */
-int
-__db_verify_pp(dbp, file, database, outfile, flags)
-	DB *dbp;
-	const char *file, *database;
-	FILE *outfile;
-	u_int32_t flags;
-{
-	int ret;
-
-	/*
-	 * The real work lives in __db_verify_internal so that the non-C
-	 * APIs can supply their own stand-in for a FILE *.  For the same
-	 * reason the ENV_ENTER/ENV_LEAVE pair is found there rather than
-	 * in this wrapper.
-	 */
-	ret = __db_verify_internal(dbp,
-	    file, database, outfile, __db_pr_callback, flags);
-
-	return (ret);
-}
-
-/*
- * __db_verify_internal --
- *	Common body of DB->verify: check the arguments, run __db_verify,
- *	then close the handle.  The close happens whether or not the
- *	argument check and verification succeeded, and its error is
- *	reported only if nothing failed earlier.
- *
- * PUBLIC: int __db_verify_internal __P((DB *, const char *,
- * PUBLIC:     const char *, void *, int (*)(void *, const void *), u_int32_t));
- */
-int
-__db_verify_internal(dbp, fname, dname, handle, callback, flags)
-	DB *dbp;
-	const char *fname, *dname;
-	void *handle;
-	int (*callback) __P((void *, const void *));
-	u_int32_t flags;
-{
-	DB_THREAD_INFO *ip;
-	ENV *env;
-	int ret, t_ret;
-
-	env = dbp->env;
-
-	DB_ILLEGAL_AFTER_OPEN(dbp, "DB->verify");
-
-	/* DB_UNREF is added to every request that is not a salvage. */
-	if (!LF_ISSET(DB_SALVAGE))
-		LF_SET(DB_UNREF);
-
-	ENV_ENTER(env, ip);
-
-	ret = __db_verify_arg(dbp, dname, handle, flags);
-	if (ret == 0)
-		ret = __db_verify(dbp, ip,
-		    fname, dname, handle, callback, NULL, NULL, flags);
-
-	/* Db.verify is a DB handle destructor. */
-	t_ret = __db_close(dbp, NULL, 0);
-	if (ret == 0 && t_ret != 0)
-		ret = t_ret;
-
-	ENV_LEAVE(env, ip);
-	return (ret);
-}
-
-/*
- * __db_verify_arg --
- *	Check DB->verify arguments.
- *
- *	dbp	handle being verified; supplies the environment for errors.
- *	dname	database name; required with DB_ORDERCHKONLY.
- *	handle	output handle; required with DB_SALVAGE.
- *	flags	caller's flags, checked against VERIFY_FLAGS.
- *
- *	Returns 0 if the combination is acceptable, EINVAL for a missing
- *	handle or database name, or the error produced by the flag-checking
- *	helpers.
- */
-static int
-__db_verify_arg(dbp, dname, handle, flags)
-	DB *dbp;
-	const char *dname;
-	void *handle;
-	u_int32_t flags;
-{
-	ENV *env;
-	int ret;
-
-	env = dbp->env;
-
-	if ((ret = __db_fchk(env, "DB->verify", flags, VERIFY_FLAGS)) != 0)
-		return (ret);
-
-	/*
-	 * DB_SALVAGE is mutually exclusive with the other flags except
-	 * DB_AGGRESSIVE, DB_PRINTABLE.
-	 *
-	 * DB_AGGRESSIVE and DB_PRINTABLE are only meaningful when salvaging.
-	 *
-	 * DB_SALVAGE requires an output stream.
-	 */
-	if (LF_ISSET(DB_SALVAGE)) {
-		if (LF_ISSET(~(DB_AGGRESSIVE | DB_PRINTABLE | DB_SALVAGE)))
-			return (__db_ferr(env, "DB->verify", 1));
-		if (handle == NULL) {
-			__db_errx(env,
-			    "DB_SALVAGE requires an output handle");
-			return (EINVAL);
-		}
-	} else
-		if (LF_ISSET(DB_AGGRESSIVE | DB_PRINTABLE))
-			return (__db_ferr(env, "DB->verify", 1));
-
-	/*
-	 * DB_ORDERCHKONLY is mutually exclusive with DB_SALVAGE and
-	 * DB_NOORDERCHK, and requires a database name.
-	 */
-	if ((ret = __db_fcchk(env, "DB->verify", flags,
-	    DB_ORDERCHKONLY, DB_SALVAGE | DB_NOORDERCHK)) != 0)
-		return (ret);
-	if (LF_ISSET(DB_ORDERCHKONLY) && dname == NULL) {
-		__db_errx(env, "DB_ORDERCHKONLY requires a database name");
-		return (EINVAL);
-	}
-	return (0);
-}
-
-/*
- * __db_verify --
- * Walk the entire file page-by-page, either verifying with or without
- * dumping in db_dump -d format, or DB_SALVAGE-ing whatever key/data
- * pairs can be found and dumping them in standard (db_load-ready)
- * dump format.
- *
- * (Salvaging isn't really a verification operation, but we put it
- * here anyway because it requires essentially identical top-level
- * code.)
- *
- * flags may be 0, DB_NOORDERCHK, DB_ORDERCHKONLY, or DB_SALVAGE
- * (and optionally DB_AGGRESSIVE).
- * PUBLIC: int __db_verify __P((DB *, DB_THREAD_INFO *, const char *,
- * PUBLIC: const char *, void *, int (*)(void *, const void *),
- * PUBLIC: void *, void *, u_int32_t));
- */
-int
-__db_verify(dbp, ip, name, subdb, handle, callback, lp, rp, flags)
- DB *dbp;
- DB_THREAD_INFO *ip;
- const char *name, *subdb;
- void *handle;
- int (*callback) __P((void *, const void *));
- void *lp, *rp;
- u_int32_t flags;
-{
- DB_FH *fhp;
- ENV *env;
- VRFY_DBINFO *vdp;
- u_int32_t sflags;
- int has_subdbs, isbad, ret, t_ret;
- char *real_name;
-
- env = dbp->env;
- fhp = NULL;
- vdp = NULL;
- real_name = NULL;
- has_subdbs = isbad = ret = t_ret = 0;
-
- F_SET(dbp, DB_AM_VERIFYING);
-
- /* Initialize any feedback function. */
- if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL)
- dbp->db_feedback(dbp, DB_VERIFY, 0);
-
- /*
- * We don't know how large the cache is, and if the database
- * in question uses a small page size--which we don't know
- * yet!--it may be uncomfortably small for the default page
- * size [#2143]. However, the things we need temporary
- * databases for in dbinfo are largely tiny, so using a
- * 1024-byte pagesize is probably not going to be a big hit,
- * and will make us fit better into small spaces.
- */
- if ((ret = __db_vrfy_dbinfo_create(env, ip, 1024, &vdp)) != 0)
- goto err;
-
- /*
- * Note whether the user has requested that we use printable
- * chars where possible. We won't get here with this flag if
- * we're not salvaging.
- */
- if (LF_ISSET(DB_PRINTABLE))
- F_SET(vdp, SALVAGE_PRINTABLE);
-
- /* Find the real name of the file. */
- if ((ret = __db_appname(env,
- DB_APP_DATA, name, &dbp->dirname, &real_name)) != 0)
- goto err;
-
- /*
- * Our first order of business is to verify page 0, which is
- * the metadata page for the master database of subdatabases
- * or of the only database in the file. We want to do this by hand
- * rather than just calling __db_open in case it's corrupt--various
- * things in __db_open might act funny.
- *
- * Once we know the metadata page is healthy, I believe that it's
- * safe to open the database normally and then use the page swapping
- * code, which makes life easier.
- */
- if ((ret = __os_open(env, real_name, 0, DB_OSO_RDONLY, 0, &fhp)) != 0)
- goto err;
-
- /* Verify the metadata page 0; set pagesize and type. */
- if ((ret = __db_vrfy_pagezero(dbp, vdp, fhp, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
-
- /*
- * We can assume at this point that dbp->pagesize and dbp->type are
- * set correctly, or at least as well as they can be, and that
- * locking, logging, and txns are not in use. Thus we can trust
- * the memp code not to look at the page, and thus to be safe
- * enough to use.
- *
- * The dbp is not open, but the file is open in the fhp, and we
- * cannot assume that __db_open is safe. Call __env_setup,
- * the [safe] part of __db_open that initializes the environment--
- * and the mpool--manually.
- */
- if ((ret = __env_setup(dbp, NULL,
- name, subdb, TXN_INVALID, DB_ODDFILESIZE | DB_RDONLY)) != 0)
- goto err;
-
- /*
- * Set our name in the Queue subsystem; we may need it later
- * to deal with extents.
- */
- if (dbp->type == DB_QUEUE &&
- (ret = __qam_set_ext_data(dbp, name)) != 0)
- goto err;
-
- /* Mark the dbp as opened, so that we correctly handle its close. */
- F_SET(dbp, DB_AM_OPEN_CALLED);
-
- /* Find out the page number of the last page in the database. */
- if ((ret = __memp_get_last_pgno(dbp->mpf, &vdp->last_pgno)) != 0)
- goto err;
-
- /*
- * DB_ORDERCHKONLY is a special case; our file consists of
- * several subdatabases, which use different hash, bt_compare,
- * and/or dup_compare functions. Consequently, we couldn't verify
- * sorting and hashing simply by calling DB->verify() on the file.
- * DB_ORDERCHKONLY allows us to come back and check those things; it
- * requires a subdatabase, and assumes that everything but that
- * database's sorting/hashing is correct.
- */
- if (LF_ISSET(DB_ORDERCHKONLY)) {
- ret = __db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags);
- goto done;
- }
-
- sflags = flags;
- if (dbp->p_internal != NULL)
- LF_CLR(DB_SALVAGE);
-
- /*
- * When salvaging, we use a db to keep track of whether we've seen a
- * given overflow or dup page in the course of traversing normal data.
- * If in the end we have not, we assume its key got lost and print it
- * with key "UNKNOWN".
- */
- if (LF_ISSET(DB_SALVAGE)) {
- if ((ret = __db_salvage_init(vdp)) != 0)
- goto err;
-
- /*
- * If we're not being aggressive, salvage by walking the tree
- * and only printing the leaves we find. "has_subdbs" will
- * indicate whether we found subdatabases.
- */
- if (!LF_ISSET(DB_AGGRESSIVE) && __db_salvage_all(
- dbp, vdp, handle, callback, flags, &has_subdbs) != 0)
- isbad = 1;
-
- /*
- * If we have subdatabases, flag if any keys are found that
- * don't belong to a subdatabase -- they'll need to have an
- * "__OTHER__" subdatabase header printed first.
- */
- if (has_subdbs) {
- F_SET(vdp, SALVAGE_PRINTHEADER);
- F_SET(vdp, SALVAGE_HASSUBDBS);
- }
- }
-
- /* Walk all the pages, if a page cannot be read, verify structure. */
- if ((ret =
- __db_vrfy_walkpages(dbp, vdp, handle, callback, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else if (ret != DB_PAGE_NOTFOUND)
- goto err;
- }
-
- /* If we're verifying, verify inter-page structure. */
- if (!LF_ISSET(DB_SALVAGE) && isbad == 0)
- if ((t_ret = __db_vrfy_structure(dbp,
- vdp, name, 0, lp, rp, flags)) != 0) {
- if (t_ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
-
- /*
- * If we're salvaging, output with key UNKNOWN any overflow or dup pages
- * we haven't been able to put in context. Then destroy the salvager's
- * state-saving database.
- */
- if (LF_ISSET(DB_SALVAGE)) {
- if ((ret = __db_salvage_unknowns(dbp,
- vdp, handle, callback, flags)) != 0)
- isbad = 1;
- }
-
- flags = sflags;
-
-#ifdef HAVE_PARTITION
- if (t_ret == 0 && dbp->p_internal != NULL)
- t_ret = __part_verify(dbp, vdp, name, handle, callback, flags);
-#endif
-
- if (ret == 0)
- ret = t_ret;
-
- /* Don't display a footer for a database holding other databases. */
- if (LF_ISSET(DB_SALVAGE | DB_VERIFY_PARTITION) == DB_SALVAGE &&
- (!has_subdbs || F_ISSET(vdp, SALVAGE_PRINTFOOTER)))
- (void)__db_prfooter(handle, callback);
-
-done: err:
- /* Send feedback that we're done. */
- if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL)
- dbp->db_feedback(dbp, DB_VERIFY, 100);
-
- if (LF_ISSET(DB_SALVAGE) &&
- (t_ret = __db_salvage_destroy(vdp)) != 0 && ret == 0)
- ret = t_ret;
- if (fhp != NULL &&
- (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
- ret = t_ret;
- if (vdp != NULL &&
- (t_ret = __db_vrfy_dbinfo_destroy(env, vdp)) != 0 && ret == 0)
- ret = t_ret;
- if (real_name != NULL)
- __os_free(env, real_name);
-
- /*
- * DB_VERIFY_FATAL is a private error, translate to a public one.
- *
- * If we didn't find a page, it's probably a page number was corrupted.
- * Return the standard corruption error.
- *
- * Otherwise, if we found corruption along the way, set the return.
- */
- if (ret == DB_VERIFY_FATAL ||
- ret == DB_PAGE_NOTFOUND || (ret == 0 && isbad == 1))
- ret = DB_VERIFY_BAD;
-
- /* Make sure there's a public complaint if we found corruption. */
- if (ret != 0)
- __db_err(env, ret, "%s", name);
-
- return (ret);
-}
-
-/*
- * __db_vrfy_pagezero --
- * Verify the master metadata page. Use seek, read, and a local buffer
- * rather than the DB paging code, for safety.
- *
- * Must correctly (or best-guess) set dbp->type and dbp->pagesize.
- *
- * Reads DBMETASIZE bytes from the start of fhp into a local buffer and
- * checks the page number, magic number, version, page size, page type
- * and meta-flags, byte-swapping fields when the magic number is only
- * valid in the opposite byte order. Records the free-list head, page
- * type and VRFY_* flags in the VRFY_PAGEINFO for PGNO_BASE_MD, stores
- * the meta page's last_pgno in vdp->meta_last_pgno, copies meta->uid
- * into dbp->fileid, and sets DB_AM_SWAP on dbp for a swapped file.
- *
- * Returns 0 if nothing was flagged, DB_VERIFY_BAD if any check failed,
- * DB_VERIFY_FATAL if fewer than DBMETASIZE bytes were read or
- * __db_chk_meta returned something other than 0 or -1, or the error
- * returned by a failing helper call.
- *
- * NOTE(review): the early returns that follow __db_vrfy_getpageinfo do
- * not call __db_vrfy_putpageinfo; presumably the caller's teardown of
- * vdp reclaims the entry -- confirm.
- */
-static int
-__db_vrfy_pagezero(dbp, vdp, fhp, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- DB_FH *fhp;
- u_int32_t flags;
-{
- DBMETA *meta;
- ENV *env;
- VRFY_PAGEINFO *pip;
- db_pgno_t freelist;
- size_t nr;
- int isbad, ret, swapped;
- u_int8_t mbuf[DBMETASIZE];
-
- isbad = ret = swapped = 0;
- freelist = 0;
- env = dbp->env;
- meta = (DBMETA *)mbuf; /* view the raw buffer as a meta page */
- dbp->type = DB_UNKNOWN;
-
- if ((ret = __db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0)
- return (ret);
-
- /*
- * Seek to the metadata page.
- * Note that if we're just starting a verification, dbp->pgsize
- * may be zero; this is okay, as we want page zero anyway and
- * 0*0 == 0.
- */
- if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0 ||
- (ret = __os_read(env, fhp, mbuf, DBMETASIZE, &nr)) != 0) {
- __db_err(env, ret,
- "Metadata page %lu cannot be read", (u_long)PGNO_BASE_MD);
- return (ret);
- }
-
- if (nr != DBMETASIZE) {
- EPRINT((env,
- "Page %lu: Incomplete metadata page",
- (u_long)PGNO_BASE_MD));
- return (DB_VERIFY_FATAL);
- }
-
- if ((ret = __db_chk_meta(env, dbp, meta, 1)) != 0) {
- EPRINT((env,
- "Page %lu: metadata page corrupted", (u_long)PGNO_BASE_MD));
- isbad = 1;
- if (ret != -1) { /* only -1 lets verification continue */
- EPRINT((env,
- "Page %lu: could not check metadata page",
- (u_long)PGNO_BASE_MD));
- return (DB_VERIFY_FATAL);
- }
- }
-
- /*
- * Check all of the fields that we can.
- *
- * 08-11: Current page number. Must == pgno.
- * Note that endianness doesn't matter--it's zero.
- */
- if (meta->pgno != PGNO_BASE_MD) {
- isbad = 1;
- EPRINT((env, "Page %lu: pgno incorrectly set to %lu",
- (u_long)PGNO_BASE_MD, (u_long)meta->pgno));
- }
-
- /* 12-15: Magic number. Must be one of valid set. */
- if (__db_is_valid_magicno(meta->magic, &dbp->type))
- swapped = 0;
- else {
- M_32_SWAP(meta->magic); /* retry in the opposite byte order */
- if (__db_is_valid_magicno(meta->magic,
- &dbp->type))
- swapped = 1;
- else {
- isbad = 1;
- EPRINT((env,
- "Page %lu: bad magic number %lu",
- (u_long)PGNO_BASE_MD, (u_long)meta->magic));
- }
- }
-
- /*
- * 16-19: Version. Must be current; for now, we
- * don't support verification of old versions.
- */
- if (swapped)
- M_32_SWAP(meta->version);
- if ((dbp->type == DB_BTREE &&
- (meta->version > DB_BTREEVERSION ||
- meta->version < DB_BTREEOLDVER)) ||
- (dbp->type == DB_HASH &&
- (meta->version > DB_HASHVERSION ||
- meta->version < DB_HASHOLDVER)) ||
- (dbp->type == DB_QUEUE &&
- (meta->version > DB_QAMVERSION ||
- meta->version < DB_QAMOLDVER))) {
- isbad = 1;
- EPRINT((env,
- "Page %lu: unsupported DB version %lu; extraneous errors may result",
- (u_long)PGNO_BASE_MD, (u_long)meta->version));
- }
-
- /*
- * 20-23: Pagesize. Must be power of two,
- * greater than 512, and less than 64K.
- */
- if (swapped)
- M_32_SWAP(meta->pagesize);
- if (IS_VALID_PAGESIZE(meta->pagesize))
- dbp->pgsize = meta->pagesize;
- else {
- isbad = 1;
- EPRINT((env, "Page %lu: bad page size %lu",
- (u_long)PGNO_BASE_MD, (u_long)meta->pagesize));
-
- /*
- * Now try to settle on a pagesize to use.
- * If the user-supplied one is reasonable,
- * use it; else, guess.
- */
- if (!IS_VALID_PAGESIZE(dbp->pgsize))
- dbp->pgsize = __db_guesspgsize(env, fhp);
- }
-
- /*
- * 25: Page type. Must be correct for dbp->type,
- * which is by now set as well as it can be.
- */
- /* Needs no swapping--only one byte! */
- if ((dbp->type == DB_BTREE && meta->type != P_BTREEMETA) ||
- (dbp->type == DB_HASH && meta->type != P_HASHMETA) ||
- (dbp->type == DB_QUEUE && meta->type != P_QAMMETA)) {
- isbad = 1;
- EPRINT((env, "Page %lu: bad page type %lu",
- (u_long)PGNO_BASE_MD, (u_long)meta->type));
- }
-
- /*
- * 26: Meta-flags.
- * Any bit outside the three known flags is an error; the known
- * flags are mirrored onto pip, and partitioning is initialized
- * when either partition flag is present.
- */
- if (meta->metaflags != 0) {
- if (FLD_ISSET(meta->metaflags,
- ~(DBMETA_CHKSUM|DBMETA_PART_RANGE|DBMETA_PART_CALLBACK))) {
- isbad = 1;
- EPRINT((env,
- "Page %lu: bad meta-data flags value %#lx",
- (u_long)PGNO_BASE_MD, (u_long)meta->metaflags));
- }
- if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
- F_SET(pip, VRFY_HAS_CHKSUM);
- if (FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))
- F_SET(pip, VRFY_HAS_PART_RANGE);
- if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK))
- F_SET(pip, VRFY_HAS_PART_CALLBACK);
-
- if (FLD_ISSET(meta->metaflags,
- DBMETA_PART_RANGE | DBMETA_PART_CALLBACK) &&
- (ret = __partition_init(dbp, meta->metaflags)) != 0)
- return (ret);
- }
-
- /*
- * 28-31: Free list page number.
- * 32-35: Last page in database file.
- * We'll verify its sensibility when we do inter-page
- * verification later; for now, just store it.
- */
- if (swapped)
- M_32_SWAP(meta->free);
- freelist = meta->free;
- if (swapped)
- M_32_SWAP(meta->last_pgno);
- vdp->meta_last_pgno = meta->last_pgno;
-
- /*
- * Initialize vdp->pages to fit a single pageinfo structure for
- * this one page. We'll realloc later when we know how many
- * pages there are.
- */
- pip->pgno = PGNO_BASE_MD;
- pip->type = meta->type;
-
- /*
- * Signal that we still have to check the info specific to
- * a given type of meta page.
- */
- F_SET(pip, VRFY_INCOMPLETE);
-
- pip->free = freelist;
-
- if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
- return (ret);
-
- /* Set up the dbp's fileid. We don't use the regular open path. */
- memcpy(dbp->fileid, meta->uid, DB_FILE_ID_LEN);
-
- if (swapped == 1)
- F_SET(dbp, DB_AM_SWAP);
-
- return (isbad ? DB_VERIFY_BAD : 0);
-}
-
-/*
- * __db_vrfy_walkpages --
- * Main loop of the verifier/salvager. Walks through,
- * page by page, and verifies all pages and/or prints all data pages.
- *
- * Iterates page numbers 0 through vdp->last_pgno, fetching each page
- * from the memory pool. With DB_SALVAGE set, pages already marked done
- * are skipped and the rest are handed to __db_salvage_pg; a failure
- * there is recorded and the walk continues. Without DB_SALVAGE each
- * page goes through __db_vrfy_common (except PGNO_BASE_MD) and then a
- * verifier chosen by page type; any error other than DB_VERIFY_BAD
- * ends the walk. If VRFY_QMETA_SET is set on vdp,
- * __qam_vrfy_walkqueue is called after the loop.
- *
- * Returns 0, DB_VERIFY_BAD if corruption was noted and no other error
- * was recorded, or the recorded error.
- */
-static int
-__db_vrfy_walkpages(dbp, vdp, handle, callback, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- void *handle;
- int (*callback) __P((void *, const void *));
- u_int32_t flags;
-{
- DB_MPOOLFILE *mpf;
- ENV *env;
- PAGE *h;
- VRFY_PAGEINFO *pip;
- db_pgno_t i;
- int ret, t_ret, isbad;
-
- env = dbp->env;
- mpf = dbp->mpf;
- h = NULL;
- ret = isbad = t_ret = 0;
-
- for (i = 0; i <= vdp->last_pgno; i++) {
- /*
- * If DB_SALVAGE is set, we inspect our database of completed
- * pages, and skip any we've already printed in the subdb pass.
- */
- if (LF_ISSET(DB_SALVAGE) && (__db_salvage_isdone(vdp, i) != 0))
- continue;
-
- /*
- * An individual page get can fail if:
- * * This is a hash database, it is expected to find
- * empty buckets, which don't have allocated pages. Create
- * a dummy page so the verification can proceed.
- * * We are salvaging, flag the error and continue.
- */
- if ((t_ret = __memp_fget(mpf, &i,
- vdp->thread_info, NULL, 0, &h)) != 0) {
- if (dbp->type == DB_HASH) {
- if ((t_ret =
- __db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
- goto err1;
- pip->type = P_INVALID;
- pip->pgno = i;
- F_CLR(pip, VRFY_IS_ALLZEROES);
- if ((t_ret = __db_vrfy_putpageinfo(
- env, vdp, pip)) != 0)
- goto err1;
- continue;
- }
- if (t_ret == DB_PAGE_NOTFOUND) {
- EPRINT((env,
- "Page %lu: beyond the end of the file, metadata page has last page as %lu",
- (u_long)i, (u_long)vdp->last_pgno));
- if (ret == 0)
- return (t_ret); /* NOTE(review): returns even when salvaging */
- }
-
-err1: if (ret == 0)
- ret = t_ret;
- if (LF_ISSET(DB_SALVAGE))
- continue;
- return (ret);
- }
-
- if (LF_ISSET(DB_SALVAGE)) {
- /*
- * We pretty much don't want to quit unless a
- * bomb hits. May as well return that something
- * was screwy, however.
- */
- if ((t_ret = __db_salvage_pg(dbp,
- vdp, i, h, handle, callback, flags)) != 0) {
- if (ret == 0)
- ret = t_ret;
- isbad = 1;
- }
- } else {
- /*
- * If we are not salvaging, and we get any error
- * other than DB_VERIFY_BAD, return immediately;
- * it may not be safe to proceed. If we get
- * DB_VERIFY_BAD, keep going; listing more errors
- * may make it easier to diagnose problems and
- * determine the magnitude of the corruption.
- *
- * Verify info common to all page types.
- */
- if (i != PGNO_BASE_MD) {
- ret = __db_vrfy_common(dbp, vdp, h, i, flags);
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else if (ret != 0)
- goto err;
- }
-
- switch (TYPE(h)) {
- case P_INVALID:
- ret = __db_vrfy_invalid(dbp, vdp, h, i, flags);
- break;
- case __P_DUPLICATE:
- isbad = 1;
- EPRINT((env,
- "Page %lu: old-style duplicate page",
- (u_long)i));
- break;
- case P_HASH_UNSORTED:
- case P_HASH:
- ret = __ham_vrfy(dbp, vdp, h, i, flags);
- break;
- case P_IBTREE:
- case P_IRECNO:
- case P_LBTREE:
- case P_LDUP:
- ret = __bam_vrfy(dbp, vdp, h, i, flags);
- break;
- case P_LRECNO:
- ret = __ram_vrfy_leaf(dbp, vdp, h, i, flags);
- break;
- case P_OVERFLOW:
- ret = __db_vrfy_overflow(dbp, vdp, h, i, flags);
- break;
- case P_HASHMETA:
- ret = __ham_vrfy_meta(dbp,
- vdp, (HMETA *)h, i, flags);
- break;
- case P_BTREEMETA:
- ret = __bam_vrfy_meta(dbp,
- vdp, (BTMETA *)h, i, flags);
- break;
- case P_QAMMETA:
- ret = __qam_vrfy_meta(dbp,
- vdp, (QMETA *)h, i, flags);
- break;
- case P_QAMDATA:
- ret = __qam_vrfy_data(dbp,
- vdp, (QPAGE *)h, i, flags);
- break;
- default:
- EPRINT((env,
- "Page %lu: unknown page type %lu",
- (u_long)i, (u_long)TYPE(h)));
- isbad = 1;
- break;
- }
-
- /*
- * Set up error return.
- */
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else if (ret != 0)
- goto err;
-
- /*
- * Provide feedback to the application about our
- * progress. The range 0-50% comes from the fact
- * that this is the first of two passes through the
- * database (front-to-back, then top-to-bottom).
- */
- if (dbp->db_feedback != NULL)
- dbp->db_feedback(dbp, DB_VERIFY,
- (int)((i + 1) * 50 / (vdp->last_pgno + 1)));
- }
-
- /*
- * Just as with the page get, bail if and only if we're
- * not salvaging.
- */
- if ((t_ret = __memp_fput(mpf,
- vdp->thread_info, h, dbp->priority)) != 0) {
- if (ret == 0)
- ret = t_ret;
- if (!LF_ISSET(DB_SALVAGE))
- return (ret);
- }
- }
-
- /*
- * If we've seen a Queue metadata page, we may need to walk Queue
- * extent pages that won't show up between 0 and vdp->last_pgno.
- */
- if (F_ISSET(vdp, VRFY_QMETA_SET) && (t_ret =
- __qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags)) != 0) {
- if (ret == 0)
- ret = t_ret;
- if (t_ret == DB_VERIFY_BAD)
- isbad = 1;
- else if (!LF_ISSET(DB_SALVAGE))
- return (ret);
- }
-
- if (0) { /* entered only through the "err" label below */
-err: if (h != NULL && (t_ret = __memp_fput(mpf,
- vdp->thread_info, h, dbp->priority)) != 0)
- return (ret == 0 ? t_ret : ret);
- }
-
- return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
-}
-
-/*
- * __db_vrfy_structure--
- * After a beginning-to-end walk through the database has been
- * completed, put together the information that has been collected
- * to verify the overall database structure.
- *
- * Should only be called if we want to do a database verification,
- * i.e. if DB_SALVAGE is not set.
- *
- * Dispatches on dbp->type to the btree/recno, hash or queue structure
- * verifier. For btree/recno, subdatabases are also walked when page 0
- * carries VRFY_HAS_SUBDBS and nothing has been flagged bad so far.
- * Btree/recno and hash then walk the free list (starting from
- * meta_pgno) and, if everything is still clean, check every page from
- * 0 through vdp->last_pgno for being unreferenced or still marked
- * all-zeroes. Queue skips the free-list and per-page checks.
- *
- * lp and rp are passed through unchanged to __bam_vrfy_structure.
- *
- * Returns 0, DB_VERIFY_BAD if a problem was flagged and no other error
- * occurred, or the error returned by a helper.
- */
-static int
-__db_vrfy_structure(dbp, vdp, dbname, meta_pgno, lp, rp, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- const char *dbname;
- db_pgno_t meta_pgno;
- void *lp, *rp;
- u_int32_t flags;
-{
- DB *pgset;
- ENV *env;
- VRFY_PAGEINFO *pip;
- db_pgno_t i;
- int ret, isbad, hassubs, p;
-
- isbad = 0;
- pip = NULL;
- env = dbp->env;
- pgset = vdp->pgset;
-
- /*
- * Providing feedback here is tricky; in most situations,
- * we fetch each page one more time, but we do so in a top-down
- * order that depends on the access method. Worse, we do this
- * recursively in btree, such that on any call where we're traversing
- * a subtree we don't know where that subtree is in the whole database;
- * worse still, any given database may be one of several subdbs.
- *
- * The solution is to decrement a counter vdp->pgs_remaining each time
- * we verify (and call feedback on) a page. We may over- or
- * under-count, but the structure feedback function will ensure that we
- * never give a percentage under 50 or over 100. (The first pass
- * covered the range 0-50%.)
- */
- if (dbp->db_feedback != NULL)
- vdp->pgs_remaining = vdp->last_pgno + 1;
-
- /*
- * Call the appropriate function to downwards-traverse the db type.
- */
- switch (dbp->type) {
- case DB_BTREE:
- case DB_RECNO:
- if ((ret =
- __bam_vrfy_structure(dbp, vdp, 0, lp, rp, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
-
- /*
- * If we have subdatabases and we know that the database is,
- * thus far, sound, it's safe to walk the tree of subdatabases.
- * Do so, and verify the structure of the databases within.
- */
- if ((ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) != 0)
- goto err;
- hassubs = F_ISSET(pip, VRFY_HAS_SUBDBS) ? 1 : 0;
- if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
- goto err;
- pip = NULL; /* already released; keep "err" from releasing it again */
-
- if (isbad == 0 && hassubs)
- if ((ret =
- __db_vrfy_subdbs(dbp, vdp, dbname, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
- break;
- case DB_HASH:
- if ((ret = __ham_vrfy_structure(dbp, vdp, 0, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
- break;
- case DB_QUEUE:
- if ((ret = __qam_vrfy_structure(dbp, vdp, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- }
-
- /*
- * Queue pages may be unreferenced and totally zeroed, if
- * they're empty; queue doesn't have much structure, so
- * this is unlikely to be wrong in any troublesome sense.
- * Skip to "err".
- */
- goto err;
- case DB_UNKNOWN:
- default:
- ret = __db_unknown_path(env, "__db_vrfy_structure");
- goto err;
- }
-
- /* Walk free list. */
- if ((ret =
- __db_vrfy_freelist(dbp, vdp, meta_pgno, flags)) == DB_VERIFY_BAD)
- isbad = 1;
-
- /*
- * If structure checks up until now have failed, it's likely that
- * checking what pages have been missed will result in oodles of
- * extraneous error messages being EPRINTed. Skip to the end
- * if this is the case; we're going to be printing at least one
- * error anyway, and probably all the more salient ones.
- */
- if (ret != 0 || isbad == 1)
- goto err;
-
- /*
- * Make sure no page has been missed and that no page is still marked
- * "all zeroes" (only certain hash pages can be, and they're unmarked
- * in __ham_vrfy_structure).
- *
- * p receives the reference count recorded for page i in pgset: for
- * overflow pages it must equal pip->refcount, for other pages a
- * count of zero means the page was never reached.
- */
- for (i = 0; i < vdp->last_pgno + 1; i++) {
- if ((ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
- goto err;
- if ((ret = __db_vrfy_pgset_get(pgset,
- vdp->thread_info, i, &p)) != 0)
- goto err;
- if (pip->type == P_OVERFLOW) {
- if ((u_int32_t)p != pip->refcount) {
- EPRINT((env,
- "Page %lu: overflow refcount %lu, referenced %lu times",
- (u_long)i,
- (u_long)pip->refcount, (u_long)p));
- isbad = 1;
- }
- } else if (p == 0 &&
-#ifndef HAVE_FTRUNCATE
- !(i > vdp->meta_last_pgno &&
- (F_ISSET(pip, VRFY_IS_ALLZEROES) || pip->type == P_HASH)) &&
-#endif
- !(dbp->type == DB_HASH && pip->type == P_INVALID)) {
- /*
- * It is OK for unreferenced hash buckets to be
- * marked invalid and unreferenced.
- */
- EPRINT((env,
- "Page %lu: unreferenced page", (u_long)i));
- isbad = 1;
- }
-
- if (F_ISSET(pip, VRFY_IS_ALLZEROES)
-#ifndef HAVE_FTRUNCATE
- && i <= vdp->meta_last_pgno
-#endif
- ) {
- EPRINT((env,
- "Page %lu: totally zeroed page", (u_long)i));
- isbad = 1;
- }
- if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
- goto err;
- pip = NULL;
- }
-
-err: if (pip != NULL)
- (void)__db_vrfy_putpageinfo(env, vdp, pip);
-
- return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
-}
-
-/*
- * __db_is_valid_magicno --
- *	Map a metadata magic number onto its access method.  Stores
- *	DB_BTREE, DB_HASH or DB_QUEUE through typep and returns 1 when
- *	magic is one of the three known values; otherwise stores
- *	DB_UNKNOWN through typep and returns 0.
- */
-static int
-__db_is_valid_magicno(magic, typep)
-	u_int32_t magic;
-	DBTYPE *typep;
-{
-	DBTYPE kind;
-
-	if (magic == DB_BTREEMAGIC)
-		kind = DB_BTREE;
-	else if (magic == DB_HASHMAGIC)
-		kind = DB_HASH;
-	else if (magic == DB_QAMMAGIC)
-		kind = DB_QUEUE;
-	else
-		kind = DB_UNKNOWN;
-
-	/* The caller always gets a type back, even on failure. */
-	*typep = kind;
-	return (kind == DB_UNKNOWN ? 0 : 1);
-}
-
-/*
- * __db_vrfy_common --
- * Verify info common to all page types.
- *
- * Records pgno in the page's pageinfo and clears VRFY_IS_ALLZEROES.
- * A page other than page 0 whose stored page number is 0 is scanned
- * byte by byte: it is tagged P_HASH, flagged VRFY_IS_ALLZEROES only if
- * every byte is zero, and 0 is returned without further checks.
- * Otherwise the stored page number must equal pgno and the page type
- * must be one of the known types; either failure yields
- * DB_VERIFY_BAD. The page type is then stored in the pageinfo.
- *
- * Returns 0, DB_VERIFY_BAD, or an error from the pageinfo helpers.
- *
- * PUBLIC: int __db_vrfy_common
- * PUBLIC: __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
- */
-int
-__db_vrfy_common(dbp, vdp, h, pgno, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- PAGE *h;
- db_pgno_t pgno;
- u_int32_t flags;
-{
- ENV *env;
- VRFY_PAGEINFO *pip;
- int ret, t_ret;
- u_int8_t *p;
-
- env = dbp->env;
-
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
-
- pip->pgno = pgno;
- F_CLR(pip, VRFY_IS_ALLZEROES);
-
- /*
- * Hash expands the table by leaving some pages between the
- * old last and the new last totally zeroed. These pages may
- * not be all zero if they were used, freed and then reallocated.
- *
- * Queue will create sparse files if sparse record numbers are used.
- */
- if (pgno != 0 && PGNO(h) == 0) {
- F_SET(pip, VRFY_IS_ALLZEROES); /* assume zeroed until a nonzero byte is seen */
- for (p = (u_int8_t *)h; p < (u_int8_t *)h + dbp->pgsize; p++)
- if (*p != 0) {
- F_CLR(pip, VRFY_IS_ALLZEROES);
- break;
- }
- /*
- * Mark it as a hash, and we'll
- * check that that makes sense structurally later.
- * (The queue verification doesn't care, since queues
- * don't really have much in the way of structure.)
- */
- pip->type = P_HASH;
- ret = 0;
- goto err; /* well, not really an err. */
- }
-
- if (PGNO(h) != pgno) {
- EPRINT((env, "Page %lu: bad page number %lu",
- (u_long)pgno, (u_long)h->pgno));
- ret = DB_VERIFY_BAD;
- }
-
- switch (h->type) {
- case P_INVALID: /* Order matches ordinal value. */
- case P_HASH_UNSORTED:
- case P_IBTREE:
- case P_IRECNO:
- case P_LBTREE:
- case P_LRECNO:
- case P_OVERFLOW:
- case P_HASHMETA:
- case P_BTREEMETA:
- case P_QAMMETA:
- case P_QAMDATA:
- case P_LDUP:
- case P_HASH:
- break;
- default:
- EPRINT((env, "Page %lu: bad page type %lu",
- (u_long)pgno, (u_long)h->type));
- ret = DB_VERIFY_BAD;
- }
- pip->type = h->type; /* stored even when the type was rejected above */
-
-err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
-
-/*
- * __db_vrfy_invalid --
- *	Verify a P_INVALID page.  The only field examined is next_pgno:
- *	the page's pageinfo gets prev_pgno and next_pgno reset to 0, and
- *	next_pgno is then filled in from the page when it is a valid
- *	page number.  An invalid next_pgno yields DB_VERIFY_BAD.
- *
- *	Returns 0, DB_VERIFY_BAD, or an error from the pageinfo helpers.
- */
-static int
-__db_vrfy_invalid(dbp, vdp, h, pgno, flags)
-	DB *dbp;
-	VRFY_DBINFO *vdp;
-	PAGE *h;
-	db_pgno_t pgno;
-	u_int32_t flags;
-{
-	ENV *env;
-	VRFY_PAGEINFO *info;
-	int put_ret, status;
-
-	env = dbp->env;
-
-	status = __db_vrfy_getpageinfo(vdp, pgno, &info);
-	if (status != 0)
-		return (status);
-
-	info->prev_pgno = 0;
-	info->next_pgno = 0;
-
-	if (IS_VALID_PGNO(NEXT_PGNO(h)))
-		info->next_pgno = NEXT_PGNO(h);
-	else {
-		EPRINT((env, "Page %lu: invalid next_pgno %lu",
-		    (u_long)pgno, (u_long)NEXT_PGNO(h)));
-		status = DB_VERIFY_BAD;
-	}
-
-	/* Always release the pageinfo; report its error only if first. */
-	put_ret = __db_vrfy_putpageinfo(env, vdp, info);
-	if (put_ret != 0 && status == 0)
-		status = put_ret;
-	return (status);
-}
-
-/*
- * __db_vrfy_datapage --
- * Verify elements common to data pages (P_HASH, P_LBTREE,
- * P_IBTREE, P_IRECNO, P_LRECNO, P_OVERFLOW, P_DUPLICATE)--i.e.,
- * those defined in the PAGE structure.
- *
- * Called from each of the per-page routines, after the
- * all-page-type-common elements of pip have been verified and filled
- * in.
- *
- * Checks prev_pgno/next_pgno (skipped for internal btree/recno pages),
- * that the entry count is not larger than could fit on a page, and the
- * btree level. Stores prev_pgno and next_pgno, the entry count (except
- * for overflow pages) and, for internal pages, the btree level in the
- * page's pageinfo.
- *
- * Returns 0, DB_VERIFY_BAD if any check failed, or an error from the
- * pageinfo helpers.
- *
- * PUBLIC: int __db_vrfy_datapage
- * PUBLIC: __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
- */
-int
-__db_vrfy_datapage(dbp, vdp, h, pgno, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- PAGE *h;
- db_pgno_t pgno;
- u_int32_t flags;
-{
- ENV *env;
- VRFY_PAGEINFO *pip;
- u_int32_t smallest_entry;
- int isbad, ret, t_ret;
-
- env = dbp->env;
-
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
- isbad = 0;
-
- /*
- * prev_pgno and next_pgno: store for inter-page checks,
- * verify that they point to actual pages and not to self.
- *
- * !!!
- * Internal btree pages do not maintain these fields (indeed,
- * they overload them). Skip.
- */
- if (TYPE(h) != P_IBTREE && TYPE(h) != P_IRECNO) {
- if (!IS_VALID_PGNO(PREV_PGNO(h)) || PREV_PGNO(h) == pip->pgno) {
- isbad = 1;
- EPRINT((env, "Page %lu: invalid prev_pgno %lu",
- (u_long)pip->pgno, (u_long)PREV_PGNO(h)));
- }
- if (!IS_VALID_PGNO(NEXT_PGNO(h)) || NEXT_PGNO(h) == pip->pgno) {
- isbad = 1;
- EPRINT((env, "Page %lu: invalid next_pgno %lu",
- (u_long)pip->pgno, (u_long)NEXT_PGNO(h)));
- }
- pip->prev_pgno = PREV_PGNO(h);
- pip->next_pgno = NEXT_PGNO(h);
- }
-
- /*
- * Verify the number of entries on the page: there's no good way to
- * determine if this is accurate. The best we can do is verify that
- * it's not more than can, in theory, fit on the page. Then, we make
- * sure there are at least this many valid elements in inp[], and
- * hope the test catches most cases.
- */
- switch (TYPE(h)) {
- case P_HASH_UNSORTED:
- case P_HASH:
- smallest_entry = HKEYDATA_PSIZE(0);
- break;
- case P_IBTREE:
- smallest_entry = BINTERNAL_PSIZE(0);
- break;
- case P_IRECNO:
- smallest_entry = RINTERNAL_PSIZE;
- break;
- case P_LBTREE:
- case P_LDUP:
- case P_LRECNO:
- smallest_entry = BKEYDATA_PSIZE(0);
- break;
- default:
- smallest_entry = 0; /* disables the size check below */
- break;
- }
- if (smallest_entry * NUM_ENT(h) / 2 > dbp->pgsize) {
- isbad = 1;
- EPRINT((env, "Page %lu: too many entries: %lu",
- (u_long)pgno, (u_long)NUM_ENT(h)));
- }
-
- if (TYPE(h) != P_OVERFLOW)
- pip->entries = NUM_ENT(h);
-
- /*
- * btree level. Should be zero unless we're a btree;
- * if we are a btree, should be between LEAFLEVEL and MAXBTREELEVEL,
- * and we need to save it off.
- */
- switch (TYPE(h)) {
- case P_IBTREE:
- case P_IRECNO:
- if (LEVEL(h) < LEAFLEVEL + 1) {
- isbad = 1;
- EPRINT((env, "Page %lu: bad btree level %lu",
- (u_long)pgno, (u_long)LEVEL(h)));
- }
- pip->bt_level = LEVEL(h);
- break;
- case P_LBTREE:
- case P_LDUP:
- case P_LRECNO:
- if (LEVEL(h) != LEAFLEVEL) {
- isbad = 1;
- EPRINT((env,
- "Page %lu: btree leaf page has incorrect level %lu",
- (u_long)pgno, (u_long)LEVEL(h)));
- }
- break;
- default:
- if (LEVEL(h) != 0) {
- isbad = 1;
- EPRINT((env,
- "Page %lu: nonzero level %lu in non-btree database",
- (u_long)pgno, (u_long)LEVEL(h)));
- }
- break;
- }
-
- /*
- * Even though inp[] occurs in all PAGEs, we look at it in the
- * access-method-specific code, since btree and hash treat
- * item lengths very differently, and one of the most important
- * things we want to verify is that the data--as specified
- * by offset and length--cover the right part of the page
- * without overlaps, gaps, or violations of the page boundary.
- */
- if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
-
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
-}
-
-/*
- * __db_vrfy_meta--
- * Verify the access-method common parts of a meta page, using
- * normal mpool routines.
- *
- * Checks the page type, magic number, version, page size, meta-flags,
- * free-list head and (for the master meta page only) last_pgno of the
- * meta page at pgno. Records the VRFY_HAS_* flags and a valid free-list
- * head in the page's pageinfo and clears VRFY_INCOMPLETE once the
- * common fields have been examined.
- *
- * Returns 0, DB_VERIFY_BAD if any check failed, or an error from a
- * helper (including __db_unknown_path for an unexpected page type).
- *
- * PUBLIC: int __db_vrfy_meta
- * PUBLIC: __P((DB *, VRFY_DBINFO *, DBMETA *, db_pgno_t, u_int32_t));
- */
-int
-__db_vrfy_meta(dbp, vdp, meta, pgno, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- DBMETA *meta;
- db_pgno_t pgno;
- u_int32_t flags;
-{
- DBTYPE dbtype, magtype;
- ENV *env;
- VRFY_PAGEINFO *pip;
- int isbad, ret, t_ret;
-
- isbad = 0;
- env = dbp->env;
-
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
-
- /* type plausible for a meta page */
- switch (meta->type) {
- case P_BTREEMETA:
- dbtype = DB_BTREE;
- break;
- case P_HASHMETA:
- dbtype = DB_HASH;
- break;
- case P_QAMMETA:
- dbtype = DB_QUEUE;
- break;
- default:
- ret = __db_unknown_path(env, "__db_vrfy_meta");
- goto err;
- }
-
- /* magic number valid */
- if (!__db_is_valid_magicno(meta->magic, &magtype)) {
- isbad = 1;
- EPRINT((env,
- "Page %lu: invalid magic number", (u_long)pgno));
- }
- if (magtype != dbtype) {
- isbad = 1;
- EPRINT((env,
- "Page %lu: magic number does not match database type",
- (u_long)pgno));
- }
-
- /* version */
- if ((dbtype == DB_BTREE &&
- (meta->version > DB_BTREEVERSION ||
- meta->version < DB_BTREEOLDVER)) ||
- (dbtype == DB_HASH &&
- (meta->version > DB_HASHVERSION ||
- meta->version < DB_HASHOLDVER)) ||
- (dbtype == DB_QUEUE &&
- (meta->version > DB_QAMVERSION ||
- meta->version < DB_QAMOLDVER))) {
- isbad = 1;
- EPRINT((env,
- "Page %lu: unsupported database version %lu; extraneous errors may result",
- (u_long)pgno, (u_long)meta->version));
- }
-
- /* pagesize */
- if (meta->pagesize != dbp->pgsize) {
- isbad = 1;
- EPRINT((env, "Page %lu: invalid pagesize %lu",
- (u_long)pgno, (u_long)meta->pagesize));
- }
-
- /* Flags */
- if (meta->metaflags != 0) {
- if (FLD_ISSET(meta->metaflags,
- ~(DBMETA_CHKSUM|DBMETA_PART_RANGE|DBMETA_PART_CALLBACK))) {
- isbad = 1;
- EPRINT((env,
- "Page %lu: bad meta-data flags value %#lx",
- (u_long)pgno, (u_long)meta->metaflags)); /* report the page being checked, which may be a subdatabase meta page */
- }
- if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM))
- F_SET(pip, VRFY_HAS_CHKSUM);
- if (FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))
- F_SET(pip, VRFY_HAS_PART_RANGE);
- if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK))
- F_SET(pip, VRFY_HAS_PART_CALLBACK);
- }
-
- /*
- * Free list.
- *
- * If this is not the main, master-database meta page, it
- * should not have a free list.
- */
- if (pgno != PGNO_BASE_MD && meta->free != PGNO_INVALID) {
- isbad = 1;
- EPRINT((env,
- "Page %lu: nonempty free list on subdatabase metadata page",
- (u_long)pgno));
- }
-
- /* Can correctly be PGNO_INVALID--that's just the end of the list. */
- if (meta->free != PGNO_INVALID && IS_VALID_PGNO(meta->free))
- pip->free = meta->free;
- else if (!IS_VALID_PGNO(meta->free)) {
- isbad = 1;
- EPRINT((env,
- "Page %lu: nonsensical free list pgno %lu",
- (u_long)pgno, (u_long)meta->free));
- }
-
- /*
- * Check that the meta page agrees with what we got from mpool.
- * If we don't have FTRUNCATE then mpool could include some
- * zeroed pages at the end of the file, we assume the meta page
- * is correct.
- */
- if (pgno == PGNO_BASE_MD && meta->last_pgno != vdp->last_pgno) {
-#ifdef HAVE_FTRUNCATE
- isbad = 1;
- EPRINT((env,
- "Page %lu: last_pgno is not correct: %lu != %lu",
- (u_long)pgno,
- (u_long)meta->last_pgno, (u_long)vdp->last_pgno));
-#endif
- vdp->meta_last_pgno = meta->last_pgno;
- }
-
- /*
- * We have now verified the common fields of the metadata page.
- * Clear the flag that told us they had been incompletely checked.
- */
- F_CLR(pip, VRFY_INCOMPLETE);
-
-err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
-
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
-}
-
-/*
- * __db_vrfy_freelist --
- * Walk free list, checking off pages and verifying absence of
- * loops.
- *
- * "meta" is the page number of the metadata page whose pageinfo holds
- * the head of the free list (pip->free). Each page reached is counted
- * in vdp->pgset. An invalid page number, a page already counted in
- * pgset, or a page whose recorded type is not P_INVALID yields
- * DB_VERIFY_BAD. Errors from the pageinfo/pgset helpers are returned
- * as-is.
- *
- * NOTE(review): the final __db_vrfy_putpageinfo overwrites ret
- * unconditionally on failure, unlike the "&& ret == 0" idiom used by
- * the other verifiers in this file -- confirm that is intended.
- */
-static int
-__db_vrfy_freelist(dbp, vdp, meta, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- db_pgno_t meta;
- u_int32_t flags;
-{
- DB *pgset;
- ENV *env;
- VRFY_PAGEINFO *pip;
- db_pgno_t cur_pgno, next_pgno;
- int p, ret, t_ret;
-
- env = dbp->env;
- pgset = vdp->pgset;
- DB_ASSERT(env, pgset != NULL);
-
- if ((ret = __db_vrfy_getpageinfo(vdp, meta, &pip)) != 0)
- return (ret);
- for (next_pgno = pip->free;
- next_pgno != PGNO_INVALID; next_pgno = pip->next_pgno) {
- cur_pgno = pip->pgno; /* used only in the error messages below */
- if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
- return (ret);
-
- /* This shouldn't happen, but just in case. */
- if (!IS_VALID_PGNO(next_pgno)) {
- EPRINT((env,
- "Page %lu: invalid next_pgno %lu on free list page",
- (u_long)cur_pgno, (u_long)next_pgno));
- return (DB_VERIFY_BAD);
- }
-
- /* Detect cycles. */
- if ((ret = __db_vrfy_pgset_get(pgset,
- vdp->thread_info, next_pgno, &p)) != 0)
- return (ret);
- if (p != 0) {
- EPRINT((env,
- "Page %lu: page %lu encountered a second time on free list",
- (u_long)cur_pgno, (u_long)next_pgno));
- return (DB_VERIFY_BAD);
- }
- if ((ret = __db_vrfy_pgset_inc(pgset,
- vdp->thread_info, next_pgno)) != 0)
- return (ret);
-
- if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0)
- return (ret);
-
- if (pip->type != P_INVALID) {
- EPRINT((env,
- "Page %lu: non-invalid page %lu on free list",
- (u_long)cur_pgno, (u_long)next_pgno));
- ret = DB_VERIFY_BAD; /* unsafe to continue */
- break;
- }
- }
-
- if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
- ret = t_ret;
- return (ret);
-}
-
-/*
- * __db_vrfy_subdbs --
- * Walk the known-safe master database of subdbs with a cursor,
- * verifying the structure of each subdatabase we encounter.
- *
- * Opens the master database read-only and iterates it with DB_NEXT.
- * Each data item must be exactly sizeof(db_pgno_t) bytes and, after
- * byte-order conversion, name a page other than PGNO_INVALID that is
- * not beyond vdp->last_pgno. A btree meta page is handed to
- * __bam_vrfy_structure and a hash meta page to __ham_vrfy_structure;
- * any other recorded page type is reported as DB_VERIFY_BAD.
- *
- * The cursor and the master handle are closed on every path past the
- * open. Returns 0, DB_VERIFY_BAD, or an error from a helper.
- */
-static int
-__db_vrfy_subdbs(dbp, vdp, dbname, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- const char *dbname;
- u_int32_t flags;
-{
- DB *mdbp;
- DBC *dbc;
- DBT key, data;
- ENV *env;
- VRFY_PAGEINFO *pip;
- db_pgno_t meta_pgno;
- int ret, t_ret, isbad;
- u_int8_t type;
-
- isbad = 0;
- dbc = NULL;
- env = dbp->env;
-
- if ((ret = __db_master_open(dbp,
- vdp->thread_info, NULL, dbname, DB_RDONLY, 0, &mdbp)) != 0)
- return (ret);
-
- if ((ret = __db_cursor_int(mdbp, NULL,
- NULL, DB_BTREE, PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0)
- goto err;
-
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
- while ((ret = __dbc_get(dbc, &key, &data, DB_NEXT)) == 0) {
- if (data.size != sizeof(db_pgno_t)) {
- EPRINT((env,
- "Subdatabase entry not page-number size"));
- isbad = 1;
- goto err;
- }
- memcpy(&meta_pgno, data.data, data.size);
- /*
- * Subdatabase meta pgnos are stored in network byte
- * order for cross-endian compatibility. Swap if appropriate.
- */
- DB_NTOHL_SWAP(env, &meta_pgno);
- if (meta_pgno == PGNO_INVALID || meta_pgno > vdp->last_pgno) {
- EPRINT((env,
- "Subdatabase entry references invalid page %lu",
- (u_long)meta_pgno));
- isbad = 1;
- goto err;
- }
- if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &pip)) != 0)
- goto err;
- type = pip->type; /* copy out so pip can be released right away */
- if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
- goto err;
- switch (type) {
- case P_BTREEMETA:
- if ((ret = __bam_vrfy_structure(
- dbp, vdp, meta_pgno, NULL, NULL, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
- break;
- case P_HASHMETA:
- if ((ret = __ham_vrfy_structure(
- dbp, vdp, meta_pgno, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
- isbad = 1;
- else
- goto err;
- }
- break;
- case P_QAMMETA: /* handled like any other unexpected type */
- default:
- EPRINT((env,
- "Subdatabase entry references page %lu of invalid type %lu",
- (u_long)meta_pgno, (u_long)type));
- ret = DB_VERIFY_BAD;
- goto err;
- }
- }
-
- if (ret == DB_NOTFOUND) /* normal end of the cursor walk */
- ret = 0;
-
-err: if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- if ((t_ret = __db_close(mdbp, NULL, 0)) != 0 && ret == 0)
- ret = t_ret;
-
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
-}
-
-/*
- * __db_vrfy_struct_feedback --
- *	Report progress to the application's feedback callback while the
- *	database structure is being traversed top-down.  Does nothing if
- *	no callback is registered.  Each call consumes one unit of
- *	vdp->pgs_remaining (never going below zero) and reports a value
- *	derived from what is left, capped at 99.
- *
- * PUBLIC: void __db_vrfy_struct_feedback __P((DB *, VRFY_DBINFO *));
- */
-void
-__db_vrfy_struct_feedback(dbp, vdp)
-	DB *dbp;
-	VRFY_DBINFO *vdp;
-{
-	int pct;
-
-	if (dbp->db_feedback != NULL) {
-		if (vdp->pgs_remaining > 0)
-			--vdp->pgs_remaining;
-
-		pct = 100 -
-		    (int)(vdp->pgs_remaining * 50 / (vdp->last_pgno + 1));
-
-		/* 100 is reserved for the caller's final "done" report. */
-		if (pct == 100)
-			pct = 99;
-		dbp->db_feedback(dbp, DB_VERIFY, pct);
-	}
-}
-
-/*
- * __db_vrfy_orderchkonly --
- * Do an sort-order/hashing check on a known-otherwise-good subdb.
- */
-static int
-__db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- const char *name, *subdb;
- u_int32_t flags;
-{
- BTMETA *btmeta;
- DB *mdbp, *pgset;
- DBC *pgsc;
- DBT key, data;
- DB_MPOOLFILE *mpf;
- ENV *env;
- HASH *h_internal;
- HMETA *hmeta;
- PAGE *h, *currpg;
- db_pgno_t meta_pgno, p, pgno;
- u_int32_t bucket;
- int t_ret, ret;
-
- pgset = NULL;
- pgsc = NULL;
- env = dbp->env;
- mpf = dbp->mpf;
- currpg = h = NULL;
-
- LF_CLR(DB_NOORDERCHK);
-
- /* Open the master database and get the meta_pgno for the subdb. */
- if ((ret = __db_master_open(dbp,
- vdp->thread_info, NULL, name, DB_RDONLY, 0, &mdbp)) != 0)
- goto err;
-
- DB_INIT_DBT(key, subdb, strlen(subdb));
- memset(&data, 0, sizeof(data));
- if ((ret = __db_get(mdbp,
- vdp->thread_info, NULL, &key, &data, 0)) != 0) {
- if (ret == DB_NOTFOUND)
- ret = ENOENT;
- goto err;
- }
-
- if (data.size != sizeof(db_pgno_t)) {
- EPRINT((env, "Subdatabase entry of invalid size"));
- ret = DB_VERIFY_BAD;
- goto err;
- }
-
- memcpy(&meta_pgno, data.data, data.size);
-
- /*
- * Subdatabase meta pgnos are stored in network byte
- * order for cross-endian compatibility. Swap if appropriate.
- */
- DB_NTOHL_SWAP(env, &meta_pgno);
-
- if ((ret = __memp_fget(mpf,
- &meta_pgno, vdp->thread_info, NULL, 0, &h)) != 0)
- goto err;
-
- if ((ret = __db_vrfy_pgset(env,
- vdp->thread_info, dbp->pgsize, &pgset)) != 0)
- goto err;
-
- switch (TYPE(h)) {
- case P_BTREEMETA:
- btmeta = (BTMETA *)h;
- if (F_ISSET(&btmeta->dbmeta, BTM_RECNO)) {
- /* Recnos have no order to check. */
- ret = 0;
- goto err;
- }
- if ((ret =
- __db_meta2pgset(dbp, vdp, meta_pgno, flags, pgset)) != 0)
- goto err;
- if ((ret = __db_cursor_int(pgset, NULL, NULL, dbp->type,
- PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0)
- goto err;
- while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
- if ((ret = __memp_fget(mpf, &p,
- vdp->thread_info, NULL, 0, &currpg)) != 0)
- goto err;
- if ((ret = __bam_vrfy_itemorder(dbp, NULL,
- vdp->thread_info, currpg, p, NUM_ENT(currpg), 1,
- F_ISSET(&btmeta->dbmeta, BTM_DUP), flags)) != 0)
- goto err;
- if ((ret = __memp_fput(mpf,
- vdp->thread_info, currpg, dbp->priority)) != 0)
- goto err;
- currpg = NULL;
- }
-
- /*
- * The normal exit condition for the loop above is DB_NOTFOUND.
- * If we see that, zero it and continue on to cleanup.
- * Otherwise, it's a real error and will be returned.
- */
- if (ret == DB_NOTFOUND)
- ret = 0;
- break;
- case P_HASHMETA:
- hmeta = (HMETA *)h;
- h_internal = (HASH *)dbp->h_internal;
- /*
- * Make sure h_charkey is right.
- */
- if (h_internal == NULL) {
- EPRINT((env,
- "Page %lu: DB->h_internal field is NULL",
- (u_long)meta_pgno));
- ret = DB_VERIFY_BAD;
- goto err;
- }
- if (h_internal->h_hash == NULL)
- h_internal->h_hash = hmeta->dbmeta.version < 5
- ? __ham_func4 : __ham_func5;
- if (hmeta->h_charkey !=
- h_internal->h_hash(dbp, CHARKEY, sizeof(CHARKEY))) {
- EPRINT((env,
- "Page %lu: incorrect hash function for database",
- (u_long)meta_pgno));
- ret = DB_VERIFY_BAD;
- goto err;
- }
-
- /*
- * Foreach bucket, verify hashing on each page in the
- * corresponding chain of pages.
- */
- if ((ret = __db_cursor_int(dbp, NULL, NULL, dbp->type,
- PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0)
- goto err;
- for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) {
- pgno = BS_TO_PAGE(bucket, hmeta->spares);
- while (pgno != PGNO_INVALID) {
- if ((ret = __memp_fget(mpf, &pgno,
- vdp->thread_info, NULL, 0, &currpg)) != 0)
- goto err;
- if ((ret = __ham_vrfy_hashing(pgsc,
- NUM_ENT(currpg), hmeta, bucket, pgno,
- flags, h_internal->h_hash)) != 0)
- goto err;
- pgno = NEXT_PGNO(currpg);
- if ((ret = __memp_fput(mpf, vdp->thread_info,
- currpg, dbp->priority)) != 0)
- goto err;
- currpg = NULL;
- }
- }
- break;
- default:
- EPRINT((env, "Page %lu: database metapage of bad type %lu",
- (u_long)meta_pgno, (u_long)TYPE(h)));
- ret = DB_VERIFY_BAD;
- break;
- }
-
-err: if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0)
- ret = t_ret;
- if (pgset != NULL &&
- (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret == 0)
- ret = t_ret;
- if (h != NULL && (t_ret = __memp_fput(mpf,
- vdp->thread_info, h, dbp->priority)) != 0)
- ret = t_ret;
- if (currpg != NULL &&
- (t_ret = __memp_fput(mpf,
- vdp->thread_info, currpg, dbp->priority)) != 0)
- ret = t_ret;
- if ((t_ret = __db_close(mdbp, NULL, 0)) != 0)
- ret = t_ret;
- return (ret);
-}
-
-/*
- * __db_salvage_pg --
- * Walk through a page, salvaging all likely or plausible (w/
- * DB_AGGRESSIVE) key/data pairs and marking seen pages in vdp.
- *
- * PUBLIC: int __db_salvage_pg __P((DB *, VRFY_DBINFO *, db_pgno_t,
- * PUBLIC: PAGE *, void *, int (*)(void *, const void *), u_int32_t));
- */
-int
-__db_salvage_pg(dbp, vdp, pgno, h, handle, callback, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- db_pgno_t pgno;
- PAGE *h;
- void *handle;
- int (*callback) __P((void *, const void *));
- u_int32_t flags;
-{
- ENV *env;
- VRFY_PAGEINFO *pip;
- int keyflag, ret, t_ret;
-
- env = dbp->env;
- DB_ASSERT(env, LF_ISSET(DB_SALVAGE));
-
- /*
- * !!!
- * We dump record numbers when salvaging Queue databases, but not for
- * immutable Recno databases. The problem is we can't figure out the
- * record number from the database page in the Recno case, while the
- * offset in the file is sufficient for Queue.
- */
- keyflag = 0;
-
- /* If we got this page in the subdb pass, we can safely skip it. */
- if (__db_salvage_isdone(vdp, pgno))
- return (0);
-
- switch (TYPE(h)) {
- case P_BTREEMETA:
- ret = __bam_vrfy_meta(dbp, vdp, (BTMETA *)h, pgno, flags);
- break;
- case P_HASH:
- case P_HASH_UNSORTED:
- case P_LBTREE:
- case P_QAMDATA:
- return (__db_salvage_leaf(dbp,
- vdp, pgno, h, handle, callback, flags));
- case P_HASHMETA:
- ret = __ham_vrfy_meta(dbp, vdp, (HMETA *)h, pgno, flags);
- break;
- case P_IBTREE:
- /*
- * We need to mark any overflow keys on internal pages as seen,
- * so we don't print them out in __db_salvage_unknowns. But if
- * we're an upgraded database, a P_LBTREE page may very well
- * have a reference to the same overflow pages (this practice
- * stopped somewhere around db4.5). To give P_LBTREEs a chance
- * to print out any keys on shared pages, mark the page now and
- * deal with it at the end.
- */
- return (__db_salvage_markneeded(vdp, pgno, SALVAGE_IBTREE));
- case P_LDUP:
- return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LDUP));
- case P_LRECNO:
- /*
- * Recno leaves are tough, because the leaf could be (1) a dup
- * page, or it could be (2) a regular database leaf page.
- * Fortunately, RECNO databases are not allowed to have
- * duplicates.
- *
- * If there are no subdatabases, dump the page immediately if
- * it's a leaf in a RECNO database, otherwise wait and hopefully
- * it will be dumped by the leaf page that refers to it,
- * otherwise we'll get it with the unknowns.
- *
- * If there are subdatabases, there might be mixed types and
- * dbp->type can't be trusted. We'll only get here after
- * salvaging each database, though, so salvaging this page
- * immediately isn't important. If this page is a dup, it might
- * get salvaged later on, otherwise the unknowns pass will pick
- * it up. Note that SALVAGE_HASSUBDBS won't get set if we're
- * salvaging aggressively.
- *
- * If we're salvaging aggressively, we don't know whether or not
- * there's subdatabases, so we wait on all recno pages.
- */
- if (!LF_ISSET(DB_AGGRESSIVE) &&
- !F_ISSET(vdp, SALVAGE_HASSUBDBS) && dbp->type == DB_RECNO)
- return (__db_salvage_leaf(dbp,
- vdp, pgno, h, handle, callback, flags));
- return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LRECNODUP));
- case P_OVERFLOW:
- return (__db_salvage_markneeded(vdp, pgno, SALVAGE_OVERFLOW));
- case P_QAMMETA:
- keyflag = 1;
- ret = __qam_vrfy_meta(dbp, vdp, (QMETA *)h, pgno, flags);
- break;
- case P_INVALID:
- case P_IRECNO:
- case __P_DUPLICATE:
- default:
- /*
- * There's no need to display an error, the page type was
- * already checked and reported on.
- */
- return (0);
- }
- if (ret != 0)
- return (ret);
-
- /*
- * We have to display the dump header if it's a metadata page. It's
- * our last chance as the page was marked "seen" in the vrfy routine,
- * and we won't see the page again. We don't display headers for
- * the first database in a multi-database file, that database simply
- * contains a list of subdatabases.
- */
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
- if (!F_ISSET(pip, VRFY_HAS_SUBDBS) && !LF_ISSET(DB_VERIFY_PARTITION))
- ret = __db_prheader(
- dbp, NULL, 0, keyflag, handle, callback, vdp, pgno);
- if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
-}
-
-/*
- * __db_salvage_leaf --
- * Walk through a leaf, salvaging all likely key/data pairs and marking
- * seen pages in vdp.
- *
- * PUBLIC: int __db_salvage_leaf __P((DB *, VRFY_DBINFO *, db_pgno_t,
- * PUBLIC: PAGE *, void *, int (*)(void *, const void *), u_int32_t));
- */
-int
-__db_salvage_leaf(dbp, vdp, pgno, h, handle, callback, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- db_pgno_t pgno;
- PAGE *h;
- void *handle;
- int (*callback) __P((void *, const void *));
- u_int32_t flags;
-{
- ENV *env;
-
- env = dbp->env;
- DB_ASSERT(env, LF_ISSET(DB_SALVAGE));
-
- /* If we got this page in the subdb pass, we can safely skip it. */
- if (__db_salvage_isdone(vdp, pgno))
- return (0);
-
- switch (TYPE(h)) {
- case P_HASH_UNSORTED:
- case P_HASH:
- return (__ham_salvage(dbp, vdp,
- pgno, h, handle, callback, flags));
- case P_LBTREE:
- case P_LRECNO:
- return (__bam_salvage(dbp, vdp,
- pgno, TYPE(h), h, handle, callback, NULL, flags));
- case P_QAMDATA:
- return (__qam_salvage(dbp, vdp,
- pgno, h, handle, callback, flags));
- default:
- /*
- * There's no need to display an error, the page type was
- * already checked and reported on.
- */
- return (0);
- }
-}
-
-/*
- * __db_salvage_unknowns --
- * Walk through the salvager database, printing with key "UNKNOWN"
- * any pages we haven't dealt with.
- */
-static int
-__db_salvage_unknowns(dbp, vdp, handle, callback, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- void *handle;
- int (*callback) __P((void *, const void *));
- u_int32_t flags;
-{
- DBC *dbc;
- DBT unkdbt, key, *dbt;
- DB_MPOOLFILE *mpf;
- ENV *env;
- PAGE *h;
- db_pgno_t pgno;
- u_int32_t pgtype, ovfl_bufsz, tmp_flags;
- int ret, t_ret;
- void *ovflbuf;
-
- dbc = NULL;
- env = dbp->env;
- mpf = dbp->mpf;
-
- DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1);
-
- if ((ret = __os_malloc(env, dbp->pgsize, &ovflbuf)) != 0)
- return (ret);
- ovfl_bufsz = dbp->pgsize;
-
- /*
- * We make two passes -- in the first pass, skip SALVAGE_OVERFLOW
- * pages, because they may be referenced by the standard database
- * pages that we're resolving.
- */
- while ((t_ret =
- __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 1)) == 0) {
- if ((t_ret = __memp_fget(mpf,
- &pgno, vdp->thread_info, NULL, 0, &h)) != 0) {
- if (ret == 0)
- ret = t_ret;
- continue;
- }
-
- dbt = NULL;
- tmp_flags = 0;
- switch (pgtype) {
- case SALVAGE_LDUP:
- case SALVAGE_LRECNODUP:
- dbt = &unkdbt;
- tmp_flags = DB_SA_UNKNOWNKEY;
- /* FALLTHROUGH */
- case SALVAGE_IBTREE:
- case SALVAGE_LBTREE:
- case SALVAGE_LRECNO:
- if ((t_ret = __bam_salvage(
- dbp, vdp, pgno, pgtype, h, handle,
- callback, dbt, tmp_flags | flags)) != 0 && ret == 0)
- ret = t_ret;
- break;
- case SALVAGE_OVERFLOW:
- DB_ASSERT(env, 0); /* Shouldn't ever happen. */
- break;
- case SALVAGE_HASH:
- if ((t_ret = __ham_salvage(dbp, vdp,
- pgno, h, handle, callback, flags)) != 0 && ret == 0)
- ret = t_ret;
- break;
- case SALVAGE_INVALID:
- case SALVAGE_IGNORE:
- default:
- /*
- * Shouldn't happen, but if it does, just do what the
- * nice man says.
- */
- DB_ASSERT(env, 0);
- break;
- }
- if ((t_ret = __memp_fput(mpf,
- vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
- ret = t_ret;
- }
-
- /* We should have reached the end of the database. */
- if (t_ret == DB_NOTFOUND)
- t_ret = 0;
- if (t_ret != 0 && ret == 0)
- ret = t_ret;
-
- /* Re-open the cursor so we traverse the database again. */
- if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
- dbc = NULL;
-
- /* Now, deal with any remaining overflow pages. */
- while ((t_ret =
- __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 0)) == 0) {
- if ((t_ret = __memp_fget(mpf,
- &pgno, vdp->thread_info, NULL, 0, &h)) != 0) {
- if (ret == 0)
- ret = t_ret;
- continue;
- }
-
- switch (pgtype) {
- case SALVAGE_OVERFLOW:
- /*
- * XXX:
- * This may generate multiple "UNKNOWN" keys in
- * a database with no dups. What to do?
- */
- if ((t_ret = __db_safe_goff(dbp, vdp,
- pgno, &key, &ovflbuf, &ovfl_bufsz, flags)) != 0 ||
- ((vdp->type == DB_BTREE || vdp->type == DB_HASH) &&
- (t_ret = __db_vrfy_prdbt(&unkdbt,
- 0, " ", handle, callback, 0, vdp)) != 0) ||
- (t_ret = __db_vrfy_prdbt(
- &key, 0, " ", handle, callback, 0, vdp)) != 0)
- if (ret == 0)
- ret = t_ret;
- break;
- default:
- DB_ASSERT(env, 0); /* Shouldn't ever happen. */
- break;
- }
- if ((t_ret = __memp_fput(mpf,
- vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
- ret = t_ret;
- }
-
- /* We should have reached the end of the database. */
- if (t_ret == DB_NOTFOUND)
- t_ret = 0;
- if (t_ret != 0 && ret == 0)
- ret = t_ret;
-
- if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- __os_free(env, ovflbuf);
-
- return (ret);
-}
-
-/*
- * Offset of the ith inp array entry, which we can compare to the offset
- * the entry stores.
- */
-#define INP_OFFSET(dbp, h, i) \
- ((db_indx_t)((u_int8_t *)((P_INP(dbp,(h))) + (i)) - (u_int8_t *)(h)))
-
-/*
- * __db_vrfy_inpitem --
- * Verify that a single entry in the inp array is sane, and update
- * the high water mark and current item offset. (The former of these is
- * used for state information between calls, and is required; it must
- * be initialized to the pagesize before the first call.)
- *
- * Returns DB_VERIFY_FATAL if inp has collided with the data,
- * since verification can't continue from there; returns DB_VERIFY_BAD
- * if anything else is wrong.
- *
- * PUBLIC: int __db_vrfy_inpitem __P((DB *, PAGE *,
- * PUBLIC: db_pgno_t, u_int32_t, int, u_int32_t, u_int32_t *, u_int32_t *));
- */
-int
-__db_vrfy_inpitem(dbp, h, pgno, i, is_btree, flags, himarkp, offsetp)
- DB *dbp;
- PAGE *h;
- db_pgno_t pgno;
- u_int32_t i;
- int is_btree;
- u_int32_t flags, *himarkp, *offsetp;
-{
- BKEYDATA *bk;
- ENV *env;
- db_indx_t *inp, offset, len;
-
- env = dbp->env;
-
- DB_ASSERT(env, himarkp != NULL);
- inp = P_INP(dbp, h);
-
- /*
- * Check that the inp array, which grows from the beginning of the
- * page forward, has not collided with the data, which grow from the
- * end of the page backward.
- */
- if (inp + i >= (db_indx_t *)((u_int8_t *)h + *himarkp)) {
- /* We've collided with the data. We need to bail. */
- EPRINT((env, "Page %lu: entries listing %lu overlaps data",
- (u_long)pgno, (u_long)i));
- return (DB_VERIFY_FATAL);
- }
-
- offset = inp[i];
-
- /*
- * Check that the item offset is reasonable: it points somewhere
- * after the inp array and before the end of the page.
- */
- if (offset <= INP_OFFSET(dbp, h, i) || offset >= dbp->pgsize) {
- EPRINT((env, "Page %lu: bad offset %lu at page index %lu",
- (u_long)pgno, (u_long)offset, (u_long)i));
- return (DB_VERIFY_BAD);
- }
-
- /* Update the high-water mark (what HOFFSET should be) */
- if (offset < *himarkp)
- *himarkp = offset;
-
- if (is_btree) {
- /*
- * Check alignment; if it's unaligned, it's unsafe to
- * manipulate this item.
- */
- if (offset != DB_ALIGN(offset, sizeof(u_int32_t))) {
- EPRINT((env,
- "Page %lu: unaligned offset %lu at page index %lu",
- (u_long)pgno, (u_long)offset, (u_long)i));
- return (DB_VERIFY_BAD);
- }
-
- /*
- * Check that the item length remains on-page.
- */
- bk = GET_BKEYDATA(dbp, h, i);
-
- /*
- * We need to verify the type of the item here;
- * we can't simply assume that it will be one of the
- * expected three. If it's not a recognizable type,
- * it can't be considered to have a verifiable
- * length, so it's not possible to certify it as safe.
- */
- switch (B_TYPE(bk->type)) {
- case B_KEYDATA:
- len = bk->len;
- break;
- case B_DUPLICATE:
- case B_OVERFLOW:
- len = BOVERFLOW_SIZE;
- break;
- default:
- EPRINT((env,
- "Page %lu: item %lu of unrecognizable type",
- (u_long)pgno, (u_long)i));
- return (DB_VERIFY_BAD);
- }
-
- if ((size_t)(offset + len) > dbp->pgsize) {
- EPRINT((env,
- "Page %lu: item %lu extends past page boundary",
- (u_long)pgno, (u_long)i));
- return (DB_VERIFY_BAD);
- }
- }
-
- if (offsetp != NULL)
- *offsetp = offset;
- return (0);
-}
-
-/*
- * __db_vrfy_duptype--
- * Given a page number and a set of flags to __bam_vrfy_subtree,
- * verify that the dup tree type is correct--i.e., it's a recno
- * if DUPSORT is not set and a btree if it is.
- *
- * PUBLIC: int __db_vrfy_duptype
- * PUBLIC: __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
- */
-int
-__db_vrfy_duptype(dbp, vdp, pgno, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- db_pgno_t pgno;
- u_int32_t flags;
-{
- ENV *env;
- VRFY_PAGEINFO *pip;
- int ret, isbad;
-
- env = dbp->env;
- isbad = 0;
-
- if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
- return (ret);
-
- switch (pip->type) {
- case P_IBTREE:
- case P_LDUP:
- if (!LF_ISSET(DB_ST_DUPSORT)) {
- EPRINT((env,
- "Page %lu: sorted duplicate set in unsorted-dup database",
- (u_long)pgno));
- isbad = 1;
- }
- break;
- case P_IRECNO:
- case P_LRECNO:
- if (LF_ISSET(DB_ST_DUPSORT)) {
- EPRINT((env,
- "Page %lu: unsorted duplicate set in sorted-dup database",
- (u_long)pgno));
- isbad = 1;
- }
- break;
- default:
- /*
- * If the page is entirely zeroed, its pip->type will be a lie
- * (we assumed it was a hash page, as they're allowed to be
- * zeroed); handle this case specially.
- */
- if (F_ISSET(pip, VRFY_IS_ALLZEROES))
- ZEROPG_ERR_PRINT(env, pgno, "duplicate page");
- else
- EPRINT((env,
- "Page %lu: duplicate page of inappropriate type %lu",
- (u_long)pgno, (u_long)pip->type));
- isbad = 1;
- break;
- }
-
- if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0)
- return (ret);
- return (isbad == 1 ? DB_VERIFY_BAD : 0);
-}
-
-/*
- * __db_salvage_duptree --
- * Attempt to salvage a given duplicate tree, given its alleged root.
- *
- * The key that corresponds to this dup set has been passed to us
- * in DBT *key. Because data items follow keys, though, it has been
- * printed once already.
- *
- * The basic idea here is that pgno ought to be a P_LDUP, a P_LRECNO, a
- * P_IBTREE, or a P_IRECNO. If it's an internal page, use the verifier
- * functions to make sure it's safe; if it's not, we simply bail and the
- * data will have to be printed with no key later on. if it is safe,
- * recurse on each of its children.
- *
- * Whether or not it's safe, if it's a leaf page, __bam_salvage it.
- *
- * At all times, use the DB hanging off vdp to mark and check what we've
- * done, so each page gets printed exactly once and we don't get caught
- * in any cycles.
- *
- * PUBLIC: int __db_salvage_duptree __P((DB *, VRFY_DBINFO *, db_pgno_t,
- * PUBLIC: DBT *, void *, int (*)(void *, const void *), u_int32_t));
- */
-int
-__db_salvage_duptree(dbp, vdp, pgno, key, handle, callback, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- db_pgno_t pgno;
- DBT *key;
- void *handle;
- int (*callback) __P((void *, const void *));
- u_int32_t flags;
-{
- DB_MPOOLFILE *mpf;
- PAGE *h;
- int ret, t_ret;
-
- mpf = dbp->mpf;
-
- if (pgno == PGNO_INVALID || !IS_VALID_PGNO(pgno))
- return (DB_VERIFY_BAD);
-
- /* We have a plausible page. Try it. */
- if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
- return (ret);
-
- switch (TYPE(h)) {
- case P_IBTREE:
- case P_IRECNO:
- if ((ret = __db_vrfy_common(dbp, vdp, h, pgno, flags)) != 0)
- goto err;
- if ((ret = __bam_vrfy(dbp,
- vdp, h, pgno, flags | DB_NOORDERCHK)) != 0 ||
- (ret = __db_salvage_markdone(vdp, pgno)) != 0)
- goto err;
- /*
- * We have a known-healthy internal page. Walk it.
- */
- if ((ret = __bam_salvage_walkdupint(dbp, vdp, h, key,
- handle, callback, flags)) != 0)
- goto err;
- break;
- case P_LRECNO:
- case P_LDUP:
- if ((ret = __bam_salvage(dbp,
- vdp, pgno, TYPE(h), h, handle, callback, key, flags)) != 0)
- goto err;
- break;
- default:
- ret = DB_VERIFY_BAD;
- goto err;
- }
-
-err: if ((t_ret = __memp_fput(mpf,
- vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
-}
-
-/*
- * __db_salvage_all --
- * Salvage only the leaves we find by walking the tree. If we have subdbs,
- * salvage each of them individually.
- */
-static int
-__db_salvage_all(dbp, vdp, handle, callback, flags, hassubsp)
- DB *dbp;
- VRFY_DBINFO *vdp;
- void *handle;
- int (*callback) __P((void *, const void *));
- u_int32_t flags;
- int *hassubsp;
-{
- DB *pgset;
- DBC *pgsc;
- DB_MPOOLFILE *mpf;
- ENV *env;
- PAGE *h;
- VRFY_PAGEINFO *pip;
- db_pgno_t p, meta_pgno;
- int ret, t_ret;
-
- *hassubsp = 0;
-
- env = dbp->env;
- pgset = NULL;
- pgsc = NULL;
- mpf = dbp->mpf;
- h = NULL;
- pip = NULL;
- ret = 0;
-
- /*
- * Check to make sure the page is OK and find out if it contains
- * subdatabases.
- */
- meta_pgno = PGNO_BASE_MD;
- if ((t_ret = __memp_fget(mpf,
- &meta_pgno, vdp->thread_info, NULL, 0, &h)) == 0 &&
- (t_ret = __db_vrfy_common(dbp, vdp, h, PGNO_BASE_MD, flags)) == 0 &&
- (t_ret = __db_salvage_pg(
- dbp, vdp, PGNO_BASE_MD, h, handle, callback, flags)) == 0 &&
- (t_ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) == 0)
- if (F_ISSET(pip, VRFY_HAS_SUBDBS))
- *hassubsp = 1;
- if (pip != NULL &&
- (t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0)
- ret = t_ret;
- if (h != NULL) {
- if ((t_ret = __memp_fput(mpf,
- vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
- ret = t_ret;
- h = NULL;
- }
- if (ret != 0)
- return (ret);
-
- /* Without subdatabases, we can just dump from the meta pgno. */
- if (*hassubsp == 0)
- return (__db_salvage(dbp,
- vdp, PGNO_BASE_MD, handle, callback, flags));
-
- /*
- * We have subdbs. Try to crack them.
- *
- * To do so, get a set of leaf pages in the master database, and then
- * walk each of the valid ones, salvaging subdbs as we go. If any
- * prove invalid, just drop them; we'll pick them up on a later pass.
- */
- if ((ret = __db_vrfy_pgset(env,
- vdp->thread_info, dbp->pgsize, &pgset)) != 0)
- goto err;
- if ((ret = __db_meta2pgset(dbp, vdp, PGNO_BASE_MD, flags, pgset)) != 0)
- goto err;
- if ((ret = __db_cursor(pgset, vdp->thread_info, NULL, &pgsc, 0)) != 0)
- goto err;
- while ((t_ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
- if ((t_ret = __memp_fget(mpf,
- &p, vdp->thread_info, NULL, 0, &h)) == 0 &&
- (t_ret = __db_vrfy_common(dbp, vdp, h, p, flags)) == 0 &&
- (t_ret =
- __bam_vrfy(dbp, vdp, h, p, flags | DB_NOORDERCHK)) == 0)
- t_ret = __db_salvage_subdbpg(
- dbp, vdp, h, handle, callback, flags);
- if (t_ret != 0 && ret == 0)
- ret = t_ret;
- if (h != NULL) {
- if ((t_ret = __memp_fput(mpf, vdp->thread_info,
- h, dbp->priority)) != 0 && ret == 0)
- ret = t_ret;
- h = NULL;
- }
- }
-
- if (t_ret != DB_NOTFOUND && ret == 0)
- ret = t_ret;
-
-err: if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0)
- ret = t_ret;
- if (pgset != NULL &&
- (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret ==0)
- ret = t_ret;
- if (h != NULL &&
- (t_ret = __memp_fput(mpf,
- vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
- ret = t_ret;
- return (ret);
-}
-
-/*
- * __db_salvage_subdbpg --
- * Given a known-good leaf page in the master database, salvage all
- * leaf pages corresponding to each subdb.
- */
-static int
-__db_salvage_subdbpg(dbp, vdp, master, handle, callback, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- PAGE *master;
- void *handle;
- int (*callback) __P((void *, const void *));
- u_int32_t flags;
-{
- BKEYDATA *bkkey, *bkdata;
- BOVERFLOW *bo;
- DB *pgset;
- DBC *pgsc;
- DBT key;
- DB_MPOOLFILE *mpf;
- ENV *env;
- PAGE *subpg;
- db_indx_t i;
- db_pgno_t meta_pgno;
- int ret, err_ret, t_ret;
- char *subdbname;
- u_int32_t ovfl_bufsz;
-
- env = dbp->env;
- mpf = dbp->mpf;
- ret = err_ret = 0;
- subdbname = NULL;
- pgsc = NULL;
- pgset = NULL;
- ovfl_bufsz = 0;
-
- /*
- * For each entry, get and salvage the set of pages
- * corresponding to that entry.
- */
- for (i = 0; i < NUM_ENT(master); i += P_INDX) {
- bkkey = GET_BKEYDATA(dbp, master, i);
- bkdata = GET_BKEYDATA(dbp, master, i + O_INDX);
-
- /* Get the subdatabase name. */
- if (B_TYPE(bkkey->type) == B_OVERFLOW) {
- /*
- * We can, in principle anyway, have a subdb
- * name so long it overflows. Ick.
- */
- bo = (BOVERFLOW *)bkkey;
- if ((ret = __db_safe_goff(dbp, vdp, bo->pgno,
- &key, &subdbname, &ovfl_bufsz, flags)) != 0) {
- err_ret = DB_VERIFY_BAD;
- continue;
- }
-
- /* Nul-terminate it. */
- if (ovfl_bufsz < key.size + 1) {
- if ((ret = __os_realloc(env,
- key.size + 1, &subdbname)) != 0)
- goto err;
- ovfl_bufsz = key.size + 1;
- }
- subdbname[key.size] = '\0';
- } else if (B_TYPE(bkkey->type) == B_KEYDATA) {
- if (ovfl_bufsz < (u_int32_t)bkkey->len + 1) {
- if ((ret = __os_realloc(env,
- bkkey->len + 1, &subdbname)) != 0)
- goto err;
- ovfl_bufsz = bkkey->len + 1;
- }
- DB_ASSERT(env, subdbname != NULL);
- memcpy(subdbname, bkkey->data, bkkey->len);
- subdbname[bkkey->len] = '\0';
- }
-
- /* Get the corresponding pgno. */
- if (bkdata->len != sizeof(db_pgno_t)) {
- err_ret = DB_VERIFY_BAD;
- continue;
- }
- memcpy(&meta_pgno,
- (db_pgno_t *)bkdata->data, sizeof(db_pgno_t));
-
- /*
- * Subdatabase meta pgnos are stored in network byte
- * order for cross-endian compatibility. Swap if appropriate.
- */
- DB_NTOHL_SWAP(env, &meta_pgno);
-
- /* If we can't get the subdb meta page, just skip the subdb. */
- if (!IS_VALID_PGNO(meta_pgno) || (ret = __memp_fget(mpf,
- &meta_pgno, vdp->thread_info, NULL, 0, &subpg)) != 0) {
- err_ret = ret;
- continue;
- }
-
- /*
- * Verify the subdatabase meta page. This has two functions.
- * First, if it's bad, we have no choice but to skip the subdb
- * and let the pages just get printed on a later pass. Second,
- * the access-method-specific meta verification routines record
- * the various state info (such as the presence of dups)
- * that we need for __db_prheader().
- */
- if ((ret =
- __db_vrfy_common(dbp, vdp, subpg, meta_pgno, flags)) != 0) {
- err_ret = ret;
- (void)__memp_fput(mpf,
- vdp->thread_info, subpg, dbp->priority);
- continue;
- }
- switch (TYPE(subpg)) {
- case P_BTREEMETA:
- if ((ret = __bam_vrfy_meta(dbp,
- vdp, (BTMETA *)subpg, meta_pgno, flags)) != 0) {
- err_ret = ret;
- (void)__memp_fput(mpf,
- vdp->thread_info, subpg, dbp->priority);
- continue;
- }
- break;
- case P_HASHMETA:
- if ((ret = __ham_vrfy_meta(dbp,
- vdp, (HMETA *)subpg, meta_pgno, flags)) != 0) {
- err_ret = ret;
- (void)__memp_fput(mpf,
- vdp->thread_info, subpg, dbp->priority);
- continue;
- }
- break;
- default:
- /* This isn't an appropriate page; skip this subdb. */
- err_ret = DB_VERIFY_BAD;
- continue;
- }
-
- if ((ret = __memp_fput(mpf,
- vdp->thread_info, subpg, dbp->priority)) != 0) {
- err_ret = ret;
- continue;
- }
-
- /* Print a subdatabase header. */
- if ((ret = __db_prheader(dbp,
- subdbname, 0, 0, handle, callback, vdp, meta_pgno)) != 0)
- goto err;
-
- /* Salvage meta_pgno's tree. */
- if ((ret = __db_salvage(dbp,
- vdp, meta_pgno, handle, callback, flags)) != 0)
- err_ret = ret;
-
- /* Print a subdatabase footer. */
- if ((ret = __db_prfooter(handle, callback)) != 0)
- goto err;
- }
-
-err: if (subdbname)
- __os_free(env, subdbname);
-
- if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0)
- ret = t_ret;
-
- if (pgset != NULL && (t_ret = __db_close(pgset, NULL, 0)) != 0)
- ret = t_ret;
-
- if ((t_ret = __db_salvage_markdone(vdp, PGNO(master))) != 0)
- return (t_ret);
-
- return ((err_ret != 0) ? err_ret : ret);
-}
-
-/*
- * __db_salvage --
- * Given a meta page number, salvage all data from leaf pages found by
- * walking the meta page's tree.
- */
-static int
-__db_salvage(dbp, vdp, meta_pgno, handle, callback, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- db_pgno_t meta_pgno;
- void *handle;
- int (*callback) __P((void *, const void *));
- u_int32_t flags;
-
-{
- DB *pgset;
- DBC *dbc, *pgsc;
- DB_MPOOLFILE *mpf;
- ENV *env;
- PAGE *subpg;
- db_pgno_t p;
- int err_ret, ret, t_ret;
-
- env = dbp->env;
- mpf = dbp->mpf;
- err_ret = ret = t_ret = 0;
- pgsc = NULL;
- pgset = NULL;
- dbc = NULL;
-
- if ((ret = __db_vrfy_pgset(env,
- vdp->thread_info, dbp->pgsize, &pgset)) != 0)
- goto err;
-
- /* Get all page numbers referenced from this meta page. */
- if ((ret = __db_meta2pgset(dbp, vdp, meta_pgno,
- flags, pgset)) != 0) {
- err_ret = ret;
- goto err;
- }
-
- if ((ret = __db_cursor(pgset,
- vdp->thread_info, NULL, &pgsc, 0)) != 0)
- goto err;
-
- if (dbp->type == DB_QUEUE &&
- (ret = __db_cursor(dbp, vdp->thread_info, NULL, &dbc, 0)) != 0)
- goto err;
-
- /* Salvage every page in pgset. */
- while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) {
- if (dbp->type == DB_QUEUE) {
-#ifdef HAVE_QUEUE
- ret = __qam_fget(dbc, &p, 0, &subpg);
-#else
- ret = __db_no_queue_am(env);
-#endif
- /* Don't report an error for pages not found in a queue.
- * The pgset is a best guess, it doesn't know about
- * deleted extents which leads to this error.
- */
- if (ret == ENOENT || ret == DB_PAGE_NOTFOUND)
- continue;
- } else
- ret = __memp_fget(mpf,
- &p, vdp->thread_info, NULL, 0, &subpg);
- if (ret != 0) {
- err_ret = ret;
- continue;
- }
-
- if ((ret = __db_salvage_pg(dbp, vdp, p, subpg,
- handle, callback, flags)) != 0)
- err_ret = ret;
-
- if (dbp->type == DB_QUEUE)
-#ifdef HAVE_QUEUE
- ret = __qam_fput(dbc, p, subpg, dbp->priority);
-#else
- ret = __db_no_queue_am(env);
-#endif
- else
- ret = __memp_fput(mpf,
- vdp->thread_info, subpg, dbp->priority);
- if (ret != 0)
- err_ret = ret;
- }
-
- if (ret == DB_NOTFOUND)
- ret = 0;
-
-err:
- if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0)
- ret = t_ret;
- if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0)
- ret = t_ret;
- if (pgset != NULL && (t_ret = __db_close(pgset, NULL, 0)) != 0)
- ret = t_ret;
-
- return ((err_ret != 0) ? err_ret : ret);
-}
-
-/*
- * __db_meta2pgset --
- * Given a known-safe meta page number, return the set of pages
- * corresponding to the database it represents. Return DB_VERIFY_BAD if
- * it's not a suitable meta page or is invalid.
- */
-static int
-__db_meta2pgset(dbp, vdp, pgno, flags, pgset)
- DB *dbp;
- VRFY_DBINFO *vdp;
- db_pgno_t pgno;
- u_int32_t flags;
- DB *pgset;
-{
- DB_MPOOLFILE *mpf;
- PAGE *h;
- int ret, t_ret;
-
- mpf = dbp->mpf;
-
- if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0)
- return (ret);
-
- switch (TYPE(h)) {
- case P_BTREEMETA:
- ret = __bam_meta2pgset(dbp, vdp, (BTMETA *)h, flags, pgset);
- break;
- case P_HASHMETA:
- ret = __ham_meta2pgset(dbp, vdp, (HMETA *)h, flags, pgset);
- break;
- case P_QAMMETA:
-#ifdef HAVE_QUEUE
- ret = __qam_meta2pgset(dbp, vdp, pgset);
- break;
-#endif
- default:
- ret = DB_VERIFY_BAD;
- break;
- }
-
- if ((t_ret = __memp_fput(mpf, vdp->thread_info, h, dbp->priority)) != 0)
- return (t_ret);
- return (ret);
-}
-
-/*
- * __db_guesspgsize --
- * Try to guess what the pagesize is if the one on the meta page
- * and the one in the db are invalid.
- */
-static u_int
-__db_guesspgsize(env, fhp)
- ENV *env;
- DB_FH *fhp;
-{
- db_pgno_t i;
- size_t nr;
- u_int32_t guess;
- u_int8_t type;
-
- for (guess = DB_MAX_PGSIZE; guess >= DB_MIN_PGSIZE; guess >>= 1) {
- /*
- * We try to read three pages ahead after the first one
- * and make sure we have plausible types for all of them.
- * If the seeks fail, continue with a smaller size;
- * we're probably just looking past the end of the database.
- * If they succeed and the types are reasonable, also continue
- * with a size smaller; we may be looking at pages N,
- * 2N, and 3N for some N > 1.
- *
- * As soon as we hit an invalid type, we stop and return
- * our previous guess; that last one was probably the page size.
- */
- for (i = 1; i <= 3; i++) {
- if (__os_seek(
- env, fhp, i, guess, SSZ(DBMETA, type)) != 0)
- break;
- if (__os_read(env,
- fhp, &type, 1, &nr) != 0 || nr == 0)
- break;
- if (type == P_INVALID || type >= P_PAGETYPE_MAX)
- return (guess << 1);
- }
- }
-
- /*
- * If we're just totally confused--the corruption takes up most of the
- * beginning pages of the database--go with the default size.
- */
- return (DB_DEF_IOSIZE);
-}
diff --git a/db/db_vrfy_stub.c b/db/db_vrfy_stub.c
deleted file mode 100644
index 9ed5acd..0000000
--- a/db/db_vrfy_stub.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 1996-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#ifndef HAVE_VERIFY
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_am.h"
-#include "dbinc/db_verify.h"
-
-/*
- * If the library wasn't compiled with the verification support, various
- * routines aren't available. Stub them here, returning an appropriate
- * error.
- */
-
-static int __db_novrfy __P((ENV *));
-
-/*
- * __db_novrfy --
- *    Report that this library build does not include database
- *    verification support.  Always returns DB_OPNOTSUP.
- */
-static int
-__db_novrfy(env)
-    ENV *env;
-{
-    int ret;
-
-    ret = DB_OPNOTSUP;
-    __db_errx(env,
-        "library build did not include support for database verification");
-    return (ret);
-}
-
-/*
- * __db_verify_pp --
- *    Stub used when verification is compiled out: reports the missing
- *    support, closes the handle and returns the "not supported" error.
- */
-int
-__db_verify_pp(dbp, file, database, outfile, flags)
-    DB *dbp;
-    const char *file, *database;
-    FILE *outfile;
-    u_int32_t flags;
-{
-    int rc;
-
-    /* Only the handle itself is used in this build. */
-    COMPQUIET(flags, 0);
-    COMPQUIET(outfile, NULL);
-    COMPQUIET(database, NULL);
-    COMPQUIET(file, NULL);
-
-    /* Report through the handle's environment before the handle goes away. */
-    rc = __db_novrfy(dbp->env);
-
-    /* The verify method is a destructor, so the handle is closed here. */
-    (void)__db_close(dbp, NULL, 0);
-
-    return (rc);
-}
-
-/*
- * __db_verify_internal --
- *    Stub used when verification is compiled out.  Ignores every argument
- *    and returns 0.
- *
- *    NOTE(review): unlike the other stubs in this file, this one reports
- *    success instead of calling __db_novrfy; confirm that is intended.
- */
-int
-__db_verify_internal(dbp, name, subdb, handle, callback, flags)
-    DB *dbp;
-    const char *name, *subdb;
-    void *handle;
-    int (*callback) __P((void *, const void *));
-    u_int32_t flags;
-{
-    COMPQUIET(flags, 0);
-    COMPQUIET(callback, NULL);
-    COMPQUIET(handle, NULL);
-    COMPQUIET(subdb, NULL);
-    COMPQUIET(name, NULL);
-    COMPQUIET(dbp, NULL);
-
-    return (0);
-}
-
-/*
- * __db_vrfy_getpageinfo --
- *    Stub used when verification is compiled out; reports the missing
- *    support through the environment of vdp->pgdbp.
- */
-int
-__db_vrfy_getpageinfo(vdp, pgno, pipp)
-    VRFY_DBINFO *vdp;
-    db_pgno_t pgno;
-    VRFY_PAGEINFO **pipp;
-{
-    ENV *env;
-
-    COMPQUIET(pipp, NULL);
-    COMPQUIET(pgno, 0);
-
-    env = vdp->pgdbp->env;
-    return (__db_novrfy(env));
-}
-
-/*
- * __db_vrfy_putpageinfo --
- *    Stub used when verification is compiled out; reports the missing
- *    support through the supplied environment.
- */
-int
-__db_vrfy_putpageinfo(env, vdp, pip)
-    ENV *env;
-    VRFY_DBINFO *vdp;
-    VRFY_PAGEINFO *pip;
-{
-    int rc;
-
-    COMPQUIET(pip, NULL);
-    COMPQUIET(vdp, NULL);
-
-    rc = __db_novrfy(env);
-    return (rc);
-}
-
-/*
- * __db_vrfy_prdbt --
- *    Stub used when verification is compiled out.  Every argument except
- *    vdp is ignored; the missing support is reported through the
- *    environment of vdp->pgdbp.
- */
-int
-__db_vrfy_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, vdp)
-    DBT *dbtp;
-    int checkprint;
-    const char *prefix;
-    void *handle;
-    int (*callback) __P((void *, const void *));
-    int is_recno;
-    VRFY_DBINFO *vdp;
-{
-    ENV *env;
-
-    COMPQUIET(is_recno, 0);
-    COMPQUIET(callback, NULL);
-    COMPQUIET(handle, NULL);
-    COMPQUIET(prefix, NULL);
-    COMPQUIET(checkprint, 0);
-    COMPQUIET(dbtp, NULL);
-
-    env = vdp->pgdbp->env;
-    return (__db_novrfy(env));
-}
-#endif /* !HAVE_VERIFY */
diff --git a/db/db_vrfyutil.c b/db/db_vrfyutil.c
deleted file mode 100644
index 04d73d9..0000000
--- a/db/db_vrfyutil.c
+++ /dev/null
@@ -1,916 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 2000-2009 Oracle. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_verify.h"
-#include "dbinc/db_am.h"
-
-static int __db_vrfy_childinc __P((DBC *, VRFY_CHILDINFO *));
-static int __db_vrfy_pageinfo_create __P((ENV *, VRFY_PAGEINFO **));
-
-/*
- * __db_vrfy_dbinfo_create --
- *    Build a VRFY_DBINFO: a zeroed structure plus three Btree databases
- *    opened with no file or database name (the child database, which
- *    allows duplicates, the page-info database and the page set), all
- *    using the caller's page size.
- *
- *    On success the structure is handed back through vdpp and 0 is
- *    returned.  On failure the child and page-info databases opened so
- *    far are closed, the structure is freed and the error is returned.
- *
- * PUBLIC: int __db_vrfy_dbinfo_create
- * PUBLIC: __P((ENV *, DB_THREAD_INFO *, u_int32_t, VRFY_DBINFO **));
- */
-int
-__db_vrfy_dbinfo_create(env, ip, pgsize, vdpp)
-    ENV *env;
-    DB_THREAD_INFO *ip;
-    u_int32_t pgsize;
-    VRFY_DBINFO **vdpp;
-{
-    DB *childdb, *pagedb, *seendb;
-    VRFY_DBINFO *info;
-    int ret;
-
-    info = NULL;
-    childdb = pagedb = seendb = NULL;
-
-    ret = __os_calloc(NULL, 1, sizeof(VRFY_DBINFO), &info);
-    if (ret != 0)
-        goto err;
-
-    /* Child database: one duplicate set per parent page. */
-    ret = __db_create_internal(&childdb, env, 0);
-    if (ret != 0)
-        goto err;
-    ret = __db_set_flags(childdb, DB_DUP);
-    if (ret != 0)
-        goto err;
-    ret = __db_set_pagesize(childdb, pgsize);
-    if (ret != 0)
-        goto err;
-    if (TXN_ON(env)) {
-        /* If transactional, make sure we don't log. */
-        ret = __db_set_flags(childdb, DB_TXN_NOT_DURABLE);
-        if (ret != 0)
-            goto err;
-    }
-    ret = __db_open(childdb, ip,
-        NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0600, PGNO_BASE_MD);
-    if (ret != 0)
-        goto err;
-
-    /* Page-info database. */
-    ret = __db_create_internal(&pagedb, env, 0);
-    if (ret != 0)
-        goto err;
-    ret = __db_set_pagesize(pagedb, pgsize);
-    if (ret != 0)
-        goto err;
-    if (TXN_ON(env)) {
-        /* If transactional, make sure we don't log. */
-        ret = __db_set_flags(pagedb, DB_TXN_NOT_DURABLE);
-        if (ret != 0)
-            goto err;
-    }
-    ret = __db_open(pagedb, ip,
-        NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0600, PGNO_BASE_MD);
-    if (ret != 0)
-        goto err;
-
-    /* Page set; this is the last step that can fail. */
-    ret = __db_vrfy_pgset(env, ip, pgsize, &seendb);
-    if (ret != 0)
-        goto err;
-
-    LIST_INIT(&info->subdbs);
-    LIST_INIT(&info->activepips);
-
-    info->thread_info = ip;
-    info->pgset = seendb;
-    info->pgdbp = pagedb;
-    info->cdbp = childdb;
-    *vdpp = info;
-    return (0);
-
-err:
-    if (childdb != NULL)
-        (void)__db_close(childdb, NULL, 0);
-    if (pagedb != NULL)
-        (void)__db_close(pagedb, NULL, 0);
-    if (info != NULL)
-        __os_free(env, info);
-    return (ret);
-}
-
-/*
- * __db_vrfy_dbinfo_destroy --
- *    Destructor for VRFY_DBINFO.  Puts back any page-info structures
- *    still on the active list, frees the subdatabase list, closes the
- *    three internal databases and frees the structure.
- *
- *    Every cleanup step is attempted even if an earlier one failed; the
- *    return value is the first error encountered, or 0.
- *
- * PUBLIC: int __db_vrfy_dbinfo_destroy __P((ENV *, VRFY_DBINFO *));
- */
-int
-__db_vrfy_dbinfo_destroy(env, vdp)
-    ENV *env;
-    VRFY_DBINFO *vdp;
-{
-    VRFY_CHILDINFO *c;
-    int t_ret, ret;
-
-    ret = 0;
-
-    /*
-     * Discard active page structures.  Ideally there wouldn't be any,
-     * but in some error cases we may not have cleared them all out.
-     * Stop at the first failure; otherwise we could spin on an entry
-     * that cannot be put back.
-     */
-    while (LIST_FIRST(&vdp->activepips) != NULL)
-        if ((t_ret = __db_vrfy_putpageinfo(
-            env, vdp, LIST_FIRST(&vdp->activepips))) != 0) {
-            if (ret == 0)
-                ret = t_ret;
-            break;
-        }
-
-    /* Discard subdatabase list structures. */
-    while ((c = LIST_FIRST(&vdp->subdbs)) != NULL) {
-        LIST_REMOVE(c, links);
-        __os_free(NULL, c);
-    }
-
-    /*
-     * Close the internal databases.  An error recorded above must not
-     * be overwritten by a later close failure.
-     */
-    if ((t_ret = __db_close(vdp->pgdbp, NULL, 0)) != 0 && ret == 0)
-        ret = t_ret;
-
-    if ((t_ret = __db_close(vdp->cdbp, NULL, 0)) != 0 && ret == 0)
-        ret = t_ret;
-
-    if ((t_ret = __db_close(vdp->pgset, NULL, 0)) != 0 && ret == 0)
-        ret = t_ret;
-
-    if (vdp->extents != NULL)
-        __os_free(env, vdp->extents);
-    __os_free(env, vdp);
-    return (ret);
-}
-
-/*
- * __db_vrfy_getpageinfo --
- * Get a PAGEINFO structure for a given page, creating it if necessary.
- *
- * Returns 0 with the structure stored in *pipp and its pi_refcount
- * incremented; otherwise returns the error from __db_get (other than
- * DB_NOTFOUND) or from __db_vrfy_pageinfo_create, leaving *pipp untouched.
- *
- * PUBLIC: int __db_vrfy_getpageinfo
- * PUBLIC: __P((VRFY_DBINFO *, db_pgno_t, VRFY_PAGEINFO **));
- */
-int
-__db_vrfy_getpageinfo(vdp, pgno, pipp)
- VRFY_DBINFO *vdp;
- db_pgno_t pgno;
- VRFY_PAGEINFO **pipp;
-{
- DB *pgdbp;
- DBT key, data;
- ENV *env;
- VRFY_PAGEINFO *pip;
- int ret;
-
- /*
- * We want a page info struct. There are three places to get it from,
- * in decreasing order of preference:
- *
- * 1. vdp->activepips. If it's already "checked out", we're
- * already using it, we return the same exact structure with a
- * bumped refcount. This is necessary because this code is
- * replacing array accesses, and it's common for f() to make some
- * changes to a pip, and then call g() and h() which each make
- * changes to the same pip. vdps are never shared between threads
- * (they're never returned to the application), so this is safe.
- * 2. The pgdbp. It's not in memory, but it's in the database, so
- * get it, give it a refcount of 1, and stick it on activepips.
- * 3. malloc. It doesn't exist yet; create it, then stick it on
- * activepips. We'll put it in the database when we putpageinfo
- * later.
- */
-
- /* Case 1. */
- LIST_FOREACH(pip, &vdp->activepips, links)
- if (pip->pgno == pgno)
- goto found;
-
- /* Case 2. */
- pgdbp = vdp->pgdbp;
- env = pgdbp->env;
- memset(&key, 0, sizeof(DBT));
- memset(&data, 0, sizeof(DBT));
- F_SET(&data, DB_DBT_MALLOC);
- key.data = &pgno;
- key.size = sizeof(db_pgno_t);
-
- if ((ret = __db_get(pgdbp,
- vdp->thread_info, NULL, &key, &data, 0)) == 0) {
- /* Found it. */
- DB_ASSERT(env, data.size == sizeof(VRFY_PAGEINFO));
- pip = data.data;
- LIST_INSERT_HEAD(&vdp->activepips, pip, links);
- goto found;
- } else if (ret != DB_NOTFOUND) /* Something nasty happened. */
- return (ret);
-
- /*
- * Case 3.
- *
- * NOTE(review): the new structure is zero-filled and pip->pgno is not
- * set to pgno here; presumably the caller fills it in -- confirm,
- * since Case 1 matches on pip->pgno.
- */
- if ((ret = __db_vrfy_pageinfo_create(env, &pip)) != 0)
- return (ret);
-
- LIST_INSERT_HEAD(&vdp->activepips, pip, links);
-found: pip->pi_refcount++;
-
- *pipp = pip;
- return (0);
-}
-
-/*
- * __db_vrfy_putpageinfo --
- *    Put back a VRFY_PAGEINFO that we're done with.
- *
- *    Drops one reference.  While references remain nothing else happens
- *    and 0 is returned.  When the last reference goes, the structure is
- *    written to the page-info database keyed by its page number, unlinked
- *    from the active list if it is there, and freed.  If the write fails
- *    its error is returned and the structure stays allocated and linked.
- *
- * PUBLIC: int __db_vrfy_putpageinfo __P((ENV *,
- * PUBLIC: VRFY_DBINFO *, VRFY_PAGEINFO *));
- */
-int
-__db_vrfy_putpageinfo(env, vdp, pip)
-    ENV *env;
-    VRFY_DBINFO *vdp;
-    VRFY_PAGEINFO *pip;
-{
-    DB *pgdbp;
-    DBT key, data;
-    VRFY_PAGEINFO *p;
-    int ret;
-
-    if (--pip->pi_refcount > 0)
-        return (0);
-
-    pgdbp = vdp->pgdbp;
-    memset(&key, 0, sizeof(DBT));
-    memset(&data, 0, sizeof(DBT));
-
-    key.data = &pip->pgno;
-    key.size = sizeof(db_pgno_t);
-    data.data = pip;
-    data.size = sizeof(VRFY_PAGEINFO);
-
-    if ((ret = __db_put(pgdbp,
-        vdp->thread_info, NULL, &key, &data, 0)) != 0)
-        return (ret);
-
-    /* Unlink only if the structure really is on the active list. */
-    LIST_FOREACH(p, &vdp->activepips, links)
-        if (p == pip)
-            break;
-    if (p != NULL)
-        LIST_REMOVE(p, links);
-
-    /*
-     * Free the structure we were handed, not the list cursor: the cursor
-     * is NULL when the structure was not on the list, which would leak it.
-     */
-    __os_ufree(env, pip);
-    return (0);
-}
-
-/*
- * __db_vrfy_pgset --
- *    Create a temporary Btree database for storing sets of page numbers
- *    (a mapping from page number to int, used by the *_meta2pgset
- *    functions and for tracking which pages the verifier has seen).
- *
- *    On success the handle is returned through dbpp.  If any step after
- *    handle creation fails, the handle is closed and the error returned.
- *
- * PUBLIC: int __db_vrfy_pgset __P((ENV *,
- * PUBLIC: DB_THREAD_INFO *, u_int32_t, DB **));
- */
-int
-__db_vrfy_pgset(env, ip, pgsize, dbpp)
-    ENV *env;
-    DB_THREAD_INFO *ip;
-    u_int32_t pgsize;
-    DB **dbpp;
-{
-    DB *setdb;
-    int ret;
-
-    ret = __db_create_internal(&setdb, env, 0);
-    if (ret != 0)
-        return (ret);
-
-    ret = __db_set_pagesize(setdb, pgsize);
-    if (ret != 0)
-        goto err;
-
-    if (TXN_ON(env)) {
-        /* If transactional, make sure we don't log. */
-        ret = __db_set_flags(setdb, DB_TXN_NOT_DURABLE);
-        if (ret != 0)
-            goto err;
-    }
-
-    ret = __db_open(setdb, ip,
-        NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0600, PGNO_BASE_MD);
-    if (ret != 0)
-        goto err;
-
-    *dbpp = setdb;
-    return (ret);
-
-err:
-    (void)__db_close(setdb, NULL, 0);
-    return (ret);
-}
-
-/*
- * __db_vrfy_pgset_get --
- *    Look up the int stored for a page number in a page set.  A page
- *    that is not in the set is reported as 0 and is not an error; any
- *    other lookup failure is returned and *valp is left alone.
- *
- * PUBLIC: int __db_vrfy_pgset_get __P((DB *, DB_THREAD_INFO *, db_pgno_t,
- * PUBLIC: int *));
- */
-int
-__db_vrfy_pgset_get(dbp, ip, pgno, valp)
-    DB *dbp;
-    DB_THREAD_INFO *ip;
-    db_pgno_t pgno;
-    int *valp;
-{
-    DBT key, data;
-    int ret, val;
-
-    memset(&data, 0, sizeof(DBT));
-    memset(&key, 0, sizeof(DBT));
-
-    /* The value is read straight into our local int. */
-    F_SET(&data, DB_DBT_USERMEM);
-    data.ulen = sizeof(int);
-    data.data = &val;
-    key.size = sizeof(db_pgno_t);
-    key.data = &pgno;
-
-    ret = __db_get(dbp, ip, NULL, &key, &data, 0);
-    if (ret == DB_NOTFOUND)
-        val = 0;
-    else if (ret != 0)
-        return (ret);
-    else {
-        DB_ASSERT(dbp->env, data.size == sizeof(int));
-    }
-
-    *valp = val;
-    return (0);
-}
-
-/*
- * __db_vrfy_pgset_inc --
- *    Add one to the int stored for a page number, treating a page that
- *    is not yet in the set as having the value 0.
- *
- * PUBLIC: int __db_vrfy_pgset_inc __P((DB *, DB_THREAD_INFO *, db_pgno_t));
- */
-int
-__db_vrfy_pgset_inc(dbp, ip, pgno)
-    DB *dbp;
-    DB_THREAD_INFO *ip;
-    db_pgno_t pgno;
-{
-    DBT key, data;
-    int count, ret;
-
-    /* Stays 0 when the page has no entry yet. */
-    count = 0;
-
-    memset(&data, 0, sizeof(DBT));
-    memset(&key, 0, sizeof(DBT));
-
-    F_SET(&data, DB_DBT_USERMEM);
-    data.ulen = sizeof(int);
-    data.data = &count;
-    key.size = sizeof(db_pgno_t);
-    key.data = &pgno;
-
-    ret = __db_get(dbp, ip, NULL, &key, &data, 0);
-    if (ret != 0 && ret != DB_NOTFOUND)
-        return (ret);
-    if (ret == 0) {
-        DB_ASSERT(dbp->env, data.size == sizeof(int));
-    }
-
-    ++count;
-    data.size = sizeof(int);
-
-    return (__db_put(dbp, ip, NULL, &key, &data, 0));
-}
-
-/*
- * __db_vrfy_pgset_next --
- *    Advance a cursor open on a page-set database and return the page
- *    number of the entry it lands on.  Any error from the cursor get is
- *    returned as is and *pgnop is left alone.
- *
- * PUBLIC: int __db_vrfy_pgset_next __P((DBC *, db_pgno_t *));
- */
-int
-__db_vrfy_pgset_next(dbc, pgnop)
-    DBC *dbc;
-    db_pgno_t *pgnop;
-{
-    DBT key, data;
-    db_pgno_t found;
-    int ret;
-
-    memset(&data, 0, sizeof(DBT));
-    memset(&key, 0, sizeof(DBT));
-
-    /* The key is read into our local page number. */
-    key.ulen = sizeof(db_pgno_t);
-    key.data = &found;
-    F_SET(&key, DB_DBT_USERMEM);
-
-    /* We don't care about the data, just the keys. */
-    F_SET(&data, DB_DBT_USERMEM | DB_DBT_PARTIAL);
-
-    ret = __dbc_get(dbc, &key, &data, DB_NEXT);
-    if (ret != 0)
-        return (ret);
-
-    DB_ASSERT(dbc->env, key.size == sizeof(db_pgno_t));
-    *pgnop = found;
-
-    return (0);
-}
-
-/*
- * __db_vrfy_childcursor --
- *    Open a cursor on the child database (vdp->cdbp) for walking child
- *    lists.  The cursor is stored in *dbcp only on success; otherwise
- *    the error from __db_cursor is returned.
- *
- * PUBLIC: int __db_vrfy_childcursor __P((VRFY_DBINFO *, DBC **));
- */
-int
-__db_vrfy_childcursor(vdp, dbcp)
-    VRFY_DBINFO *vdp;
-    DBC **dbcp;
-{
-    DBC *cursor;
-    int ret;
-
-    ret = __db_cursor(vdp->cdbp, vdp->thread_info, NULL, &cursor, 0);
-    if (ret != 0)
-        return (ret);
-
-    *dbcp = cursor;
-    return (ret);
-}
-
-/*
- * __db_vrfy_childput --
- *    Add a child structure to the set for a given page.
- *
- *    If the page already lists a child with the same page number, that
- *    entry's reference count is incremented and nothing new is stored.
- *    Otherwise cip is stored with a reference count of 1.  The cursor
- *    used for the search is closed on every path.  Returns 0 on
- *    success, else the first error encountered.
- *
- * PUBLIC: int __db_vrfy_childput
- * PUBLIC: __P((VRFY_DBINFO *, db_pgno_t, VRFY_CHILDINFO *));
- */
-int
-__db_vrfy_childput(vdp, pgno, cip)
-    VRFY_DBINFO *vdp;
-    db_pgno_t pgno;
-    VRFY_CHILDINFO *cip;
-{
-    DB *cdbp;
-    DBC *cc;
-    DBT key, data;
-    VRFY_CHILDINFO *oldcip;
-    int ret, t_ret;
-
-    cdbp = vdp->cdbp;
-    memset(&key, 0, sizeof(DBT));
-    memset(&data, 0, sizeof(DBT));
-
-    key.data = &pgno;
-    key.size = sizeof(db_pgno_t);
-
-    /*
-     * We want to avoid adding multiple entries for a single child page;
-     * we only need to verify each child once, even if a child (such
-     * as an overflow key) is multiply referenced.
-     *
-     * However, we also need to make sure that when walking the list
-     * of children, we encounter them in the order they're referenced
-     * on a page.  (This permits us, for example, to verify the
-     * prev_pgno/next_pgno chain of Btree leaf pages.)
-     *
-     * Check the child database to make sure that this page isn't
-     * already a child of the specified page number.  If it's not,
-     * put it at the end of the duplicate set.
-     */
-    if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
-        return (ret);
-    for (ret = __db_vrfy_ccset(cc, pgno, &oldcip); ret == 0;
-        ret = __db_vrfy_ccnext(cc, &oldcip))
-        if (oldcip->pgno == cip->pgno) {
-            /*
-             * Found a matching child.  Increment its reference
-             * count--we've run into it again--but don't put it
-             * again.  Close the cursor even if the increment
-             * failed, keeping the first error.
-             */
-            ret = __db_vrfy_childinc(cc, oldcip);
-            if ((t_ret = __db_vrfy_ccclose(cc)) != 0 && ret == 0)
-                ret = t_ret;
-            return (ret);
-        }
-    if (ret != DB_NOTFOUND) {
-        (void)__db_vrfy_ccclose(cc);
-        return (ret);
-    }
-    if ((ret = __db_vrfy_ccclose(cc)) != 0)
-        return (ret);
-
-    cip->refcnt = 1;
-    data.data = cip;
-    data.size = sizeof(VRFY_CHILDINFO);
-
-    return (__db_put(cdbp, vdp->thread_info, NULL, &key, &data, 0));
-}
-
-/*
- * __db_vrfy_childinc --
- *    Bump the reference count of the VRFY_CHILDINFO the child cursor is
- *    positioned on and write it back in place.  The caller has just
- *    fetched the structure and passes it in as cip, saving a get.
- */
-static int
-__db_vrfy_childinc(dbc, cip)
-    DBC *dbc;
-    VRFY_CHILDINFO *cip;
-{
-    DBT unused_key, newdata;
-
-    memset(&newdata, 0, sizeof(DBT));
-    memset(&unused_key, 0, sizeof(DBT));
-
-    cip->refcnt++;
-    newdata.size = sizeof(VRFY_CHILDINFO);
-    newdata.data = cip;
-
-    /* Overwrite the record under the cursor. */
-    return (__dbc_put(dbc, &unused_key, &newdata, DB_CURRENT));
-}
-
-/*
- * __db_vrfy_ccset --
- *    Position a cursor created with __db_vrfy_childcursor on the first
- *    child of the given page and return that child through cipp.  Any
- *    error from the cursor get is returned and *cipp is left alone.
- *
- * PUBLIC: int __db_vrfy_ccset __P((DBC *, db_pgno_t, VRFY_CHILDINFO **));
- */
-int
-__db_vrfy_ccset(dbc, pgno, cipp)
-    DBC *dbc;
-    db_pgno_t pgno;
-    VRFY_CHILDINFO **cipp;
-{
-    DBT key, data;
-    int ret;
-
-    memset(&data, 0, sizeof(DBT));
-    memset(&key, 0, sizeof(DBT));
-
-    key.size = sizeof(db_pgno_t);
-    key.data = &pgno;
-
-    ret = __dbc_get(dbc, &key, &data, DB_SET);
-    if (ret != 0)
-        return (ret);
-
-    DB_ASSERT(dbc->env, data.size == sizeof(VRFY_CHILDINFO));
-    *cipp = (VRFY_CHILDINFO *)data.data;
-
-    return (0);
-}
-
-/*
- * __db_vrfy_ccnext --
- *    Step a cursor created with __db_vrfy_childcursor to the next
- *    duplicate (the next child of the same page) and return it through
- *    cipp.  Any error from the cursor get is returned and *cipp is left
- *    alone.
- *
- * PUBLIC: int __db_vrfy_ccnext __P((DBC *, VRFY_CHILDINFO **));
- */
-int
-__db_vrfy_ccnext(dbc, cipp)
-    DBC *dbc;
-    VRFY_CHILDINFO **cipp;
-{
-    DBT key, data;
-    int ret;
-
-    memset(&data, 0, sizeof(DBT));
-    memset(&key, 0, sizeof(DBT));
-
-    ret = __dbc_get(dbc, &key, &data, DB_NEXT_DUP);
-    if (ret != 0)
-        return (ret);
-
-    DB_ASSERT(dbc->env, data.size == sizeof(VRFY_CHILDINFO));
-    *cipp = (VRFY_CHILDINFO *)data.data;
-
-    return (0);
-}
-
-/*
- * __db_vrfy_ccclose --
- *    Close a cursor created with __db_vrfy_childcursor.
- *
- *    This is only a wrapper around __dbc_close for now; it exists so the
- *    internal database usage can change without touching the callers.
- *
- * PUBLIC: int __db_vrfy_ccclose __P((DBC *));
- */
-int
-__db_vrfy_ccclose(dbc)
-    DBC *dbc;
-{
-    int ret;
-
-    ret = __dbc_close(dbc);
-    return (ret);
-}
-
-/*
- * __db_vrfy_pageinfo_create --
- *    Constructor for VRFY_PAGEINFO: allocates a zero-filled structure
- *    and returns it through pipp, or returns the allocation error.
- */
-static int
-__db_vrfy_pageinfo_create(env, pipp)
-    ENV *env;
-    VRFY_PAGEINFO **pipp;
-{
-    VRFY_PAGEINFO *fresh;
-    int ret;
-
-    /*
-     * pageinfo structs come either from here or from a database fetch
-     * with DB_DBT_MALLOC, and the destructor has no easy way to tell
-     * which.  Allocating with __os_umalloc lets __os_ufree release both.
-     */
-    ret = __os_umalloc(env, sizeof(VRFY_PAGEINFO), &fresh);
-    if (ret != 0)
-        return (ret);
-
-    memset(fresh, 0, sizeof(VRFY_PAGEINFO));
-    *pipp = fresh;
-
-    return (0);
-}
-
-/*
- * __db_salvage_init --
- *    Set up the salvager database: a Btree with a 1024-byte page size,
- *    created without an environment and opened with no file or database
- *    name.  On success the handle is stored in vdp->salvage_pages; on
- *    failure the handle is closed and the error returned.
- *
- * PUBLIC: int __db_salvage_init __P((VRFY_DBINFO *));
- */
-int
-__db_salvage_init(vdp)
-    VRFY_DBINFO *vdp;
-{
-    DB *salvdb;
-    int ret;
-
-    ret = __db_create_internal(&salvdb, NULL, 0);
-    if (ret != 0)
-        return (ret);
-
-    ret = __db_set_pagesize(salvdb, 1024);
-    if (ret == 0)
-        ret = __db_open(salvdb, vdp->thread_info,
-            NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0, PGNO_BASE_MD);
-
-    if (ret != 0) {
-        (void)__db_close(salvdb, NULL, 0);
-        return (ret);
-    }
-
-    vdp->salvage_pages = salvdb;
-    return (0);
-}
-
-/*
- * __db_salvage_destroy --
- *    Close the salvager database if one was set up; a missing database
- *    is not an error.
- * PUBLIC: int __db_salvage_destroy __P((VRFY_DBINFO *));
- */
-int
-__db_salvage_destroy(vdp)
-    VRFY_DBINFO *vdp;
-{
-    if (vdp->salvage_pages == NULL)
-        return (0);
-
-    return (__db_close(vdp->salvage_pages, NULL, 0));
-}
-
-/*
- * __db_salvage_getnext --
- *    Get the next (first) unprinted page in the database of pages we need to
- *    print still.  Delete entries for any already-printed pages we encounter
- *    in this search, as well as the page we're returning.
- *
- *    If *dbcp is NULL a cursor is opened on the salvage database and left
- *    in *dbcp for the caller.  With skip_overflow set, SALVAGE_OVERFLOW
- *    entries are stepped over and left in place.  Returns 0 with *pgnop
- *    and *pgtypep filled in, or the error that ended the scan (including
- *    whatever the cursor returns when it runs out of entries).
- *
- * PUBLIC: int __db_salvage_getnext
- * PUBLIC: __P((VRFY_DBINFO *, DBC **, db_pgno_t *, u_int32_t *, int));
- */
-int
-__db_salvage_getnext(vdp, dbcp, pgnop, pgtypep, skip_overflow)
-    VRFY_DBINFO *vdp;
-    DBC **dbcp;
-    db_pgno_t *pgnop;
-    u_int32_t *pgtypep;
-    int skip_overflow;
-{
-    DB *dbp;
-    DBT key, data;
-    int ret;
-    u_int32_t pgtype;
-
-    dbp = vdp->salvage_pages;
-
-    memset(&key, 0, sizeof(DBT));
-    memset(&data, 0, sizeof(DBT));
-
-    if (*dbcp == NULL &&
-        (ret = __db_cursor(dbp, vdp->thread_info, NULL, dbcp, 0)) != 0)
-        return (ret);
-
-    while ((ret = __dbc_get(*dbcp, &key, &data, DB_NEXT)) == 0) {
-        DB_ASSERT(dbp->env, data.size == sizeof(u_int32_t));
-        memcpy(&pgtype, data.data, sizeof(pgtype));
-
-        if (skip_overflow && pgtype == SALVAGE_OVERFLOW)
-            continue;
-
-        if ((ret = __dbc_del(*dbcp, 0)) != 0)
-            return (ret);
-        if (pgtype != SALVAGE_IGNORE) {
-            DB_ASSERT(dbp->env, key.size == sizeof(db_pgno_t));
-            DB_ASSERT(dbp->env, data.size == sizeof(u_int32_t));
-
-            /*
-             * Copy out of the returned buffers rather than
-             * dereferencing them through casts; nothing here
-             * guarantees their alignment.  The type was already
-             * copied above.
-             */
-            memcpy(pgnop, key.data, sizeof(db_pgno_t));
-            *pgtypep = pgtype;
-            break;
-        }
-    }
-
-    return (ret);
-}
-
-/*
- * __db_salvage_isdone --
- *    Report whether the given pgno is already marked SALVAGE_IGNORE
- *    (meaning that we don't need to print it again).
- *
- *    Returns DB_KEYEXIST if it is marked, 0 if it is unmarked or marked
- *    with any other type, or another error if the lookup itself fails.
- *
- * PUBLIC: int __db_salvage_isdone __P((VRFY_DBINFO *, db_pgno_t));
- */
-int
-__db_salvage_isdone(vdp, pgno)
-    VRFY_DBINFO *vdp;
-    db_pgno_t pgno;
-{
-    DB *salvdb;
-    DBT key, data;
-    int ret;
-    u_int32_t marked;
-
-    salvdb = vdp->salvage_pages;
-
-    memset(&data, 0, sizeof(DBT));
-    memset(&key, 0, sizeof(DBT));
-
-    key.size = sizeof(db_pgno_t);
-    key.data = &pgno;
-
-    /* The stored type is read straight into our local. */
-    marked = SALVAGE_INVALID;
-    data.flags = DB_DBT_USERMEM;
-    data.ulen = sizeof(u_int32_t);
-    data.data = &marked;
-
-    ret = __db_get(salvdb, vdp->thread_info, NULL, &key, &data, 0);
-
-    /* The pgno is not yet marked anything. */
-    if (ret == DB_NOTFOUND)
-        return (0);
-    if (ret != 0)
-        return (ret);
-
-    /* The key is present; only SALVAGE_IGNORE counts as done. */
-    return (marked == SALVAGE_IGNORE ? DB_KEYEXIST : 0);
-}
-
-/*
- * __db_salvage_markdone --
- *    Mark as done a given page, by storing SALVAGE_IGNORE under its page
- *    number in the salvage database.
- *
- *    Returns DB_VERIFY_BAD if the page is already marked done, any other
- *    error from the lookup, or else the result of the put.
- *
- * PUBLIC: int __db_salvage_markdone __P((VRFY_DBINFO *, db_pgno_t));
- */
-int
-__db_salvage_markdone(vdp, pgno)
-    VRFY_DBINFO *vdp;
-    db_pgno_t pgno;
-{
-    DB *dbp;
-    DBT key, data;
-    int ret;
-    u_int32_t pgtype;
-
-    /* Stored as a u_int32_t, matching the size written below. */
-    pgtype = SALVAGE_IGNORE;
-    dbp = vdp->salvage_pages;
-
-    memset(&key, 0, sizeof(DBT));
-    memset(&data, 0, sizeof(DBT));
-
-    key.data = &pgno;
-    key.size = sizeof(db_pgno_t);
-
-    /*
-     * Put an entry for this page, with pgno as key and type as data,
-     * unless it's already there and is marked done.
-     * If it's there and is marked anything else, that's fine--we
-     * want to mark it done, but db_salvage_isdone only lets
-     * us know if it's marked IGNORE.
-     *
-     * We don't want to return DB_KEYEXIST, though; this will
-     * likely get passed up all the way and make no sense to the
-     * application.  Instead, use DB_VERIFY_BAD to indicate that
-     * we've seen this page already--it probably indicates a
-     * multiply-linked page.
-     */
-    if ((ret = __db_salvage_isdone(vdp, pgno)) != 0)
-        return (ret == DB_KEYEXIST ? DB_VERIFY_BAD : ret);
-
-    data.data = &pgtype;
-    data.size = sizeof(u_int32_t);
-
-    return (__db_put(dbp, vdp->thread_info, NULL, &key, &data, 0));
-}
-
-/*
- * __db_salvage_markneeded --
- *    Record that a page must be dealt with later, storing pgtype under
- *    its page number.  An existing entry is left untouched and is not
- *    an error.
- *
- * PUBLIC: int __db_salvage_markneeded
- * PUBLIC: __P((VRFY_DBINFO *, db_pgno_t, u_int32_t));
- */
-int
-__db_salvage_markneeded(vdp, pgno, pgtype)
-    VRFY_DBINFO *vdp;
-    db_pgno_t pgno;
-    u_int32_t pgtype;
-{
-    DB *salvdb;
-    DBT key, data;
-    int ret;
-
-    salvdb = vdp->salvage_pages;
-
-    memset(&data, 0, sizeof(DBT));
-    memset(&key, 0, sizeof(DBT));
-
-    data.size = sizeof(u_int32_t);
-    data.data = &pgtype;
-
-    key.size = sizeof(db_pgno_t);
-    key.data = &pgno;
-
-    /*
-     * DB_NOOVERWRITE keeps whatever is already recorded for this page;
-     * an existing entry has presumably already been marked done.
-     */
-    ret = __db_put(salvdb,
-        vdp->thread_info, NULL, &key, &data, DB_NOOVERWRITE);
-    if (ret == DB_KEYEXIST)
-        return (0);
-    return (ret);
-}
-
-/*
- * __db_vrfy_prdbt --
- *    Print out a DBT data element from a verification routine.  With a
- *    non-NULL vdp, a pending "__OTHER__" subdatabase header is printed
- *    first and the salvage-wide printable setting can force checkprint
- *    on.  The actual printing is done by __db_prdbt.
- *
- * PUBLIC: int __db_vrfy_prdbt __P((DBT *, int, const char *, void *,
- * PUBLIC: int (*)(void *, const void *), int, VRFY_DBINFO *));
- */
-int
-__db_vrfy_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, vdp)
-    DBT *dbtp;
-    int checkprint;
-    const char *prefix;
-    void *handle;
-    int (*callback) __P((void *, const void *));
-    int is_recno;
-    VRFY_DBINFO *vdp;
-{
-    /* Without verifier state there is nothing to adjust. */
-    if (vdp == NULL)
-        return (__db_prdbt(
-            dbtp, checkprint, prefix, handle, callback, is_recno));
-
-    /*
-     * We might be the first key in the "fake" subdatabase used for
-     * key/data pairs we can't associate with a known subdb.  If the
-     * SALVAGE_PRINTHEADER flag is set, print the subdatabase header,
-     * clear the flag and remember that a footer is now owed.
-     */
-    if (F_ISSET(vdp, SALVAGE_PRINTHEADER)) {
-        (void)__db_prheader(
-            NULL, "__OTHER__", 0, 0, handle, callback, vdp, 0);
-        F_CLR(vdp, SALVAGE_PRINTHEADER);
-        F_SET(vdp, SALVAGE_PRINTFOOTER);
-    }
-
-    /*
-     * Even if our immediate caller did not ask for printable output,
-     * it may be set on a salvage-wide basis.
-     */
-    if (F_ISSET(vdp, SALVAGE_PRINTABLE))
-        checkprint = 1;
-
-    return (
-        __db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno));
-}
diff --git a/db/partition.c b/db/partition.c
deleted file mode 100644
index 4e89ede..0000000
--- a/db/partition.c
+++ /dev/null
@@ -1,2048 +0,0 @@
-/*-
- * See the file LICENSE for redistribution information.
- *
- * Copyright (c) 2001, 2010 Oracle and/or its affiliates. All rights reserved.
- *
- * $Id$
- */
-
-#include "db_config.h"
-
-#include "db_int.h"
-#include "dbinc/db_page.h"
-#include "dbinc/db_verify.h"
-#include "dbinc/btree.h"
-#ifdef HAVE_HASH
-#include "dbinc/hash.h"
-#endif
-#include "dbinc/lock.h"
-#include "dbinc/log.h"
-#include "dbinc/mp.h"
-#include "dbinc/partition.h"
-#include "dbinc/txn.h"
-#ifdef HAVE_PARTITION
-
-static int __part_rr __P((DB *, DB_THREAD_INFO *, DB_TXN *,
- const char *, const char *, const char *, u_int32_t));
-static int __partc_close __P((DBC *, db_pgno_t, int *));
-static int __partc_del __P((DBC*, u_int32_t));
-static int __partc_destroy __P((DBC*));
-static int __partc_get_pp __P((DBC*, DBT *, DBT *, u_int32_t));
-static int __partc_put __P((DBC*, DBT *, DBT *, u_int32_t, db_pgno_t *));
-static int __partc_writelock __P((DBC*));
-static int __partition_chk_meta __P((DB *,
- DB_THREAD_INFO *, DB_TXN *, u_int32_t));
-static int __partition_setup_keys __P((DBC *,
- DB_PARTITION *, DBMETA *, u_int32_t));
-static int __part_key_cmp __P((const void *, const void *));
-static inline void __part_search __P((DB *,
- DB_PARTITION *, DBT *, u_int32_t *));
-
-static char *Alloc_err = "Partition open failed to allocate %d bytes";
-
-/*
- * Allocate a partition cursor and copy flags to the partition cursor.
- * Not passed:
- * DBC_PARTITIONED -- the subcursors are not.
- * DBC_OWN_LID -- the arg dbc owns the lock id.
- * DBC_WRITECURSOR DBC_WRITER -- CDS locking happens on
- * the whole DB, not the partition.
- *
- * The macro is not self-contained: it reads a variable named "part" (for
- * part->handles), assigns to a variable named "ret", and jumps to a label
- * named "err" when __db_cursor_int fails, so all three must exist in the
- * invoking function. The new cursor shares dbc's thread info, transaction
- * and locker.
- *
- * NOTE(review): new_dbc is used as "&new_dbc" without parentheses, so it
- * should be passed as a plain variable name.
- */
-#define GET_PART_CURSOR(dbc, new_dbc, part_id) do { \
- DB *__part_dbp; \
- __part_dbp = part->handles[part_id]; \
- if ((ret = __db_cursor_int(__part_dbp, \
- (dbc)->thread_info, (dbc)->txn, __part_dbp->type, \
- PGNO_INVALID, 0, (dbc)->locker, &new_dbc)) != 0) \
- goto err; \
- (new_dbc)->flags = (dbc)->flags & \
- ~(DBC_PARTITIONED|DBC_OWN_LID|DBC_WRITECURSOR|DBC_WRITER); \
-} while (0)
-
-/*
- * Search for the correct partition.
- *
- * Compares key against the entries of part->keys using the Btree
- * comparison function stored in dbp->bt_internal, and stores the chosen
- * partition index in *part_idp. On an exact match that is the index of
- * the matching key. Otherwise it is the final search base minus one, or
- * 0 when the base is already 0.
- *
- * NOTE(review): the DB_BINARY_SEARCH_* macros are defined elsewhere;
- * presumably they implement a standard binary search that leaves base at
- * the first key greater than the search key -- confirm.
- */
-static inline void __part_search(dbp, part, key, part_idp)
- DB *dbp;
- DB_PARTITION *part;
- DBT *key;
- u_int32_t *part_idp;
-{
- db_indx_t base, indx, limit;
- int cmp;
- int (*func) __P((DB *, const DBT *, const DBT *));
-
- DB_ASSERT(dbp->env, part->nparts != 0);
- COMPQUIET(cmp, 0);
- COMPQUIET(indx, 0);
-
- func = ((BTREE *)dbp->bt_internal)->bt_compare;
- DB_BINARY_SEARCH_FOR(base, limit, part->nparts, O_INDX) {
- DB_BINARY_SEARCH_INCR(indx, base, limit, O_INDX);
- cmp = func(dbp, key, &part->keys[indx]);
- if (cmp == 0)
- break;
- if (cmp > 0)
- DB_BINARY_SEARCH_SHIFT_BASE(indx, base, limit, O_INDX);
- }
- if (cmp == 0)
- *part_idp = indx;
- else if ((*part_idp = base) != 0)
- (*part_idp)--;
-}
-
-/*
- * __partition_init --
- *    Initialize the partition structure.
- *    Called when the meta data page is read in during database open or
- *    when partition keys or a callback are set.
- *
- *    Allocates dbp->p_internal if it does not exist yet, records the
- *    range/callback mode requested in flags, and installs the partition
- *    rename and remove methods.  Returns EINVAL if the requested mode
- *    conflicts with the mode already recorded, or the allocation error.
- *
- * PUBLIC: int __partition_init __P((DB *, u_int32_t));
- */
-int
-__partition_init(dbp, flags)
-    DB *dbp;
-    u_int32_t flags;
-{
-    DB_PARTITION *part;
-    int ret;
-
-    part = dbp->p_internal;
-    if (part == NULL) {
-        ret = __os_calloc(dbp->env, 1, sizeof(*part), &part);
-        if (ret != 0)
-            return (ret);
-    } else if ((LF_ISSET(DBMETA_PART_RANGE) &&
-        F_ISSET(part, PART_CALLBACK)) ||
-        (LF_ISSET(DBMETA_PART_CALLBACK) &&
-        F_ISSET(part, PART_RANGE))) {
-        /* The existing structure was set up for the other mode. */
-        __db_errx(dbp->env,
-            "Cannot specify callback and range keys.");
-        return (EINVAL);
-    }
-
-    if (LF_ISSET(DBMETA_PART_CALLBACK))
-        F_SET(part, PART_CALLBACK);
-    if (LF_ISSET(DBMETA_PART_RANGE))
-        F_SET(part, PART_RANGE);
-
-    /* Set up AM-specific methods that do not require an open. */
-    dbp->db_am_remove = __part_remove;
-    dbp->db_am_rename = __part_rename;
-    dbp->p_internal = part;
-
-    return (0);
-}
-/*
- * __partition_set --
- *    Set the partitioning keys or callback function.
- *    This routine must be called prior to creating the database.
- *
- *    Exactly one of keys and callback must be supplied, with at least
- *    two partitions; anything else is rejected with EINVAL, as is a
- *    request that conflicts with the keys or callback already recorded.
- * PUBLIC: int __partition_set __P((DB *, u_int32_t, DBT *,
- * PUBLIC: u_int32_t (*callback)(DB *, DBT *key)));
- */
-
-int
-__partition_set(dbp, parts, keys, callback)
-    DB *dbp;
-    u_int32_t parts;
-    DBT *keys;
-    u_int32_t (*callback)(DB *, DBT *key);
-{
-    DB_PARTITION *part;
-    ENV *env;
-    int ret;
-
-    DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_partition");
-    env = dbp->dbenv->env;
-
-    if (parts < 2) {
-        __db_errx(env, "Must specify at least 2 partitions.");
-        return (EINVAL);
-    }
-
-    if (keys == NULL && callback == NULL) {
-        __db_errx(env, "Must specify either keys or a callback.");
-        return (EINVAL);
-    }
-    if (keys != NULL && callback != NULL)
-        goto conflict;
-
-    part = dbp->p_internal;
-    if (part == NULL) {
-        ret = __partition_init(dbp,
-            keys != NULL ?
-            DBMETA_PART_RANGE : DBMETA_PART_CALLBACK);
-        if (ret != 0)
-            return (ret);
-        part = dbp->p_internal;
-    } else {
-        /* The new setting must not clash with the recorded one. */
-        if (part->keys != NULL && callback != NULL)
-            goto conflict;
-        if (part->callback != NULL && keys != NULL)
-            goto conflict;
-    }
-
-    part->callback = callback;
-    part->keys = keys;
-    part->nparts = parts;
-
-    return (0);
-
-conflict:
-    __db_errx(env, "May not specify both keys and a callback.");
-    return (EINVAL);
-}
-
-/*
- * __partition_set_dirs --
- *    Set the directories for creating the partition databases.
- *    They must be in the environment.
- *
- *    dirp is a NULL-terminated array of directory names.  A single
- *    allocation holds a NULL-terminated pointer array and, for a
- *    handle-local environment (ENV_DBLOCAL), private copies of the
- *    strings after it.  Otherwise each name must match an entry of the
- *    environment's data directory list, and the pointer array refers to
- *    those entries.  Returns EINVAL for a directory that is not in the
- *    list, or the error from allocation or __partition_init; the
- *    allocation is released on every failure path.
- * PUBLIC: int __partition_set_dirs __P((DB *, const char **));
- */
-int
-__partition_set_dirs(dbp, dirp)
-    DB *dbp;
-    const char **dirp;
-{
-    DB_ENV *dbenv;
-    DB_PARTITION *part;
-    ENV *env;
-    u_int32_t ndirs, slen;
-    int i, ret;
-    const char **dir;
-    char *cp, **part_dirs, **pd;
-
-    DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_partition_dirs");
-    dbenv = dbp->dbenv;
-    env = dbp->env;
-
-    /* Count the entries (plus the terminator) and any string bytes. */
-    ndirs = 1;
-    slen = 0;
-    for (dir = dirp; *dir != NULL; dir++) {
-        if (F_ISSET(env, ENV_DBLOCAL))
-            slen += (u_int32_t)strlen(*dir) + 1;
-        ndirs++;
-    }
-
-    slen += sizeof(char *) * ndirs;
-    /* Report the real allocation error rather than EINVAL. */
-    if ((ret = __os_malloc(env, slen, &part_dirs)) != 0)
-        return (ret);
-    memset(part_dirs, 0, slen);
-
-    /* String copies, when needed, live right after the pointer array. */
-    cp = (char *) part_dirs + (sizeof(char *) * ndirs);
-    pd = part_dirs;
-    for (dir = dirp; *dir != NULL; dir++, pd++) {
-        if (F_ISSET(env, ENV_DBLOCAL)) {
-            (void)strcpy(cp, *dir);
-            *pd = cp;
-            cp += strlen(*dir) + 1;
-            continue;
-        }
-        for (i = 0; i < dbenv->data_next; i++)
-            if (strcmp(*dir, dbenv->db_data_dir[i]) == 0)
-                break;
-        if (i == dbenv->data_next) {
-            __db_errx(dbp->env,
-                "Directory not in environment list %s", *dir);
-            __os_free(env, part_dirs);
-            return (EINVAL);
-        }
-        *pd = dbenv->db_data_dir[i];
-    }
-
-    if ((part = dbp->p_internal) == NULL) {
-        if ((ret = __partition_init(dbp, 0)) != 0) {
-            /* Nothing owns the array yet; don't leak it. */
-            __os_free(env, part_dirs);
-            return (ret);
-        }
-        part = dbp->p_internal;
-    }
-
-    part->dirs = (const char **)part_dirs;
-
-    return (0);
-}
-
-/*
- * __partition_open --
- * Open/create a partitioned database.
- *
- * Checks the master database's metadata with __partition_chk_meta,
- * allocates part->handles, and creates one DB handle per partition.
- * Each handle inherits configuration from dbp and is named with
- * PART_NAME, keeping whatever part of fname precedes its final path
- * component. The partitions are only opened when do_open is non-zero,
- * and no handles are created when dbp has DB_AM_RECOVER set.
- *
- * Returns 0 on success. On failure the partition state is released
- * with __partition_close and the error is returned.
- *
- * PUBLIC: int __partition_open __P((DB *, DB_THREAD_INFO *,
- * PUBLIC: DB_TXN *, const char *, DBTYPE, u_int32_t, int, int));
- */
-int
-__partition_open(dbp, ip, txn, fname, type, flags, mode, do_open)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- const char *fname;
- DBTYPE type;
- u_int32_t flags;
- int mode, do_open;
-{
- DB *part_db;
- DB_PARTITION *part;
- DBC *dbc;
- ENV *env;
- u_int32_t part_id;
- int ret;
- char *name, *sp;
- const char **dirp, *np;
-
- part = dbp->p_internal;
- env = dbp->dbenv->env;
- name = NULL;
-
- if ((ret = __partition_chk_meta(dbp, ip, txn, flags)) != 0 && do_open)
- goto err; /* a metadata failure is only fatal when do_open is set */
-
- if ((ret = __os_calloc(env,
- part->nparts, sizeof(*part->handles), &part->handles)) != 0) {
- __db_errx(env,
- Alloc_err, part->nparts * sizeof(*part->handles));
- goto err;
- }
-
- DB_ASSERT(env, fname != NULL);
- if ((ret = __os_malloc(env,
- strlen(fname) + PART_LEN + 1, &name)) != 0) {
- __db_errx(env, Alloc_err, strlen(fname) + PART_LEN + 1);
- goto err;
- }
-
- sp = name; /* sp: where each partition's file name is formatted */
- np = __db_rpath(fname);
- if (np == NULL)
- np = fname;
- else {
- np++;
- (void)strncpy(name, fname, (size_t)(np - fname)); /* copy the part of fname that precedes np */
- sp = name + (np - fname);
- }
-
- if (F_ISSET(dbp, DB_AM_RECOVER))
- goto done; /* skip creating partition handles */
- dirp = part->dirs;
- for (part_id = 0; part_id < part->nparts; part_id++) {
- if ((ret = __db_create_internal(
- &part->handles[part_id], dbp->env, 0)) != 0)
- goto err;
-
- part_db = part->handles[part_id];
- part_db->flags = F_ISSET(dbp,
- ~(DB_AM_CREATED | DB_AM_CREATED_MSTR | DB_AM_OPEN_CALLED)); /* inherit every flag except these three */
- part_db->adj_fileid = dbp->adj_fileid;
- part_db->pgsize = dbp->pgsize;
- part_db->priority = dbp->priority;
- part_db->db_append_recno = dbp->db_append_recno;
- part_db->db_feedback = dbp->db_feedback;
- part_db->dup_compare = dbp->dup_compare;
- part_db->app_private = dbp->app_private;
- part_db->api_internal = dbp->api_internal;
-
- if (dbp->type == DB_BTREE)
- __bam_copy_config(dbp, part_db, part->nparts);
-#ifdef HAVE_HASH
- if (dbp->type == DB_HASH)
- __ham_copy_config(dbp, part_db, part->nparts);
-#endif
-
- (void)sprintf(sp, PART_NAME, np, part_id);
- if ((ret = __os_strdup(env, name, &part_db->fname)) != 0)
- goto err;
- if (do_open) {
- /*
- * Cycle through the directory names passed in,
- * if any. When the NULL terminator of the list is
- * reached, wrap around to the first entry again.
- */
- if (dirp != NULL &&
- (part_db->dirname = *dirp++) == NULL)
- part_db->dirname = *(dirp = part->dirs);
- if ((ret = __db_open(part_db, ip, txn,
- name, NULL, type, flags, mode, PGNO_BASE_MD)) != 0)
- goto err;
- }
- }
-
- /* Get rid of the cursor used to open the database; it's the wrong type. */
-done: while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL)
- if ((ret = __dbc_destroy(dbc)) != 0)
- break;
-
- if (0) { /* body is reached only through the err label */
-err: (void)__partition_close(dbp, txn, 0);
- }
- if (name != NULL)
- __os_free(env, name);
- return (ret);
-}
-
-/*
- * __partition_chk_meta --
- * Check for a consistent meta data page and parameters when opening a
- * partitioned database.
- *
- * Reads the metadata page of the master database under a read lock and
- * compares it with the DB_PARTITION settings on the handle: access
- * method, partitioning style (range vs. callback), presence of the
- * callback and number of partitions. If part->nparts is zero it is
- * taken from the metadata page. For btree databases the function then
- * calls __partition_setup_keys. dbp->p_internal is cleared while the
- * master database is accessed and restored before returning.
- *
- * Returns 0 when everything is consistent, EINVAL on a mismatch, or the
- * error returned by the cursor, lock or page calls.
- */
-static int
-__partition_chk_meta(dbp, ip, txn, flags)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- u_int32_t flags;
-{
- DBMETA *meta;
- DB_PARTITION *part;
- DBC *dbc;
- DB_LOCK metalock;
- DB_MPOOLFILE *mpf;
- ENV *env;
- db_pgno_t base_pgno;
- int ret, t_ret;
-
- dbc = NULL;
- meta = NULL;
- LOCK_INIT(metalock);
- part = dbp->p_internal;
- mpf = dbp->mpf;
- env = dbp->env;
- ret = 0;
-
- /* Get a cursor on the main db. */
- dbp->p_internal = NULL; /* put back at the err label */
- if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0)
- goto err;
-
- /* Get the metadata page. */
- base_pgno = PGNO_BASE_MD;
- if ((ret =
- __db_lget(dbc, 0, base_pgno, DB_LOCK_READ, 0, &metalock)) != 0)
- goto err;
- if ((ret = __memp_fget(mpf, &base_pgno, ip, dbc->txn, 0, &meta)) != 0)
- goto err;
-
- if (meta->magic != DB_HASHMAGIC &&
- (meta->magic != DB_BTREEMAGIC || F_ISSET(meta, BTM_RECNO))) {
- __db_errx(env,
- "Partitioning may only specified on BTREE and HASH databases.");
- ret = EINVAL;
- goto err;
- }
- if (!FLD_ISSET(meta->metaflags,
- DBMETA_PART_RANGE | DBMETA_PART_CALLBACK)) {
- __db_errx(env,
- "Partitioning specified on a non-partitioned database.");
- ret = EINVAL;
- goto err;
- }
-
- if ((F_ISSET(part, PART_RANGE) &&
- FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK)) ||
- (F_ISSET(part, PART_CALLBACK) &&
- FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))) {
- __db_errx(env, "Incompatible partitioning specified.");
- ret = EINVAL;
- goto err;
- }
-
- if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK) &&
- part->callback == NULL && !IS_RECOVERING(env) &&
- !F_ISSET(dbp, DB_AM_RECOVER) && !LF_ISSET(DB_RDWRMASTER)) {
- __db_errx(env, "Partition callback not specified.");
- ret = EINVAL;
- goto err;
- }
-
- if (F_ISSET(dbp, DB_AM_RECNUM)) {
- __db_errx(env,
- "Record numbers are not supported in partitioned databases.");
- ret = EINVAL;
- goto err;
- }
-
- if (part->nparts == 0) { /* caller gave no count: adopt the one on the meta page */
- if (LF_ISSET(DB_CREATE) && meta->nparts == 0) {
- __db_errx(env, "Zero paritions specified.");
- ret = EINVAL;
- goto err;
- } else
- part->nparts = meta->nparts;
- } else if (meta->nparts != 0 && part->nparts != meta->nparts) {
- __db_errx(env, "Number of partitions does not match.");
- ret = EINVAL;
- goto err;
- }
-
- if (meta->magic == DB_HASHMAGIC) {
- if (!F_ISSET(part, PART_CALLBACK)) {
- __db_errx(env,
- "Hash database must specify a partition callback.");
- ret = EINVAL;
- }
- } else if (meta->magic != DB_BTREEMAGIC) {
- __db_errx(env,
- "Partitioning only supported on BTREE nad HASH.");
- ret = EINVAL;
- } else
- ret = __partition_setup_keys(dbc, part, meta, flags);
-
-err: /* Put the metadata page back. Cleanup keeps the first error seen. */
- if (meta != NULL && (t_ret = __memp_fput(mpf,
- ip, meta, dbc->priority)) != 0 && ret == 0)
- ret = t_ret;
- if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
- ret = t_ret;
-
- if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- dbp->p_internal = part;
- return (ret);
-}
-
-/*
- * Support for sorting keys. Keys must be sorted using the btree
- * compare function, so when we call qsort in __partition_setup_keys
- * we use this structure to pass the DBP and compare function along
- * with each key.
- */
-struct key_sort {
- DB *dbp; /* handle handed to compare as its first argument */
- DBT *key; /* the key this entry stands for */
- int (*compare) __P((DB *, const DBT *, const DBT *)); /* ordering function */
-};
-
-static int __part_key_cmp(a, b)
-	const void *a, *b;
-{
-	const struct key_sort *lhs = a;
-	const struct key_sort *rhs = b;
-
-	/*
-	 * qsort callback over struct key_sort entries: order the two keys
-	 * with the compare function and handle stored in the left entry.
-	 */
-	return ((*lhs->compare)(lhs->dbp, lhs->key, rhs->key));
-}
-/*
- * __partition_setup_keys --
- * Get the partition keys into memory, or put them to disk if we
- * are creating a partitioned database.
- *
- * dbc is a cursor on the master database and meta its metadata page.
- * The function first probes the master database for existing keys.
- * When creating (DB_CREATE) and none exist, the caller's part->keys
- * plus an empty "0" key are written. For range partitioning the keys
- * are then bulk-read into part->data, part->keys is pointed at a DBT
- * array stored after the bulk data in the same allocation, and any
- * keys the caller supplied are sorted and compared with the stored ones.
- *
- * dbp->p_internal is cleared for the duration and restored on exit.
- * Returns 0 on success, EINVAL for inconsistent keys, or the error of
- * the failing call.
- */
-static int
-__partition_setup_keys(dbc, part, meta, flags)
- DBC *dbc;
- DB_PARTITION *part;
- DBMETA *meta;
- u_int32_t flags;
-{
- BTREE *t;
- DB *dbp;
- DBT data, key, *keys, *kp;
- ENV *env;
- u_int32_t ds, i, j;
- u_int8_t *dd;
- struct key_sort *ks;
- int have_keys, ret;
- int (*compare) __P((DB *, const DBT *, const DBT *));
- void *dp;
-
- COMPQUIET(dd, NULL);
- COMPQUIET(ds, 0);
- memset(&data, 0, sizeof(data));
- memset(&key, 0, sizeof(key));
- ks = NULL;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- /* Need to just read the main database. */
- dbp->p_internal = NULL;
- have_keys = 0;
-
- /* First verify that things are what we expect. */
- if ((ret = __dbc_get(dbc, &key, &data, DB_FIRST)) != 0) {
- if (ret != DB_NOTFOUND)
- goto err;
- if (F_ISSET(part, PART_CALLBACK)) {
- ret = 0;
- goto done;
- }
- if (!LF_ISSET(DB_CREATE) && !F_ISSET(dbp, DB_AM_RECOVER) &&
- !LF_ISSET(DB_RDWRMASTER)) {
- __db_errx(env, "No range keys found.");
- ret = EINVAL;
- goto err;
- }
- } else {
- if (F_ISSET(part, PART_CALLBACK)) {
- __db_errx(env, "Keys found and callback set.");
- ret = EINVAL;
- goto err;
- }
- if (key.size != 0) {
- __db_errx(env, "Partition key 0 is not empty.");
- ret = EINVAL;
- goto err;
- }
- have_keys = 1;
- }
-
- if (LF_ISSET(DB_CREATE) && have_keys == 0) {
- /* Insert the keys into the master database. */
- for (i = 0; i < part->nparts - 1; i++) {
- if ((ret = __db_put(dbp, dbc->thread_info,
- dbc->txn, &part->keys[i], &data, 0)) != 0)
- goto err;
- }
-
- /*
- * Insert the "0" pointer. All records less than the first
- * given key go into this partition. We must use the default
- * compare to insert this key, otherwise it might not be first.
- * The caller's compare function is put back right after the put.
- */
- t = dbc->dbp->bt_internal;
- compare = t->bt_compare;
- t->bt_compare = __bam_defcmp;
- memset(&key, 0, sizeof(key));
- ret = __db_put(dbp, dbc->thread_info, dbc->txn, &key, &data, 0);
- t->bt_compare = compare;
- if (ret != 0)
- goto err;
- }
-done: if (F_ISSET(part, PART_RANGE)) {
- /*
- * Allocate one page to hold the keys plus space at the
- * end of the buffer to put an array of DBTs. If there
- * is not enough space __dbc_get will return how much
- * is needed and we realloc.
- */
- if ((ret = __os_malloc(env,
- meta->pagesize + (sizeof(DBT) * part->nparts),
- &part->data)) != 0) {
- __db_errx(env, Alloc_err, meta->pagesize);
- goto err;
- }
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
- data.data = part->data;
- data.ulen = meta->pagesize;
- data.flags = DB_DBT_USERMEM;
-again: if ((ret = __dbc_get(dbc, &key, &data,
- DB_FIRST | DB_MULTIPLE_KEY)) == DB_BUFFER_SMALL) {
- if ((ret = __os_realloc(env,
- data.size + (sizeof(DBT) * part->nparts),
- &part->data)) != 0)
- goto err;
- data.data = part->data;
- data.ulen = data.size;
- goto again;
- }
- if (ret == 0) {
- /*
- * They passed in keys, they must match.
- * Sort the caller's keys with the btree compare function
- * so they can be checked one by one against the stored keys.
- */
- keys = NULL;
- compare = NULL;
- if (have_keys == 1 && (keys = part->keys) != NULL) {
- t = dbc->dbp->bt_internal;
- compare = t->bt_compare;
- if ((ret = __os_malloc(env, (part->nparts - 1)
- * sizeof(struct key_sort), &ks)) != 0)
- goto err;
- for (j = 0; j < part->nparts - 1; j++) {
- ks[j].dbp = dbc->dbp;
- ks[j].compare = compare;
- ks[j].key = &keys[j];
- }
-
- qsort(ks, (size_t)part->nparts - 1,
- sizeof(struct key_sort), __part_key_cmp);
- }
- DB_MULTIPLE_INIT(dp, &data);
- part->keys = (DBT *)
- ((u_int8_t *)part->data + data.size); /* DBT array sits right after the bulk data */
- j = 0;
- for (kp = part->keys;
- kp < &part->keys[part->nparts]; kp++, j++) {
- DB_MULTIPLE_KEY_NEXT(dp,
- &data, kp->data, kp->size, dd, ds);
- if (dp == NULL) {
- ret = DB_NOTFOUND;
- break;
- }
- if (keys != NULL && j != 0 &&
- compare(dbc->dbp, ks[j - 1].key, kp) != 0) { /* stored key j pairs with sorted user key j - 1 */
- if (kp->data == NULL &&
- F_ISSET(dbp, DB_AM_RECOVER))
- goto err; /* leaves with ret still 0 */
- __db_errx(env,
- "Partition key %d does not match", j);
- ret = EINVAL;
- goto err;
- }
- }
- }
- }
- if (ret == DB_NOTFOUND && F_ISSET(dbp, DB_AM_RECOVER))
- ret = 0;
-
-err: dbp->p_internal = part;
- if (ks != NULL)
- __os_free(env, ks);
- return (ret);
-}
-
-/*
- * __partition_get_callback --
- *	Report the partition count and the partition callback.
- *
- *	Real values are stored only for a handle that is partitioned by
- *	callback; any other handle yields a count of 0 and a NULL callback.
- *	Either output pointer may be NULL, in which case it is skipped.
- *	Always returns 0.
- *
- * PUBLIC: int __partition_get_callback __P((DB *,
- * PUBLIC: u_int32_t *, u_int32_t (**callback)(DB *, DBT *key)));
- */
-int
-__partition_get_callback(dbp, parts, callback)
-	DB *dbp;
-	u_int32_t *parts;
-	u_int32_t (**callback)(DB *, DBT *key);
-{
-	DB_PARTITION *part;
-	int by_callback;
-
-	part = dbp->p_internal;
-	by_callback = part != NULL && F_ISSET(part, PART_CALLBACK);
-
-	if (parts != NULL) {
-		if (by_callback)
-			*parts = part->nparts;
-		else
-			*parts = 0;
-	}
-	if (callback != NULL) {
-		if (by_callback)
-			*callback = part->callback;
-		else
-			*callback = NULL;
-	}
-
-	return (0);
-}
-
-/*
- * __partition_get_keys --
- *	Report the partition count and the range keys.
- *
- *	Real values are stored only for a handle that is partitioned by
- *	range; any other handle yields a count of 0 and a NULL key array.
- *	The returned array starts at part->keys[1].  Either output pointer
- *	may be NULL, in which case it is skipped.  Always returns 0.
- *
- * PUBLIC: int __partition_get_keys __P((DB *, u_int32_t *, DBT **));
- */
-int
-__partition_get_keys(dbp, parts, keys)
-	DB *dbp;
-	u_int32_t *parts;
-	DBT **keys;
-{
-	DB_PARTITION *part;
-	int by_range;
-
-	part = dbp->p_internal;
-	by_range = part != NULL && F_ISSET(part, PART_RANGE);
-
-	if (parts != NULL) {
-		if (by_range)
-			*parts = part->nparts;
-		else
-			*parts = 0;
-	}
-	if (keys != NULL) {
-		if (by_range)
-			*keys = &part->keys[1];
-		else
-			*keys = NULL;
-	}
-
-	return (0);
-}
-
-/*
- * __partition_get_dirs --
- *	Return the list of directories used by the partitions.
- *
- *	A non-partitioned handle yields NULL.  Before the database has been
- *	opened, or when a list is already attached to the handle, that list
- *	(possibly NULL) is returned as is.  Otherwise a list is built from
- *	the dirname of every partition handle, cached in part->dirs and
- *	returned.  Returns 0, or the allocation error.
- *
- * PUBLIC: int __partition_get_dirs __P((DB *, const char ***));
- */
-int
-__partition_get_dirs(dbp, dirpp)
-	DB *dbp;
-	const char ***dirpp;
-{
-	DB_PARTITION *part;
-	ENV *env;
-	u_int32_t n;
-	int ret;
-
-	env = dbp->env;
-	part = dbp->p_internal;
-	if (part == NULL) {
-		*dirpp = NULL;
-		return (0);
-	}
-
-	/* Hand out whatever list is currently attached, if that suffices. */
-	*dirpp = part->dirs;
-	if (!F_ISSET(dbp, DB_AM_OPEN_CALLED) || part->dirs != NULL)
-		return (0);
-
-	/*
-	 * The database is open and no list exists yet: build one now, with
-	 * one spare zeroed slot after the last partition.
-	 */
-	ret = __os_calloc(env,
-	    sizeof(char *), part->nparts + 1, (char **)&part->dirs);
-	if (ret != 0)
-		return (ret);
-
-	for (n = 0; n != part->nparts; ++n)
-		part->dirs[n] = part->handles[n]->dirname;
-
-	*dirpp = part->dirs;
-	return (0);
-}
-
-/*
- * __partc_init --
- *	Set up the access-method private part of a partitioned cursor:
- *	allocate the PART_CURSOR if the cursor has none yet, install the
- *	cursor method table and mark the cursor as partitioned.
- *	Returns 0, or the allocation error.
- *
- * PUBLIC: int __partc_init __P((DBC *));
- */
-int
-__partc_init(dbc)
-	DBC *dbc;
-{
-	int ret;
-
-	/* The internal structure is allocated once and then reused. */
-	if (dbc->internal == NULL) {
-		ret = __os_calloc(dbc->env,
-		    1, sizeof(PART_CURSOR), &dbc->internal);
-		if (ret != 0)
-			return (ret);
-	}
-
-	/* Public methods; each has a c_-prefixed twin holding the same value. */
-	dbc->c_close = __dbc_close_pp;
-	dbc->close = __dbc_close_pp;
-	dbc->cmp = __dbc_cmp_pp;
-	dbc->c_count = __dbc_count_pp;
-	dbc->count = __dbc_count_pp;
-	dbc->c_del = __dbc_del_pp;
-	dbc->del = __dbc_del_pp;
-	dbc->c_dup = __dbc_dup_pp;
-	dbc->dup = __dbc_dup_pp;
-	dbc->c_get = __partc_get_pp;
-	dbc->get = __partc_get_pp;
-	dbc->c_pget = __dbc_pget_pp;
-	dbc->pget = __dbc_pget_pp;
-	dbc->c_put = __dbc_put_pp;
-	dbc->put = __dbc_put_pp;
-
-	/* Access-method hooks; bulk and get have no partition-level hook. */
-	dbc->am_bulk = NULL;
-	dbc->am_close = __partc_close;
-	dbc->am_del = __partc_del;
-	dbc->am_destroy = __partc_destroy;
-	dbc->am_get = NULL;
-	dbc->am_put = __partc_put;
-	dbc->am_writelock = __partc_writelock;
-
-	/* We avoid swapping partition cursors since we swap the sub cursors */
-	F_SET(dbc, DBC_PARTITIONED);
-
-	return (0);
-}
-/*
- * __partc_get_pp --
- * Cursor get operation on a partitioned database (the entry point
- * installed as dbc->get and dbc->c_get by __partc_init).
- *
- * Strips DB_IGNORE_LEASE from flags, validates the arguments with
- * __dbc_get_arg, performs the get through __partc_get and, after a
- * successful get on a replication master that uses leases, runs the
- * lease check unless DB_IGNORE_LEASE was given.
- */
-static int
-__partc_get_pp(dbc, key, data, flags)
- DBC *dbc;
- DBT *key, *data;
- u_int32_t flags;
-{
- DB *dbp;
- DB_THREAD_INFO *ip;
- ENV *env;
- int ignore_lease, ret;
-
- dbp = dbc->dbp;
- env = dbp->env;
-
- ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; /* remember it before it is cleared */
- LF_CLR(DB_IGNORE_LEASE);
- if ((ret = __dbc_get_arg(dbc, key, data, flags)) != 0)
- return (ret);
-
- ENV_ENTER(env, ip);
-
- DEBUG_LREAD(dbc, dbc->txn, "DBcursor->get",
- flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags);
-
- ret = __partc_get(dbc, key, data, flags);
- /*
- * Check for master leases.
- */
- if (ret == 0 &&
- IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease)
- ret = __rep_lease_check(env, 1);
-
- ENV_LEAVE(env, ip);
- __dbt_userfree(env, key, NULL, data);
- return (ret);
-}
-/*
- * __partc_get --
- *	Cursor get operation on a partitioned database.
- *
- *	Chooses the partition to read (the first or last one, the cursor's
- *	current one, or for keyed lookups the one selected by the partition
- *	callback or __part_search), runs the get on a cursor for that
- *	partition and, for positional requests that come back DB_NOTFOUND,
- *	moves on to the neighbouring partition.  On success the partition
- *	cursor that satisfied the request becomes cp->sub_cursor and
- *	cp->part_id is updated.
- *
- *	Returns 0 on success, otherwise the error of the underlying cursor
- *	call (DB_NOTFOUND when the partitions are exhausted).
- *
- * PUBLIC: int __partc_get __P((DBC*, DBT *, DBT *, u_int32_t));
- */
-int
-__partc_get(dbc, key, data, flags)
-	DBC *dbc;
-	DBT *key, *data;
-	u_int32_t flags;
-{
-	DB *dbp;
-	DBC *orig_dbc, *new_dbc;
-	DB_PARTITION *part;
-	PART_CURSOR *cp;
-	u_int32_t multi, part_id;
-	int ret, retry, search;
-
-	dbp = dbc->dbp;
-	cp = (PART_CURSOR*)dbc->internal;
-	orig_dbc = cp->sub_cursor;
-	part = dbp->p_internal;
-
-	new_dbc = NULL;
-	retry = search = 0;
-	part_id = cp->part_id;
-	/* Modifier bits (everything outside the opcode) survive a retry. */
-	multi = flags & ~DB_OPFLAGS_MASK;
-
-	/*
-	 * search: the partition must be derived from the key.
-	 * retry:  on DB_NOTFOUND continue with the adjacent partition.
-	 */
-	switch (flags & DB_OPFLAGS_MASK) {
-	case DB_CURRENT:
-		break;
-	case DB_FIRST:
-		part_id = 0;
-		retry = 1;
-		break;
-	case DB_GET_BOTH:
-	case DB_GET_BOTHC:
-	case DB_GET_BOTH_RANGE:
-		search = 1;
-		break;
-	case DB_SET_RANGE:
-		search = 1;
-		retry = 1;
-		break;
-	case DB_LAST:
-		part_id = part->nparts - 1;
-		retry = 1;
-		break;
-	case DB_NEXT:
-	case DB_NEXT_NODUP:
-		if (orig_dbc == NULL)
-			part_id = 0;
-		else
-			part_id = cp->part_id;
-		retry = 1;
-		break;
-	case DB_NEXT_DUP:
-		break;
-	case DB_PREV:
-	case DB_PREV_NODUP:
-		if (orig_dbc == NULL)
-			part_id = part->nparts - 1;
-		else
-			part_id = cp->part_id;
-		retry = 1;
-		break;
-	case DB_PREV_DUP:
-		break;
-	case DB_SET:
-		search = 1;
-		break;
-	default:
-		return (__db_unknown_flag(dbp->env, "__partc_get", flags));
-	}
-
-	/*
-	 * If we need to find the partition to start on, ask the partition
-	 * callback or look the key up in the in-memory partition table.
-	 */
-	if (search == 1 && F_ISSET(part, PART_CALLBACK))
-		part_id = part->callback(dbp, key) % part->nparts;
-	else if (search == 1)
-		__part_search(dbp, part, key, &part_id);
-
-	/* Get a new cursor if necessary */
-	if (orig_dbc == NULL || cp->part_id != part_id) {
-		GET_PART_CURSOR(dbc, new_dbc, part_id);
-	} else
-		new_dbc = orig_dbc;
-
-	while ((ret = __dbc_get(new_dbc,
-	    key, data, flags)) == DB_NOTFOUND && retry == 1) {
-		switch (flags & DB_OPFLAGS_MASK) {
-		case DB_FIRST:
-		case DB_NEXT:
-		case DB_NEXT_NODUP:
-		case DB_SET_RANGE:
-			if (++part_id < part->nparts) {
-				flags = DB_FIRST | multi;
-				break;
-			}
-			goto err;
-		case DB_LAST:
-		case DB_PREV:
-		case DB_PREV_NODUP:
-			if (part_id-- > 0) {
-				flags = DB_LAST | multi;
-				break;
-			}
-			goto err;
-		default:
-			goto err;
-		}
-
-		/*
-		 * Drop our reference to the temporary cursor as soon as it
-		 * has been closed.  Otherwise a failure of the close itself,
-		 * or of GET_PART_CURSOR below, would reach the err label with
-		 * new_dbc still naming the closed cursor and close it twice.
-		 */
-		if (new_dbc != orig_dbc) {
-			ret = __dbc_close(new_dbc);
-			new_dbc = NULL;
-			if (ret != 0)
-				goto err;
-		}
-		GET_PART_CURSOR(dbc, new_dbc, part_id);
-	}
-
-	if (ret != 0)
-		goto err;
-
-	/* Success: swap original and new cursors. */
-	if (new_dbc != orig_dbc) {
-		if (orig_dbc != NULL) {
-			cp->sub_cursor = NULL;
-			if ((ret = __dbc_close(orig_dbc)) != 0)
-				goto err;
-		}
-		cp->sub_cursor = new_dbc;
-		cp->part_id = part_id;
-	}
-
-	return (0);
-
-err:	if (new_dbc != NULL && new_dbc != orig_dbc)
-		(void)__dbc_close(new_dbc);
-	return (ret);
-}
-
-/*
- * __partc_put --
- *	Cursor put operation on a partitioned cursor.
- *
- *	For the keyed put flags (DB_KEYFIRST, DB_KEYLAST, DB_NODUPDATA,
- *	DB_NOOVERWRITE, DB_OVERWRITE_DUP) the target partition is derived
- *	from the key, through the partition callback or __part_search.  Any
- *	other flag value operates on the cursor's current partition.  After
- *	a successful put the cursor used becomes cp->sub_cursor.
- *
- *	*pgnop is always set to PGNO_INVALID.  Returns 0 on success or the
- *	error of the failing cursor call.
- */
-static int
-__partc_put(dbc, key, data, flags, pgnop)
-	DBC *dbc;
-	DBT *key, *data;
-	u_int32_t flags;
-	db_pgno_t *pgnop;
-{
-	DB *dbp;
-	DB_PARTITION *part;
-	DBC *new_dbc, *old_dbc;
-	PART_CURSOR *cp;
-	u_int32_t part_id;
-	int ret;
-
-	dbp = dbc->dbp;
-	cp = (PART_CURSOR*)dbc->internal;
-	part_id = cp->part_id;
-	part = dbp->p_internal;
-	*pgnop = PGNO_INVALID;
-
-	switch (flags) {
-	case DB_KEYFIRST:
-	case DB_KEYLAST:
-	case DB_NODUPDATA:
-	case DB_NOOVERWRITE:
-	case DB_OVERWRITE_DUP:
-		if (F_ISSET(part, PART_CALLBACK)) {
-			part_id = part->callback(dbp, key) % part->nparts;
-			break;
-		}
-		__part_search(dbp, part, key, &part_id);
-		break;
-	default:
-		break;
-	}
-
-	/* Reuse the current sub-cursor only if it is on the right partition. */
-	if ((new_dbc = cp->sub_cursor) == NULL || cp->part_id != part_id) {
-		if ((ret = __db_cursor_int(part->handles[part_id],
-		    dbc->thread_info, dbc->txn, part->handles[part_id]->type,
-		    PGNO_INVALID, 0, dbc->locker, &new_dbc)) != 0)
-			goto err;
-	}
-
-	if (F_ISSET(dbc, DBC_WRITER | DBC_WRITECURSOR))
-		F_SET(new_dbc, DBC_WRITER);
-	if ((ret = __dbc_put(new_dbc, key, data, flags)) != 0)
-		goto err;
-
-	if (new_dbc != cp->sub_cursor) {
-		/*
-		 * Detach the old sub-cursor before closing it, as
-		 * __partc_get does, so that a failed close cannot leave
-		 * cp->sub_cursor pointing at a cursor that is already closed.
-		 */
-		if ((old_dbc = cp->sub_cursor) != NULL) {
-			cp->sub_cursor = NULL;
-			if ((ret = __dbc_close(old_dbc)) != 0)
-				goto err;
-		}
-		cp->sub_cursor = new_dbc;
-		cp->part_id = part_id;
-	}
-
-	return (0);
-
-err:	if (new_dbc != NULL && cp->sub_cursor != new_dbc)
-		(void)__dbc_close(new_dbc);
-	return (ret);
-}
-
-/*
- * __partc_del --
- *	Delete through a partitioned cursor: the request is forwarded to
- *	the current sub-cursor, which is first flagged DBC_WRITER when the
- *	partitioned cursor is a writer or write cursor.
- */
-static int
-__partc_del(dbc, flags)
-	DBC *dbc;
-	u_int32_t flags;
-{
-	DBC *sub;
-
-	sub = ((PART_CURSOR *)dbc->internal)->sub_cursor;
-
-	if (F_ISSET(dbc, DBC_WRITER | DBC_WRITECURSOR))
-		F_SET(sub, DBC_WRITER);
-
-	return (__dbc_del(sub, flags));
-}
-
-/*
- * __partc_writelock --
- *	Write-lock request on a partitioned cursor; handed straight to the
- *	am_writelock method of the current sub-cursor.
- */
-static int
-__partc_writelock(dbc)
-	DBC *dbc;
-{
-	DBC *sub;
-
-	sub = ((PART_CURSOR *)dbc->internal)->sub_cursor;
-
-	return ((*sub->am_writelock)(sub));
-}
-
-/*
- * __partc_close --
- *	Close hook for partitioned cursors.  Closes the sub-cursor, if there
- *	is one, and forgets it.  root_pgno and rmroot are unused.  Returns
- *	the result of closing the sub-cursor, or 0 when there was none.
- */
-static int
-__partc_close(dbc, root_pgno, rmroot)
-	DBC *dbc;
-	db_pgno_t root_pgno;
-	int *rmroot;
-{
-	PART_CURSOR *cp;
-	DBC *sub;
-	int ret;
-
-	COMPQUIET(root_pgno, 0);
-	COMPQUIET(rmroot, NULL);
-
-	cp = (PART_CURSOR*)dbc->internal;
-	ret = 0;
-
-	sub = cp->sub_cursor;
-	if (sub != NULL) {
-		ret = __dbc_close(sub);
-		cp->sub_cursor = NULL;
-	}
-
-	return (ret);
-}
-
-/*
- * __partc_destroy --
- *	Destroy hook for partitioned cursors: release the cursor's internal
- *	structure.  Always returns 0.
- */
-static int
-__partc_destroy(dbc)
-	DBC *dbc;
-{
-	/* Discard the structure.  Don't recurse. */
-	__os_free(dbc->env, (PART_CURSOR *)dbc->internal);
-
-	return (0);
-}
-
-/*
- * __partition_close --
- *	Close a partitioned database: close every partition handle that
- *	exists, then free the handle array, the directory list, the key
- *	buffer and the DB_PARTITION itself, and detach it from dbp.
- *
- *	Returns the first error reported while closing a partition, else 0.
- *	A handle without partition information is a no-op.
- *
- * PUBLIC: int __partition_close __P((DB *, DB_TXN *, u_int32_t));
- */
-int
-__partition_close(dbp, txn, flags)
-	DB *dbp;
-	DB_TXN *txn;
-	u_int32_t flags;
-{
-	DB **handles;
-	DB_PARTITION *part;
-	ENV *env;
-	u_int32_t n;
-	int ret, t_ret;
-
-	part = dbp->p_internal;
-	if (part == NULL)
-		return (0);
-
-	env = dbp->env;
-	ret = 0;
-
-	handles = part->handles;
-	if (handles != NULL) {
-		for (n = 0; n < part->nparts; n++) {
-			if (handles[n] == NULL)
-				continue;
-			t_ret = __db_close(handles[n], txn, flags);
-			/* Keep going, but remember the first failure. */
-			if (t_ret != 0 && ret == 0)
-				ret = t_ret;
-		}
-		__os_free(env, part->handles);
-	}
-
-	if (part->dirs != NULL)
-		__os_free(env, (char **)part->dirs);
-	if (part->data != NULL)
-		__os_free(env, (char **)part->data);
-
-	__os_free(env, part);
-	dbp->p_internal = NULL;
-
-	return (ret);
-}
-
-/*
- * __partition_sync --
- *	Sync a partitioned database: flush the memory pool file of every
- *	partition handle that has been opened, then that of the master
- *	database.  Returns the first error encountered, else 0.
- *
- * PUBLIC: int __partition_sync __P((DB *));
- */
-int
-__partition_sync(dbp)
-	DB *dbp;
-{
-	DB **handles;
-	DB_PARTITION *part;
-	u_int32_t n;
-	int ret, t_ret;
-
-	ret = 0;
-	part = dbp->p_internal;
-
-	handles = part->handles;
-	if (handles != NULL) {
-		for (n = 0; n < part->nparts; n++) {
-			/* Skip slots that are empty or were never opened. */
-			if (handles[n] == NULL ||
-			    !F_ISSET(handles[n], DB_AM_OPEN_CALLED))
-				continue;
-			t_ret = __memp_fsync(handles[n]->mpf);
-			if (t_ret != 0 && ret == 0)
-				ret = t_ret;
-		}
-	}
-
-	t_ret = __memp_fsync(dbp->mpf);
-	if (t_ret != 0 && ret == 0)
-		ret = t_ret;
-
-	return (ret);
-}
-
-/*
- * __partition_stat --
- *	Stat a partitioned database.
- *
- *	Collects the access-method statistics of every partition and folds
- *	them into the structure produced for the first partition, which is
- *	handed back through spp.  For btree, bt_levels is the maximum over
- *	the partitions; the other counters listed below are summed.
- *
- *	Returns 0 on success.  On failure *spp is set to NULL, the
- *	statistics accumulated so far are freed and the partition cursor
- *	that was in use, if any, is closed.
- *
- * PUBLIC: int __partition_stat __P((DBC *, void *, u_int32_t));
- */
-int
-__partition_stat(dbc, spp, flags)
-	DBC *dbc;
-	void *spp;
-	u_int32_t flags;
-{
-	DB *dbp, **pdbp;
-	DB_BTREE_STAT *fsp, *bsp;
-#ifdef HAVE_HASH
-	DB_HASH_STAT *hfsp, *hsp;
-#endif
-	DB_PARTITION *part;
-	DBC *new_dbc;
-	ENV *env;
-	u_int32_t i;
-	int ret;
-
-	dbp = dbc->dbp;
-	part = dbp->p_internal;
-	env = dbp->env;
-	fsp = NULL;
-#ifdef HAVE_HASH
-	hfsp = NULL;
-#endif
-	/* Non-NULL only while a partition cursor is open. */
-	new_dbc = NULL;
-
-	pdbp = part->handles;
-	for (i = 0; i < part->nparts; i++, pdbp++) {
-		if ((ret = __db_cursor_int(*pdbp, dbc->thread_info, dbc->txn,
-		    (*pdbp)->type, PGNO_INVALID,
-		    0, dbc->locker, &new_dbc)) != 0)
-			goto err;
-		switch (new_dbc->dbtype) {
-		case DB_BTREE:
-			if ((ret = __bam_stat(new_dbc, &bsp, flags)) != 0)
-				goto err;
-			if (fsp == NULL) {
-				/* First partition: its block is the result. */
-				fsp = bsp;
-				*(DB_BTREE_STAT **)spp = fsp;
-			} else {
-				fsp->bt_nkeys += bsp->bt_nkeys;
-				fsp->bt_ndata += bsp->bt_ndata;
-				fsp->bt_pagecnt += bsp->bt_pagecnt;
-				if (fsp->bt_levels < bsp->bt_levels)
-					fsp->bt_levels = bsp->bt_levels;
-				fsp->bt_int_pg += bsp->bt_int_pg;
-				fsp->bt_leaf_pg += bsp->bt_leaf_pg;
-				fsp->bt_dup_pg += bsp->bt_dup_pg;
-				fsp->bt_over_pg += bsp->bt_over_pg;
-				fsp->bt_free += bsp->bt_free;
-				fsp->bt_int_pgfree += bsp->bt_int_pgfree;
-				fsp->bt_leaf_pgfree += bsp->bt_leaf_pgfree;
-				fsp->bt_dup_pgfree += bsp->bt_dup_pgfree;
-				fsp->bt_over_pgfree += bsp->bt_over_pgfree;
-				__os_ufree(env, bsp);
-			}
-			break;
-#ifdef HAVE_HASH
-		case DB_HASH:
-			if ((ret = __ham_stat(new_dbc, &hsp, flags)) != 0)
-				goto err;
-			if (hfsp == NULL) {
-				/* First partition: its block is the result. */
-				hfsp = hsp;
-				*(DB_HASH_STAT **)spp = hfsp;
-			} else {
-				hfsp->hash_nkeys += hsp->hash_nkeys;
-				hfsp->hash_ndata += hsp->hash_ndata;
-				hfsp->hash_pagecnt += hsp->hash_pagecnt;
-				hfsp->hash_ffactor += hsp->hash_ffactor;
-				hfsp->hash_buckets += hsp->hash_buckets;
-				hfsp->hash_free += hsp->hash_free;
-				hfsp->hash_bfree += hsp->hash_bfree;
-				hfsp->hash_bigpages += hsp->hash_bigpages;
-				hfsp->hash_big_bfree += hsp->hash_big_bfree;
-				hfsp->hash_overflows += hsp->hash_overflows;
-				hfsp->hash_ovfl_free += hsp->hash_ovfl_free;
-				hfsp->hash_dup += hsp->hash_dup;
-				hfsp->hash_dup_free += hsp->hash_dup_free;
-				__os_ufree(env, hsp);
-			}
-			break;
-#endif
-		default:
-			break;
-		}
-		/* Forget the cursor before checking, so err: won't re-close it. */
-		ret = __dbc_close(new_dbc);
-		new_dbc = NULL;
-		if (ret != 0)
-			goto err;
-	}
-	return (0);
-
-err:
-	/* A failed stat call leaves the partition cursor open; close it. */
-	if (new_dbc != NULL)
-		(void)__dbc_close(new_dbc);
-	if (fsp != NULL)
-		__os_ufree(env, fsp);
-#ifdef HAVE_HASH
-	/* The hash accumulator needs releasing as well. */
-	if (hfsp != NULL)
-		__os_ufree(env, hfsp);
-#endif
-	*(DB_BTREE_STAT **)spp = NULL;
-	return (ret);
-}
-
-/*
- * __part_truncate --
- *	Truncate a partitioned database.
- *
- *	Truncates every partition in turn through a cursor opened on it,
- *	stopping at the first failure.  If countp is not NULL it receives
- *	the sum of the record counts reported for the partitions processed.
- *	Returns 0 on success or the first error.
- *
- * PUBLIC: int __part_truncate __P((DBC *, u_int32_t *));
- */
-int
-__part_truncate(dbc, countp)
-	DBC *dbc;
-	u_int32_t *countp;
-{
-	DB *dbp, **pdbp;
-	DB_PARTITION *part;
-	DBC *new_dbc;
-	u_int32_t count, i;
-	int ret, t_ret;
-
-	dbp = dbc->dbp;
-	part = dbp->p_internal;
-	pdbp = part->handles;
-	ret = 0;
-
-	if (countp != NULL)
-		*countp = 0;
-	for (i = 0; ret == 0 && i < part->nparts; i++, pdbp++) {
-		if ((ret = __db_cursor_int(*pdbp, dbc->thread_info, dbc->txn,
-		    (*pdbp)->type, PGNO_INVALID,
-		    0, dbc->locker, &new_dbc)) != 0)
-			break;
-		/*
-		 * Start every partition from a defined value: count is added
-		 * to *countp below even when the truncate call fails, and a
-		 * failing call need not have stored anything into it.
-		 */
-		count = 0;
-		switch (dbp->type) {
-		case DB_BTREE:
-		case DB_RECNO:
-			ret = __bam_truncate(new_dbc, &count);
-			break;
-		case DB_HASH:
-#ifdef HAVE_HASH
-			ret = __ham_truncate(new_dbc, &count);
-			break;
-#endif
-			/* FALLTHROUGH when hash support is compiled out. */
-		case DB_QUEUE:
-		case DB_UNKNOWN:
-		default:
-			ret = __db_unknown_type(dbp->env,
-			    "DB->truncate", dbp->type);
-			count = 0;
-			break;
-		}
-		if ((t_ret = __dbc_close(new_dbc)) != 0 && ret == 0)
-			ret = t_ret;
-		if (countp != NULL)
-			*countp += count;
-	}
-
-	return (ret);
-}
-/*
- * __part_compact -- compact a partitioned database.
- *
- * Runs __bam_compact on every partition handle in turn, passing the
- * caller's arguments through unchanged, and stops at the first error.
- * A hash database is only accepted together with DB_FREELIST_ONLY;
- * otherwise, and for any type other than btree, recno or hash, the
- * result of __dbh_am_chk(dbp, DB_OK_BTREE) is returned.
- *
- * PUBLIC: int __part_compact __P((DB *, DB_THREAD_INFO *, DB_TXN *,
- * PUBLIC: DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *));
- */
-int
-__part_compact(dbp, ip, txn, start, stop, c_data, flags, end)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- DBT *start, *stop;
- DB_COMPACT *c_data;
- u_int32_t flags;
- DBT *end;
-{
- DB **pdbp;
- DB_PARTITION *part;
- u_int32_t i;
- int ret;
-
- part = dbp->p_internal;
- pdbp = part->handles;
- ret = 0;
-
- for (i = 0; ret == 0 && i < part->nparts; i++, pdbp++) {
- switch (dbp->type) {
- case DB_HASH:
- if (!LF_ISSET(DB_FREELIST_ONLY))
- goto err; /* jumps to the label inside the default case below */
- /* FALLTHROUGH */
- case DB_BTREE:
- case DB_RECNO:
- ret = __bam_compact(*pdbp,
- ip, txn, start, stop, c_data, flags, end);
- break;
-
- default:
- err: ret = __dbh_am_chk(dbp, DB_OK_BTREE);
- break;
- }
- }
- return (ret);
-}
-
-/*
- * __part_lsn_reset --
- *	Reset the LSNs of each partition by calling __db_lsn_reset on the
- *	memory pool file of every partition handle, stopping at the first
- *	failure.  Returns 0 or that failure's error code.
- *
- * PUBLIC: int __part_lsn_reset __P((DB *, DB_THREAD_INFO *));
- */
-int
-__part_lsn_reset(dbp, ip)
-	DB *dbp;
-	DB_THREAD_INFO *ip;
-{
-	DB **handles;
-	DB_PARTITION *part;
-	u_int32_t n;
-	int ret;
-
-	part = dbp->p_internal;
-	handles = part->handles;
-	ret = 0;
-
-	for (n = 0; n < part->nparts; n++) {
-		ret = __db_lsn_reset(handles[n]->mpf, ip);
-		if (ret != 0)
-			break;
-	}
-
-	return (ret);
-}
-
-/*
- * __part_fileid_reset --
- *	Reset the fileid on each partition.
- *
- *	Builds the file name of each of the nparts partitions of fname with
- *	PART_NAME, keeping whatever part of fname precedes its final path
- *	component, and calls __env_fileid_reset on it.  Stops at the first
- *	failure.  Returns 0 or the error of the failing call.
- *
- * PUBLIC: int __part_fileid_reset
- * PUBLIC:	 __P((ENV *, DB_THREAD_INFO *, const char *, u_int32_t, int));
- */
-int
-__part_fileid_reset(env, ip, fname, nparts, encrypted)
-	ENV *env;
-	DB_THREAD_INFO *ip;
-	const char *fname;
-	u_int32_t nparts;
-	int encrypted;
-{
-	int ret;
-	u_int32_t part_id;
-	char *name, *sp;
-	const char *np;
-
-	if ((ret = __os_malloc(env,
-	    strlen(fname) + PART_LEN + 1, &name)) != 0) {
-		__db_errx(env, Alloc_err, strlen(fname) + PART_LEN + 1);
-		return (ret);
-	}
-
-	/*
-	 * name receives the leading part of fname (if __db_rpath finds one)
-	 * and sp marks the spot after it where each partition's file name
-	 * is formatted.
-	 */
-	sp = name;
-	np = __db_rpath(fname);
-	if (np == NULL)
-		np = fname;
-	else {
-		np++;
-		(void)strncpy(name, fname, (size_t)(np - fname));
-		sp = name + (np - fname);
-	}
-
-	for (part_id = 0; ret == 0 && part_id < nparts; part_id++) {
-		(void)sprintf(sp, PART_NAME, np, part_id);
-		/*
-		 * Pass the complete name, as __partition_open does when it
-		 * opens the partition.  Passing sp would drop the leading
-		 * part of fname and look for the file in the wrong place.
-		 */
-		ret = __env_fileid_reset(env, ip, name, encrypted);
-	}
-
-	__os_free(env, name);
-	return (ret);
-}
-#ifndef HAVE_BREW
-/*
- * __part_key_range --
- *	Return proportion of keys relative to given key.
- *
- *	Computes the key range inside the partition that holds dbt with
- *	__bam_key_range, then rescales kp->less, kp->equal and kp->greater
- *	using, for every other partition, the level and entry count of its
- *	root page as an estimate of its size.
- *
- *	Returns 0 on success or the error of the failing call.  Every
- *	partition cursor opened here is closed before returning.
- *
- * PUBLIC: int __part_key_range __P((DBC *, DBT *, DB_KEY_RANGE *, u_int32_t));
- */
-int
-__part_key_range(dbc, dbt, kp, flags)
-	DBC *dbc;
-	DBT *dbt;
-	DB_KEY_RANGE *kp;
-	u_int32_t flags;
-{
-	BTREE_CURSOR *cp;
-	DBC *new_dbc;
-	DB_PARTITION *part;
-	PAGE *h;
-	u_int32_t id, part_id;
-	u_int32_t elems, empty, less_elems, my_elems, greater_elems;
-	u_int32_t levels, max_levels, my_levels;
-	int ret;
-	double total_elems;
-
-	COMPQUIET(flags, 0);
-
-	part = dbc->dbp->p_internal;
-
-	/*
-	 * First we find the key range for the partition that contains the
-	 * key.  Then we scale based on estimates of the other partitions.
-	 */
-	if (F_ISSET(part, PART_CALLBACK))
-		part_id = part->callback(dbc->dbp, dbt) % part->nparts;
-	else
-		__part_search(dbc->dbp, part, dbt, &part_id);
-	GET_PART_CURSOR(dbc, new_dbc, part_id);
-
-	/* The cursor is open from here on: fail through c_err so it closes. */
-	if ((ret = __bam_key_range(new_dbc, dbt, kp, flags)) != 0)
-		goto c_err;
-
-	cp = (BTREE_CURSOR *)new_dbc->internal;
-
-	if ((ret = __memp_fget(new_dbc->dbp->mpf,
-	    &cp->root, new_dbc->thread_info, new_dbc->txn, 0, &h)) != 0)
-		goto c_err;
-
-	my_elems = NUM_ENT(h);
-	my_levels = LEVEL(h);
-	max_levels = my_levels;
-
-	if ((ret = __memp_fput(new_dbc->dbp->mpf,
-	    new_dbc->thread_info, h, new_dbc->priority)) != 0)
-		goto c_err;
-
-	if ((ret = __dbc_close(new_dbc)) != 0)
-		goto err;
-	/*
-	 * We have the range within one subtree.  Now estimate
-	 * what part of the whole range that subtree is.  Figure
-	 * out how many levels each part has and how many entries
-	 * in the level below the root.
-	 */
-	empty = less_elems = greater_elems = 0;
-	for (id = 0; id < part->nparts; id++) {
-		if (id == part_id) {
-			empty = 0;
-			continue;
-		}
-		GET_PART_CURSOR(dbc, new_dbc, id);
-		cp = (BTREE_CURSOR *)new_dbc->internal;
-		if ((ret = __memp_fget(new_dbc->dbp->mpf, &cp->root,
-		    new_dbc->thread_info, new_dbc->txn, 0, &h)) != 0)
-			goto c_err;
-
-		elems = NUM_ENT(h);
-		levels = LEVEL(h);
-		if (levels == 1)
-			elems /= 2;
-
-		if ((ret = __memp_fput(new_dbc->dbp->mpf,
-		    new_dbc->thread_info, h, new_dbc->priority)) != 0)
-			goto c_err;
-
-		if ((ret = __dbc_close(new_dbc)) != 0)
-			goto err;
-
-		/* If the tree is empty, ignore it. */
-		if (elems == 0) {
-			empty++;
-			continue;
-		}
-
-		/*
-		 * If a tree has fewer levels than the max just count
-		 * it as a single element in the higher level.
-		 */
-		if (id < part_id) {
-			if (levels > max_levels) {
-				max_levels = levels;
-				less_elems = id + elems - empty;
-			} else if (levels < max_levels)
-				less_elems++;
-			else
-				less_elems += elems;
-		} else {
-			if (levels > max_levels) {
-				max_levels = levels;
-				greater_elems = (id - part_id) + elems - empty;
-			} else if (levels < max_levels)
-				greater_elems++;
-			else
-				greater_elems += elems;
-		}
-
-	}
-
-	if (my_levels < max_levels) {
-		/*
-		 * The subtree containing the key is not the tallest one.
-		 * Reduce its share by the number of records at the highest
-		 * level.  Scale the greater and lesser components up
-		 * by the number of records on either side of this
-		 * subtree.
-		 */
-		total_elems = 1 + greater_elems + less_elems;
-		kp->equal /= total_elems;
-		kp->less /= total_elems;
-		kp->less += less_elems/total_elems;
-		kp->greater /= total_elems;
-		kp->greater += greater_elems/total_elems;
-	} else if (my_levels == max_levels) {
-		/*
-		 * The key is in one of the tallest subtrees.  We will
-		 * scale the values by the ratio of the records at the
-		 * top of this subtree to the number of records at the
-		 * highest level.
-		 */
-		total_elems = greater_elems + less_elems;
-		if (total_elems != 0) {
-			/*
-			 * First scale down by the fraction of elements
-			 * in this subtree.
-			 */
-			total_elems += my_elems;
-			kp->equal *= my_elems;
-			kp->equal /= total_elems;
-			kp->less *= my_elems;
-			kp->less /= total_elems;
-			kp->greater *= my_elems;
-			kp->greater /= total_elems;
-			/*
-			 * Proportionally add weight from the subtrees to the
-			 * left and right of this one.
-			 */
-			kp->less += less_elems / total_elems;
-			kp->greater += greater_elems / total_elems;
-		}
-	}
-
-	/* The c_err label is only reached by goto, with a cursor still open. */
-	if (0) {
-c_err:		(void)__dbc_close(new_dbc);
-	}
-
-err:	return (ret);
-}
-#endif
-
-/*
- * __part_remove --
- *	Remove method for a partitioned database.  Delegates to __part_rr
- *	with no new name.
- *
- * PUBLIC: int __part_remove __P((DB *, DB_THREAD_INFO *,
- * PUBLIC:     DB_TXN *, const char *, const char *, u_int32_t));
- */
-int
-__part_remove(dbp, ip, txn, name, subdb, flags)
-	DB *dbp;
-	DB_THREAD_INFO *ip;
-	DB_TXN *txn;
-	const char *name, *subdb;
-	u_int32_t flags;
-{
-	int ret;
-
-	/* A NULL new name makes the shared helper remove the partitions. */
-	ret = __part_rr(dbp, ip, txn, name, subdb, NULL, flags);
-
-	return (ret);
-}
-
-/*
- * __part_rename --
- *	Rename method for a partitioned database.  Delegates to __part_rr
- *	with the new name and no flags.
- *
- * PUBLIC: int __part_rename __P((DB *, DB_THREAD_INFO *,
- * PUBLIC:     DB_TXN *, const char *, const char *, const char *));
- */
-int
-__part_rename(dbp, ip, txn, name, subdb, newname)
-	DB *dbp;
-	DB_THREAD_INFO *ip;
-	DB_TXN *txn;
-	const char *name, *subdb, *newname;
-{
-	int ret;
-
-	/* Passing a new name makes the shared helper rename the partitions. */
-	ret = __part_rr(dbp, ip, txn, name, subdb, newname, 0);
-
-	return (ret);
-}
-
-/*
- * __part_rr --
- * Remove/Rename method for a partitioned database.
- *
- * Opens the master database name on a temporary handle that shares
- * dbp's locker, then walks the partition handles of that temporary
- * handle. Each partition file is removed when newname is NULL;
- * otherwise it is renamed to the PART_NAME form of newname and its
- * index. The walk stops at the first failure.
- *
- * Returns EINVAL when both subdb and name are given, otherwise 0 or
- * the first error encountered.
- */
-static int
-__part_rr(dbp, ip, txn, name, subdb, newname, flags)
- DB *dbp;
- DB_THREAD_INFO *ip;
- DB_TXN *txn;
- const char *name, *subdb, *newname;
- u_int32_t flags;
-{
- DB **pdbp, *ptmpdbp, *tmpdbp;
- DB_PARTITION *part;
- ENV *env;
- u_int32_t i;
- int ret, t_ret;
- char *np;
-
- env = dbp->env;
- ret = 0;
-
- if (subdb != NULL && name != NULL) {
- __db_errx(env,
- "A partitioned database can not be in a multiple databases file");
- return (EINVAL);
- }
- ENV_GET_THREAD_INFO(env, ip);
-
- /*
- * Since rename no longer opens the database, we have
- * to do it here.
- */
- if ((ret = __db_create_internal(&tmpdbp, env, 0)) != 0)
- return (ret);
-
- /*
- * We need to make sure we don't self-deadlock, so give
- * this dbp the same locker as the incoming one.
- */
- tmpdbp->locker = dbp->locker;
- if ((ret = __db_open(tmpdbp, ip, txn, name, NULL, dbp->type,
- DB_RDWRMASTER | DB_RDONLY, 0, PGNO_BASE_MD)) != 0)
- goto err;
-
- part = tmpdbp->p_internal;
- pdbp = part->handles;
- COMPQUIET(np, NULL);
- if (newname != NULL && (ret = __os_malloc(env,
- strlen(newname) + PART_LEN + 1, &np)) != 0) { /* np: buffer for each partition's new name */
- __db_errx(env, Alloc_err, strlen(newname) + PART_LEN + 1);
- goto err;
- }
- for (i = 0; i < part->nparts; i++, pdbp++) {
- if ((ret = __db_create_internal(&ptmpdbp, env, 0)) != 0)
- break;
- ptmpdbp->locker = (*pdbp)->locker; /* borrowed; cleared again before the close below */
- if (newname == NULL)
- ret = __db_remove_int(ptmpdbp,
- ip, txn, (*pdbp)->fname, NULL, flags);
- else {
- DB_ASSERT(env, np != NULL);
- (void)sprintf(np, PART_NAME, newname, i);
- ret = __db_rename_int(ptmpdbp,
- ip, txn, (*pdbp)->fname, NULL, np);
- }
- ptmpdbp->locker = NULL;
- (void)__db_close(ptmpdbp, NULL, DB_NOSYNC);
- if (ret != 0)
- break;
- }
-
- if (newname != NULL)
- __os_free(env, np);
-
- if (!F_ISSET(dbp, DB_AM_OPEN_CALLED)) { /* error paths enter this block through the err label */
-err: /*
- * Since we copied the locker ID from the dbp, we'd better not
- * free it here.
- */
- tmpdbp->locker = NULL;
-
- /* We need to remove the lock event we associated with this. */
- if (txn != NULL)
- __txn_remlock(env,
- txn, &tmpdbp->handle_lock, DB_LOCK_INVALIDID);
-
- if ((t_ret = __db_close(tmpdbp,
- txn, DB_NOSYNC)) != 0 && ret == 0)
- ret = t_ret;
- }
- return (ret);
-}
-#ifdef HAVE_VERIFY
-/*
- * __part_verify --
- * Verify a partitioned database.
- *
- * PUBLIC: int __part_verify __P((DB *, VRFY_DBINFO *, const char *,
- * PUBLIC: void *, int (*)(void *, const void *), u_int32_t));
- */
-int
-__part_verify(dbp, vdp, fname, handle, callback, flags)
- DB *dbp;
- VRFY_DBINFO *vdp;
- const char *fname;
- void *handle;
- int (*callback) __P((void *, const void *));
- u_int32_t flags;
-{
- BINTERNAL *lp, *rp;
- DB **pdbp;
- DB_PARTITION *part;
- DBC *dbc;
- DBT *key;
- ENV *env;
- DB_THREAD_INFO *ip;
- u_int32_t i;
- int ret, t_ret;
-
- env = dbp->env;
- lp = rp = NULL;
- dbc = NULL;
- ip = vdp->thread_info;
-
- if (dbp->type == DB_BTREE) {
- if ((ret = __bam_open(dbp, ip,
- NULL, fname, PGNO_BASE_MD, flags)) != 0)
- goto err;
- }
-#ifdef HAVE_HASH
- else if ((ret = __ham_open(dbp, ip,
- NULL, fname, PGNO_BASE_MD, flags)) != 0)
- goto err;
-#endif
-
- /*
- * Initalize partition db handles and get the names. Set DB_RDWRMASTER
- * because we may not have the partition callback, but we can still
- * look at the structure of the tree.
- */
- if ((ret = __partition_open(dbp,
- ip, NULL, fname, dbp->type, flags | DB_RDWRMASTER, 0, 0)) != 0)
- goto err;
- part = dbp->p_internal;
-
- if (LF_ISSET(DB_SALVAGE)) {
- /* If we are being aggressive we don't want to dump the keys. */
- if (LF_ISSET(DB_AGGRESSIVE))
- dbp->p_internal = NULL;
- ret = __db_prheader(dbp,
- NULL, 0, 0, handle, callback, vdp, PGNO_BASE_MD);
- dbp->p_internal = part;
- if (ret != 0)
- goto err;
- }
-
- if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0)
- goto err;
-
- pdbp = part->handles;
- for (i = 0; i < part->nparts; i++, pdbp++) {
- if (!F_ISSET(part, PART_RANGE) || part->keys == NULL)
- goto vrfy;
- if (lp != NULL)
- __os_free(env, lp);
- lp = rp;
- rp = NULL;
- if (i + 1 < part->nparts) {
- key = &part->keys[i + 1];
- if ((ret = __os_malloc(env,
- BINTERNAL_SIZE(key->size), &rp)) != 0)
- goto err;
- rp->len = key->size;
- memcpy(rp->data, key->data, key->size);
- B_TSET(rp->type, B_KEYDATA);
- }
-vrfy: if ((t_ret = __db_verify(*pdbp, ip, (*pdbp)->fname,
- NULL, handle, callback,
- lp, rp, flags | DB_VERIFY_PARTITION)) != 0 && ret == 0)
- ret = t_ret;
- }
-
-err: if (lp != NULL)
- __os_free(env, lp);
- if (rp != NULL)
- __os_free(env, rp);
- return (ret);
-}
-#endif
-
-#ifdef CONFIG_TEST
-/*
- * __part_testdocopy -- copy all partitions for testing purposes.
- *
- * PUBLIC: int __part_testdocopy __P((DB *, const char *));
- */
-int
-__part_testdocopy(dbp, name)
- DB *dbp;
- const char *name;
-{
- DB **pdbp;
- DB_PARTITION *part;
- u_int32_t i;
- int ret;
-
- if ((ret = __db_testdocopy(dbp->env, name)) != 0)
- return (ret);
-
- part = dbp->p_internal;
- pdbp = part->handles;
- for (i = 0; i < part->nparts; i++, pdbp++)
- if ((ret = __db_testdocopy(dbp->env, (*pdbp)->fname)) != 0)
- return (ret);
-
- return (0);
-}
-#endif
-#else
-/*
- * __db_nopartition --
- * Error when a Berkeley DB build doesn't include partitioning.
- *
- * PUBLIC: int __db_no_partition __P((ENV *));
- */
-int
-__db_no_partition(env)
- ENV *env;
-{
- __db_errx(env,
- "library build did not include support for the database partitioning");
- return (DB_OPNOTSUP);
-}
-/*
- * __partition_set --
- * Set the partitioning keys or callback function.
- * This routine must be called prior to creating the database.
- * PUBLIC: int __partition_set __P((DB *, u_int32_t, DBT *,
- * PUBLIC: u_int32_t (*callback)(DB *, DBT *key)));
- */
-
-int
-__partition_set(dbp, parts, keys, callback)
- DB *dbp;
- u_int32_t parts;
- DBT *keys;
- u_int32_t (*callback)(DB *, DBT *key);
-{
- COMPQUIET(parts, 0);
- COMPQUIET(keys, NULL);
- COMPQUIET(callback, NULL);
-
- return (__db_no_partition(dbp->env));
-}
-
-/*
- * __partition_get_callback --
- * Set the partition callback function. This routine must be called
- * prior to opening a partition database that requires a function.
- * PUBLIC: int __partition_get_callback __P((DB *,
- * PUBLIC: u_int32_t *, u_int32_t (**callback)(DB *, DBT *key)));
- */
-int
-__partition_get_callback(dbp, parts, callback)
- DB *dbp;
- u_int32_t *parts;
- u_int32_t (**callback)(DB *, DBT *key);
-{
- COMPQUIET(parts, NULL);
- COMPQUIET(callback, NULL);
-
- return (__db_no_partition(dbp->env));
-}
-
-/*
- * __partition_get_dirs --
- * Get partition dirs.
- * PUBLIC: int __partition_get_dirs __P((DB *, const char ***));
- */
-int
-__partition_get_dirs(dbp, dirpp)
- DB *dbp;
- const char ***dirpp;
-{
- COMPQUIET(dirpp, NULL);
- return (__db_no_partition(dbp->env));
-}
-
-/*
- * __partition_get_keys --
- * Get partition keys.
- * PUBLIC: int __partition_get_keys __P((DB *, u_int32_t *, DBT **));
- */
-int
-__partition_get_keys(dbp, parts, keys)
- DB *dbp;
- u_int32_t *parts;
- DBT **keys;
-{
- COMPQUIET(parts, NULL);
- COMPQUIET(keys, NULL);
-
- return (__db_no_partition(dbp->env));
-}
-/*
- * __partition_init --
- * Initialize the partition structure.
- * Called when the meta data page is read in during database open or
- * when partition keys or a callback are set.
- *
- * PUBLIC: int __partition_init __P((DB *, u_int32_t));
- */
-int
-__partition_init(dbp, flags)
- DB *dbp;
- u_int32_t flags;
-{
- COMPQUIET(flags, 0);
-
- return (__db_no_partition(dbp->env));
-}
-/*
- * __part_fileid_reset --
- * reset the fileid on each partition.
- *
- * PUBLIC: int __part_fileid_reset
- * PUBLIC: __P((ENV *, DB_THREAD_INFO *, const char *, u_int32_t, int));
- */
-int
-__part_fileid_reset(env, ip, fname, nparts, encrypted)
- ENV *env;
- DB_THREAD_INFO *ip;
- const char *fname;
- u_int32_t nparts;
- int encrypted;
-{
- COMPQUIET(ip, NULL);
- COMPQUIET(fname, NULL);
- COMPQUIET(nparts, 0);
- COMPQUIET(encrypted, 0);
-
- return (__db_no_partition(env));
-}
-/*
- * __partition_set_dirs --
- * Set the directories for creating the partition databases.
- * They must be in the environment.
- * PUBLIC: int __partition_set_dirs __P((DB *, const char **));
- */
-int
-__partition_set_dirs(dbp, dirp)
- DB *dbp;
- const char **dirp;
-{
- COMPQUIET(dirp, NULL);
-
- return (__db_no_partition(dbp->env));
-}
-#endif
diff --git a/db/tags b/db/tags
new file mode 100644
index 0000000..f9c1143
--- /dev/null
+++ b/db/tags
@@ -0,0 +1,205 @@
+BT_CLR ../btree/btree.h /^#define BT_CLR(t) (t->bt_sp = t->bt_stack)$/
+BT_POP ../btree/btree.h /^#define BT_POP(t) (t->bt_sp == t->bt_stack ? NULL /
+BT_PUSH ../btree/btree.h /^#define BT_PUSH(t, p, i) { \\$/
+BUCKET_TO_PAGE ../hash/hash.h /^#define BUCKET_TO_PAGE(B) \\$/
+BUF_INSERT ../hash/hash_buf.c /^#define BUF_INSERT(B, P) { \\$/
+BUF_REMOVE ../hash/hash_buf.c /^#define BUF_REMOVE(B) { \\$/
+CLRBIT ../hash/hash.h /^#define CLRBIT(A, N) ((A)[(N)\/BITS_PER_MAP] &= ~(/
+DODISK ../hash/hash.h /^#define DODISK(X) ((X) = (char *)((ptrdiff_t)(X)|0/
+DOMOD ../hash/hash.h /^#define DOMOD(X) ((X) = (char *)((ptrdiff_t)(X)|0x/
+FREESPACE ../hash/page.h /^#define FREESPACE(P) ((P)[(P)[0]+1])$/
+F_CLR ../btree/btree.h /^#define F_CLR(p, f) (p)->flags &= ~(f)$/
+F_ISSET ../btree/btree.h /^#define F_ISSET(p, f) ((p)->flags & (f))$/
+F_SET ../btree/btree.h /^#define F_SET(p, f) (p)->flags |= (f)$/
+GETBINTERNAL ../btree/btree.h /^#define GETBINTERNAL(pg, indx) \\$/
+GETBLEAF ../btree/btree.h /^#define GETBLEAF(pg, indx) \\$/
+GETRINTERNAL ../btree/btree.h /^#define GETRINTERNAL(pg, indx) \\$/
+GETRLEAF ../btree/btree.h /^#define GETRLEAF(pg, indx) \\$/
+HASHKEY ../include/mpool.h /^#define HASHKEY(pgno) ((pgno - 1) % HASHSIZE)$/
+ISDISK ../hash/hash.h /^#define ISDISK(X) ((u_int32_t)(ptrdiff_t)(X)&0x2)$/
+ISMOD ../hash/hash.h /^#define ISMOD(X) ((u_int32_t)(ptrdiff_t)(X)&0x1)$/
+ISSET ../hash/hash.h /^#define ISSET(A, N) ((A)[(N)\/BITS_PER_MAP] & (1<</
+IS_BUCKET ../hash/hash.h /^#define IS_BUCKET(X) ((X) & BUF_BUCKET)$/
+KEYSIZE ../hash/page.h /^#define KEYSIZE(K) (4*sizeof(u_int16_t) + (K)->siz/
+LALIGN ../btree/btree.h /^#define LALIGN(n) (((n) + sizeof(pgno_t) - 1) & ~(/
+LRU_INSERT ../hash/hash_buf.c /^#define LRU_INSERT(B) BUF_INSERT((B), LRU)$/
+MOD ../hash/hash.c /^#define MOD(x, y) ((x) & ((y) - 1))$/
+MRU_INSERT ../hash/hash_buf.c /^#define MRU_INSERT(B) BUF_INSERT((B), &hashp->bufh/
+M_16_SWAP ../include/db.h /^#define M_16_SWAP(a) { \\$/
+M_32_SWAP ../include/db.h /^#define M_32_SWAP(a) { \\$/
+NBINTERNAL ../btree/btree.h /^#define NBINTERNAL(len) \\$/
+NBLEAF ../btree/btree.h /^#define NBLEAF(p) NBLEAFDBT((p)->ksize, (p)->dsize/
+NBLEAFDBT ../btree/btree.h /^#define NBLEAFDBT(ksize, dsize) \\$/
+NEXTINDEX ../btree/btree.h /^#define NEXTINDEX(p) (((p)->lower - BTDATAOFF) \/ /
+NRLEAF ../btree/btree.h /^#define NRLEAF(p) NRLEAFDBT((p)->dsize)$/
+NRLEAFDBT ../btree/btree.h /^#define NRLEAFDBT(dsize) \\$/
+OADDR_OF ../hash/hash.h /^#define OADDR_OF(S,O) ((u_int32_t)((u_int32_t)(S) /
+OADDR_TO_PAGE ../hash/hash.h /^#define OADDR_TO_PAGE(B) \\$/
+OFFSET ../hash/page.h /^#define OFFSET(P) ((P)[(P)[0]+2])$/
+OPAGENUM ../hash/hash.h /^#define OPAGENUM(N) ((N) & SPLITMASK)$/
+PAGE_INIT ../hash/hash_page.c /^#define PAGE_INIT(P) { \\$/
+PAGE_META ../hash/page.h /^#define PAGE_META(N) (((N)+3) * sizeof(u_int16_t))/
+PAIRFITS ../hash/page.h /^#define PAIRFITS(P,K,D) \\$/
+PAIRSIZE ../hash/page.h /^#define PAIRSIZE(K,D) (2*sizeof(u_int16_t) + (K)->/
+PTROF ../hash/hash.h /^#define PTROF(X) ((BUFHEAD *)((ptrdiff_t)(X)&~0x3)/
+P_16_COPY ../include/db.h /^#define P_16_COPY(a, b) { \\$/
+P_16_SWAP ../include/db.h /^#define P_16_SWAP(a) { \\$/
+P_32_COPY ../include/db.h /^#define P_32_COPY(a, b) { \\$/
+P_32_SWAP ../include/db.h /^#define P_32_SWAP(a) { \\$/
+RETURN_ERROR ../hash/hash.c /^#define RETURN_ERROR(ERR, LOC) { save_errno = ERR;/
+SETBIT ../hash/hash.h /^#define SETBIT(A, N) ((A)[(N)\/BITS_PER_MAP] |= (1/
+SPLITNUM ../hash/hash.h /^#define SPLITNUM(N) (((u_int32_t)(N)) >> SPLITSHIF/
+WR_BINTERNAL ../btree/btree.h /^#define WR_BINTERNAL(p, size, pgno, flags) { \\/
+WR_BLEAF ../btree/btree.h /^#define WR_BLEAF(p, key, data, flags) { \\$/
+WR_RINTERNAL ../btree/btree.h /^#define WR_RINTERNAL(p, nrecs, pgno) { \\$/
+WR_RLEAF ../btree/btree.h /^#define WR_RLEAF(p, data, flags) { \\$/
+X ../btree/bt_debug.c /^#define X(flag, name) \\$/
+__add_ovflpage ../hash/hash_page.c /^__add_ovflpage(hashp, bufp)$/
+__addel ../hash/hash_page.c /^__addel(hashp, bufp, key, val)$/
+__big_delete ../hash/hash_bigkey.c /^__big_delete(hashp, bufp)$/
+__big_insert ../hash/hash_bigkey.c /^__big_insert(hashp, bufp, key, val)$/
+__big_keydata ../hash/hash_bigkey.c /^__big_keydata(hashp, bufp, key, val, set)$/
+__big_return ../hash/hash_bigkey.c /^__big_return(hashp, bufp, ndx, val, set_current)$/
+__big_split ../hash/hash_bigkey.c /^__big_split(hashp, op, np, big_keyp, addr, obucket/
+__bt_bdelete ../btree/bt_delete.c /^__bt_bdelete(t, key)$/
+__bt_close ../btree/bt_close.c /^__bt_close(dbp)$/
+__bt_cmp ../btree/bt_utils.c /^__bt_cmp(t, k1, e)$/
+__bt_curdel ../btree/bt_delete.c /^__bt_curdel(t, key, h, index)$/
+__bt_defcmp ../btree/bt_utils.c /^__bt_defcmp(a, b)$/
+__bt_defpfx ../btree/bt_utils.c /^__bt_defpfx(a, b)$/
+__bt_delete ../btree/bt_delete.c /^__bt_delete(dbp, key, flags)$/
+__bt_dleaf ../btree/bt_delete.c /^__bt_dleaf(t, key, h, index)$/
+__bt_dmpage ../btree/bt_debug.c /^__bt_dmpage(h)$/
+__bt_dnpage ../btree/bt_debug.c /^__bt_dnpage(dbp, pgno)$/
+__bt_dpage ../btree/bt_debug.c /^__bt_dpage(h)$/
+__bt_dump ../btree/bt_debug.c /^__bt_dump(dbp)$/
+__bt_fd ../btree/bt_open.c /^__bt_fd(dbp)$/
+__bt_first ../btree/bt_seq.c /^__bt_first(t, key, erval, exactp)$/
+__bt_free ../btree/bt_page.c /^__bt_free(t, h)$/
+__bt_get ../btree/bt_get.c /^__bt_get(dbp, key, data, flags)$/
+__bt_new ../btree/bt_page.c /^__bt_new(t, npg)$/
+__bt_open ../btree/bt_open.c /^__bt_open(fname, flags, mode, openinfo, dflags)$/
+__bt_pdelete ../btree/bt_delete.c /^__bt_pdelete(t, h)$/
+__bt_pgin ../btree/bt_conv.c /^__bt_pgin(t, pg, pp)$/
+__bt_pgout ../btree/bt_conv.c /^__bt_pgout(t, pg, pp)$/
+__bt_put ../btree/bt_put.c /^__bt_put(dbp, key, data, flags)$/
+__bt_relink ../btree/bt_delete.c /^__bt_relink(t, h)$/
+__bt_ret ../btree/bt_utils.c /^__bt_ret(t, e, key, rkey, data, rdata, copy)$/
+__bt_search ../btree/bt_search.c /^__bt_search(t, key, exactp)$/
+__bt_seq ../btree/bt_seq.c /^__bt_seq(dbp, key, data, flags)$/
+__bt_seqadv ../btree/bt_seq.c /^__bt_seqadv(t, ep, flags)$/
+__bt_seqset ../btree/bt_seq.c /^__bt_seqset(t, ep, key, flags)$/
+__bt_setcur ../btree/bt_seq.c /^__bt_setcur(t, pgno, index)$/
+__bt_snext ../btree/bt_search.c /^__bt_snext(t, h, key, exactp)$/
+__bt_split ../btree/bt_split.c /^__bt_split(t, sp, key, data, flags, ilen, argskip)/
+__bt_sprev ../btree/bt_search.c /^__bt_sprev(t, h, key, exactp)$/
+__bt_stat ../btree/bt_debug.c /^__bt_stat(dbp)$/
+__bt_stkacq ../btree/bt_delete.c /^__bt_stkacq(t, hp, c)$/
+__bt_sync ../btree/bt_close.c /^__bt_sync(dbp, flags)$/
+__buf_free ../hash/hash_buf.c /^__buf_free(hashp, do_free, to_disk)$/
+__buf_init ../hash/hash_buf.c /^__buf_init(hashp, nbytes)$/
+__call_hash ../hash/hash.c /^__call_hash(hashp, k, len)$/
+__dberr ../db/db.c /^__dberr()$/
+__dbpanic ../db/db.c /^__dbpanic(dbp)$/
+__delpair ../hash/hash_page.c /^__delpair(hashp, bufp, ndx)$/
+__expand_table ../hash/hash.c /^__expand_table(hashp)$/
+__find_bigpair ../hash/hash_bigkey.c /^__find_bigpair(hashp, bufp, ndx, key, size)$/
+__find_last_page ../hash/hash_bigkey.c /^__find_last_page(hashp, bpp)$/
+__free_ovflpage ../hash/hash_page.c /^__free_ovflpage(hashp, obufp)$/
+__get_buf ../hash/hash_buf.c /^__get_buf(hashp, addr, prev_bp, newpage)$/
+__get_page ../hash/hash_page.c /^__get_page(hashp, p, bucket, is_bucket, is_disk, i/
+__hash_open ../hash/hash.c /^__hash_open(file, flags, mode, info, dflags)$/
+__ibitmap ../hash/hash_page.c /^__ibitmap(hashp, pnum, nbits, ndx)$/
+__log2 ../hash/hash_log2.c /^__log2(num)$/
+__ovfl_delete ../btree/bt_overflow.c /^__ovfl_delete(t, p)$/
+__ovfl_get ../btree/bt_overflow.c /^__ovfl_get(t, p, ssz, buf, bufsz)$/
+__ovfl_put ../btree/bt_overflow.c /^__ovfl_put(t, dbt, pg)$/
+__put_page ../hash/hash_page.c /^__put_page(hashp, p, bucket, is_bucket, is_bitmap)/
+__rec_close ../recno/rec_close.c /^__rec_close(dbp)$/
+__rec_delete ../recno/rec_delete.c /^__rec_delete(dbp, key, flags)$/
+__rec_dleaf ../recno/rec_delete.c /^__rec_dleaf(t, h, index)$/
+__rec_fd ../recno/rec_open.c /^__rec_fd(dbp)$/
+__rec_fmap ../recno/rec_get.c /^__rec_fmap(t, top)$/
+__rec_fpipe ../recno/rec_get.c /^__rec_fpipe(t, top)$/
+__rec_get ../recno/rec_get.c /^__rec_get(dbp, key, data, flags)$/
+__rec_iput ../recno/rec_put.c /^__rec_iput(t, nrec, data, flags)$/
+__rec_open ../recno/rec_open.c /^__rec_open(fname, flags, mode, openinfo, dflags)$/
+__rec_put ../recno/rec_put.c /^__rec_put(dbp, key, data, flags)$/
+__rec_ret ../recno/rec_utils.c /^__rec_ret(t, e, nrec, key, data)$/
+__rec_search ../recno/rec_search.c /^__rec_search(t, recno, op)$/
+__rec_seq ../recno/rec_seq.c /^__rec_seq(dbp, key, data, flags)$/
+__rec_sync ../recno/rec_close.c /^__rec_sync(dbp, flags)$/
+__rec_vmap ../recno/rec_get.c /^__rec_vmap(t, top)$/
+__rec_vpipe ../recno/rec_get.c /^__rec_vpipe(t, top)$/
+__reclaim_buf ../hash/hash_buf.c /^__reclaim_buf(hashp, bp)$/
+__split_page ../hash/hash_page.c /^__split_page(hashp, obucket, nbucket)$/
+alloc_segs ../hash/hash.c /^alloc_segs(hashp, nsegs)$/
+bt_broot ../btree/bt_split.c /^bt_broot(t, h, l, r)$/
+bt_fast ../btree/bt_put.c /^bt_fast(t, key, data, exactp)$/
+bt_meta ../btree/bt_close.c /^bt_meta(t)$/
+bt_page ../btree/bt_split.c /^bt_page(t, h, lp, rp, skip, ilen)$/
+bt_preserve ../btree/bt_split.c /^bt_preserve(t, pg)$/
+bt_psplit ../btree/bt_split.c /^bt_psplit(t, h, l, r, pskip, ilen)$/
+bt_root ../btree/bt_split.c /^bt_root(t, h, lp, rp, skip, ilen)$/
+bt_rroot ../btree/bt_split.c /^bt_rroot(t, h, l, r)$/
+byteorder ../btree/bt_open.c /^byteorder()$/
+collect_data ../hash/hash_bigkey.c /^collect_data(hashp, bufp, len, set)$/
+collect_key ../hash/hash_bigkey.c /^collect_key(hashp, bufp, len, val, set)$/
+dbm_clearerr ../hash/ndbm.c /^dbm_clearerr(db)$/
+dbm_close ../hash/ndbm.c /^dbm_close(db)$/
+dbm_delete ../hash/ndbm.c /^dbm_delete(db, key)$/
+dbm_dirfno ../hash/ndbm.c /^dbm_dirfno(db)$/
+dbm_error ../hash/ndbm.c /^dbm_error(db)$/
+dbm_fetch ../hash/ndbm.c /^dbm_fetch(db, key)$/
+dbm_firstkey ../hash/ndbm.c /^dbm_firstkey(db)$/
+dbm_nextkey ../hash/ndbm.c /^dbm_nextkey(db)$/
+dbm_open ../hash/ndbm.c /^dbm_open(file, flags, mode)$/
+dbm_store ../hash/ndbm.c /^dbm_store(db, key, content, flags)$/
+dbopen ../db/db.c /^dbopen(fname, flags, mode, type, openinfo)$/
+dcharhash ../hash/hash_func.c /^#define dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x/
+fetch_bitmap ../hash/hash_page.c /^fetch_bitmap(hashp, ndx)$/
+first_free ../hash/hash_page.c /^first_free(map)$/
+flush_meta ../hash/hash.c /^flush_meta(hashp)$/
+hash2 ../hash/hash_func.c /^hash2(keyarg, len)$/
+hash3 ../hash/hash_func.c /^hash3(keyarg, len)$/
+hash4 ../hash/hash_func.c /^hash4(keyarg, len)$/
+hash_access ../hash/hash.c /^hash_access(hashp, action, key, val)$/
+hash_close ../hash/hash.c /^hash_close(dbp)$/
+hash_delete ../hash/hash.c /^hash_delete(dbp, key, flag)$/
+hash_fd ../hash/hash.c /^hash_fd(dbp)$/
+hash_get ../hash/hash.c /^hash_get(dbp, key, data, flag)$/
+hash_put ../hash/hash.c /^hash_put(dbp, key, data, flag)$/
+hash_realloc ../hash/hash.c /^hash_realloc(p_ptr, oldsize, newsize)$/
+hash_seq ../hash/hash.c /^hash_seq(dbp, key, data, flag)$/
+hash_sync ../hash/hash.c /^hash_sync(dbp, flags)$/
+hcreate ../hash/hsearch.c /^hcreate(nel)$/
+hdestroy ../hash/hash.c /^hdestroy(hashp)$/
+hsearch ../hash/hsearch.c /^hsearch(item, action)$/
+init_hash ../hash/hash.c /^init_hash(hashp, file, info)$/
+init_htab ../hash/hash.c /^init_htab(hashp, nelem)$/
+mpool_bkt ../mpool/mpool.c /^mpool_bkt(mp)$/
+mpool_close ../mpool/mpool.c /^mpool_close(mp)$/
+mpool_filter ../mpool/mpool.c /^mpool_filter(mp, pgin, pgout, pgcookie)$/
+mpool_get ../mpool/mpool.c /^mpool_get(mp, pgno, flags)$/
+mpool_look ../mpool/mpool.c /^mpool_look(mp, pgno)$/
+mpool_new ../mpool/mpool.c /^mpool_new(mp, pgnoaddr)$/
+mpool_open ../mpool/mpool.c /^mpool_open(key, fd, pagesize, maxcache)$/
+mpool_put ../mpool/mpool.c /^mpool_put(mp, page, flags)$/
+mpool_stat ../mpool/mpool.c /^mpool_stat(mp)$/
+mpool_sync ../mpool/mpool.c /^mpool_sync(mp)$/
+mpool_write ../mpool/mpool.c /^mpool_write(mp, bp)$/
+mswap ../btree/bt_conv.c /^mswap(pg)$/
+newbuf ../hash/hash_buf.c /^newbuf(hashp, addr, prev_bp)$/
+nroot ../btree/bt_open.c /^nroot(t)$/
+open_temp ../hash/hash_page.c /^open_temp(hashp)$/
+overflow_page ../hash/hash_page.c /^overflow_page(hashp)$/
+print_chain ../hash/hash_page.c /^print_chain(addr)$/
+putpair ../hash/hash_page.c /^putpair(p, key, val)$/
+rec_rdelete ../recno/rec_delete.c /^rec_rdelete(t, nrec)$/
+rec_total ../btree/bt_split.c /^rec_total(h)$/
+squeeze_key ../hash/hash_page.c /^squeeze_key(sp, key, val)$/
+swap_header ../hash/hash.c /^swap_header(hashp)$/
+swap_header_copy ../hash/hash.c /^swap_header_copy(srcp, destp)$/
+tmp ../btree/bt_open.c /^tmp()$/
+u_int32_t ../hash/extern.h /^extern u_int32_t (*__default_hash) __P((const void/
+ugly_split ../hash/hash_page.c /^ugly_split(hashp, obucket, old_bufp, new_bufp, cop/