summaryrefslogtreecommitdiff
path: root/db/btree
diff options
context:
space:
mode:
authorjbj <devnull@localhost>2003-12-15 21:42:09 +0000
committerjbj <devnull@localhost>2003-12-15 21:42:09 +0000
commit8960e3895f7af91126465368dff8fbb36ab4e853 (patch)
tree3c515e39dde0e88edeb806ea87d08524ba25c761 /db/btree
parent752cac72e220dcad4e6fce39508e714e59e3e0a1 (diff)
downloadrpm-8960e3895f7af91126465368dff8fbb36ab4e853.tar.gz
rpm-8960e3895f7af91126465368dff8fbb36ab4e853.tar.bz2
rpm-8960e3895f7af91126465368dff8fbb36ab4e853.zip
- upgrade to db-4.2.52.
CVS patchset: 6972 CVS date: 2003/12/15 21:42:09
Diffstat (limited to 'db/btree')
-rw-r--r--db/btree/bt_compare.c14
-rw-r--r--db/btree/bt_conv.c30
-rw-r--r--db/btree/bt_curadj.c87
-rw-r--r--db/btree/bt_cursor.c1332
-rw-r--r--db/btree/bt_delete.c187
-rw-r--r--db/btree/bt_method.c274
-rw-r--r--db/btree/bt_open.c432
-rw-r--r--db/btree/bt_put.c233
-rw-r--r--db/btree/bt_rec.c510
-rw-r--r--db/btree/bt_reclaim.c40
-rw-r--r--db/btree/bt_recno.c471
-rw-r--r--db/btree/bt_rsearch.c88
-rw-r--r--db/btree/bt_search.c120
-rw-r--r--db/btree/bt_split.c321
-rw-r--r--db/btree/bt_stat.c259
-rw-r--r--db/btree/bt_upgrade.c25
-rw-r--r--db/btree/bt_verify.c733
-rw-r--r--db/btree/btree.src142
-rw-r--r--db/btree/btree_auto.c3528
19 files changed, 5247 insertions, 3579 deletions
diff --git a/db/btree/bt_compare.c b/db/btree/bt_compare.c
index 91481c313..a329d8044 100644
--- a/db/btree/bt_compare.c
+++ b/db/btree/bt_compare.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_compare.c,v 11.12 2000/10/26 19:00:28 krinsky Exp $";
+static const char revid[] = "$Id: bt_compare.c,v 11.18 2003/01/08 04:00:56 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -51,8 +51,8 @@ static const char revid[] = "$Id: bt_compare.c,v 11.12 2000/10/26 19:00:28 krins
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
/*
* __bam_cmp --
@@ -92,7 +92,7 @@ __bam_cmp(dbp, dbt, h, indx, func, cmpp)
case P_LBTREE:
case P_LDUP:
case P_LRECNO:
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
if (B_TYPE(bk->type) == B_OVERFLOW)
bo = (BOVERFLOW *)bk;
else {
@@ -125,7 +125,7 @@ __bam_cmp(dbp, dbt, h, indx, func, cmpp)
return (0);
}
- bi = GET_BINTERNAL(h, indx);
+ bi = GET_BINTERNAL(dbp, h, indx);
if (B_TYPE(bi->type) == B_OVERFLOW)
bo = (BOVERFLOW *)(bi->data);
else {
@@ -136,7 +136,7 @@ __bam_cmp(dbp, dbt, h, indx, func, cmpp)
}
break;
default:
- return (__db_pgfmt(dbp, PGNO(h)));
+ return (__db_pgfmt(dbp->dbenv, PGNO(h)));
}
/*
diff --git a/db/btree/bt_conv.c b/db/btree/bt_conv.c
index fd30f375f..fd80d8a4c 100644
--- a/db/btree/bt_conv.c
+++ b/db/btree/bt_conv.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_conv.c,v 11.6 2000/03/31 00:30:26 ubell Exp $";
+static const char revid[] = "$Id: bt_conv.c,v 11.14 2003/01/08 04:00:56 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -16,20 +16,21 @@ static const char revid[] = "$Id: bt_conv.c,v 11.6 2000/03/31 00:30:26 ubell Exp
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_swap.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_swap.h"
+#include "dbinc/btree.h"
/*
* __bam_pgin --
* Convert host-specific page layout from the host-independent format
* stored on disk.
*
- * PUBLIC: int __bam_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *));
+ * PUBLIC: int __bam_pgin __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *));
*/
int
-__bam_pgin(dbenv, pg, pp, cookie)
+__bam_pgin(dbenv, dummydbp, pg, pp, cookie)
DB_ENV *dbenv;
+ DB *dummydbp;
db_pgno_t pg;
void *pp;
DBT *cookie;
@@ -38,12 +39,12 @@ __bam_pgin(dbenv, pg, pp, cookie)
PAGE *h;
pginfo = (DB_PGINFO *)cookie->data;
- if (!pginfo->needswap)
+ if (!F_ISSET(pginfo, DB_AM_SWAP))
return (0);
h = pp;
return (TYPE(h) == P_BTREEMETA ? __bam_mswap(pp) :
- __db_byteswap(dbenv, pg, pp, pginfo->db_pagesize, 1));
+ __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 1));
}
/*
@@ -51,11 +52,12 @@ __bam_pgin(dbenv, pg, pp, cookie)
* Convert host-specific page layout to the host-independent format
* stored on disk.
*
- * PUBLIC: int __bam_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *));
+ * PUBLIC: int __bam_pgout __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *));
*/
int
-__bam_pgout(dbenv, pg, pp, cookie)
+__bam_pgout(dbenv, dummydbp, pg, pp, cookie)
DB_ENV *dbenv;
+ DB *dummydbp;
db_pgno_t pg;
void *pp;
DBT *cookie;
@@ -64,12 +66,12 @@ __bam_pgout(dbenv, pg, pp, cookie)
PAGE *h;
pginfo = (DB_PGINFO *)cookie->data;
- if (!pginfo->needswap)
+ if (!F_ISSET(pginfo, DB_AM_SWAP))
return (0);
h = pp;
return (TYPE(h) == P_BTREEMETA ? __bam_mswap(pp) :
- __db_byteswap(dbenv, pg, pp, pginfo->db_pagesize, 0));
+ __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 0));
}
/*
@@ -93,6 +95,8 @@ __bam_mswap(pg)
SWAP32(p); /* re_len */
SWAP32(p); /* re_pad */
SWAP32(p); /* root */
+ p += 92 * sizeof(u_int32_t); /* unused */
+ SWAP32(p); /* crypto_magic */
return (0);
}
diff --git a/db/btree/bt_curadj.c b/db/btree/bt_curadj.c
index 011acd2f4..3da200c27 100644
--- a/db/btree/bt_curadj.c
+++ b/db/btree/bt_curadj.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_curadj.c,v 11.20 2001/01/17 16:15:49 bostic Exp $";
+static const char revid[] = "$Id: bt_curadj.c,v 11.34 2003/07/09 02:32:24 margo Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -16,9 +16,8 @@ static const char revid[] = "$Id: bt_curadj.c,v 11.20 2001/01/17 16:15:49 bostic
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-#include "txn.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
static int __bam_opd_cursor __P((DB *, DBC *, db_pgno_t, u_int32_t, u_int32_t));
@@ -99,6 +98,19 @@ __bam_ca_delete(dbp, pgno, indx, delete)
dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
cp = (BTREE_CURSOR *)dbc->internal;
if (cp->pgno == pgno && cp->indx == indx) {
+ /*
+ * [#8032] This assert is checking
+ * for possible race conditions where we
+ * hold a cursor position without a lock.
+ * Unfortunately, there are paths in the
+ * Btree code that do not satisfy these
+ * conditions. None of them are known to
+ * be a problem, but this assert should
+ * be re-activated when the Btree stack
+ * code is re-written.
+ DB_ASSERT(!STD_LOCKING(dbc) ||
+ cp->lock_mode != DB_LOCK_NG);
+ */
if (delete)
F_SET(cp, C_DELETED);
else
@@ -193,7 +205,10 @@ __bam_ca_di(my_dbc, pgno, indx, adjust)
if (cp->pgno == pgno && cp->indx >= indx) {
/* Cursor indices should never be negative. */
DB_ASSERT(cp->indx != 0 || adjust > 0);
-
+ /* [#8032]
+ DB_ASSERT(!STD_LOCKING(dbc) ||
+ cp->lock_mode != DB_LOCK_NG);
+ */
cp->indx += adjust;
if (my_txn != NULL && dbc->txn != my_txn)
found = 1;
@@ -203,10 +218,9 @@ __bam_ca_di(my_dbc, pgno, indx, adjust)
}
MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
- if (found != 0 && DB_LOGGING(my_dbc)) {
- if ((ret = __bam_curadj_log(dbenv,
- my_dbc->txn, &lsn, 0, dbp->log_fileid,
- DB_CA_DI, pgno, 0, 0, adjust, indx, 0)) != 0)
+ if (found != 0 && DBC_LOGGING(my_dbc)) {
+ if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
+ &lsn, 0, DB_CA_DI, pgno, 0, 0, adjust, indx, 0)) != 0)
return (ret);
}
@@ -234,8 +248,13 @@ __bam_opd_cursor(dbp, dbc, first, tpgno, ti)
* Allocate a new cursor and create the stack. If duplicates
* are sorted, we've just created an off-page duplicate Btree.
* If duplicates aren't sorted, we've just created a Recno tree.
+ *
+ * Note that in order to get here at all, there shouldn't be
+ * an old off-page dup cursor--to augment the checking db_c_newopd
+ * will do, assert this.
*/
- if ((ret = __db_c_newopd(dbc, tpgno, &dbc_nopd)) != 0)
+ DB_ASSERT(orig_cp->opd == NULL);
+ if ((ret = __db_c_newopd(dbc, tpgno, orig_cp->opd, &dbc_nopd)) != 0)
return (ret);
cp = (BTREE_CURSOR *)dbc_nopd->internal;
@@ -316,22 +335,25 @@ loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
continue;
MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
+ /* [#8032]
+ DB_ASSERT(!STD_LOCKING(dbc) ||
+ orig_cp->lock_mode != DB_LOCK_NG);
+ */
if ((ret = __bam_opd_cursor(dbp,
dbc, first, tpgno, ti)) !=0)
return (ret);
if (my_txn != NULL && dbc->txn != my_txn)
found = 1;
- /* We released the MUTEX to get a cursor, start over. */
+ /* We released the mutex to get a cursor, start over. */
goto loop;
}
MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
}
MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
- if (found != 0 && DB_LOGGING(my_dbc)) {
- if ((ret = __bam_curadj_log(dbenv,
- my_dbc->txn, &lsn, 0, dbp->log_fileid,
- DB_CA_DUP, fpgno, tpgno, 0, first, fi, ti)) != 0)
+ if (found != 0 && DBC_LOGGING(my_dbc)) {
+ if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
+ &lsn, 0, DB_CA_DUP, fpgno, tpgno, 0, first, fi, ti)) != 0)
return (ret);
}
return (0);
@@ -372,18 +394,26 @@ loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
orig_cp = (BTREE_CURSOR *)dbc->internal;
+ /*
+ * A note on the orig_cp->opd != NULL requirement here:
+ * it's possible that there's a cursor that refers to
+ * the same duplicate set, but which has no opd cursor,
+ * because it refers to a different item and we took
+ * care of it while processing a previous record.
+ */
if (orig_cp->pgno != fpgno ||
orig_cp->indx != first ||
+ orig_cp->opd == NULL ||
((BTREE_CURSOR *)orig_cp->opd->internal)->indx
!= ti)
continue;
MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
- if ((ret = orig_cp->opd->c_close(orig_cp->opd)) != 0)
+ if ((ret = __db_c_close(orig_cp->opd)) != 0)
return (ret);
orig_cp->opd = NULL;
orig_cp->indx = fi;
/*
- * We released the MUTEX to free a cursor,
+ * We released the mutex to free a cursor,
* start over.
*/
goto loop;
@@ -432,6 +462,10 @@ __bam_ca_rsplit(my_dbc, fpgno, tpgno)
continue;
if (dbc->internal->pgno == fpgno) {
dbc->internal->pgno = tpgno;
+ /* [#8032]
+ DB_ASSERT(!STD_LOCKING(dbc) ||
+ dbc->internal->lock_mode != DB_LOCK_NG);
+ */
if (my_txn != NULL && dbc->txn != my_txn)
found = 1;
}
@@ -440,10 +474,9 @@ __bam_ca_rsplit(my_dbc, fpgno, tpgno)
}
MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
- if (found != 0 && DB_LOGGING(my_dbc)) {
- if ((ret = __bam_curadj_log(dbenv,
- my_dbc->txn, &lsn, 0, dbp->log_fileid,
- DB_CA_RSPLIT, fpgno, tpgno, 0, 0, 0, 0)) != 0)
+ if (found != 0 && DBC_LOGGING(my_dbc)) {
+ if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
+ &lsn, 0, DB_CA_RSPLIT, fpgno, tpgno, 0, 0, 0, 0)) != 0)
return (ret);
}
return (0);
@@ -497,6 +530,10 @@ __bam_ca_split(my_dbc, ppgno, lpgno, rpgno, split_indx, cleft)
continue;
cp = dbc->internal;
if (cp->pgno == ppgno) {
+ /* [#8032]
+ DB_ASSERT(!STD_LOCKING(dbc) ||
+ cp->lock_mode != DB_LOCK_NG);
+ */
if (my_txn != NULL && dbc->txn != my_txn)
found = 1;
if (cp->indx < split_indx) {
@@ -512,9 +549,9 @@ __bam_ca_split(my_dbc, ppgno, lpgno, rpgno, split_indx, cleft)
}
MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
- if (found != 0 && DB_LOGGING(my_dbc)) {
- if ((ret = __bam_curadj_log(dbenv, my_dbc->txn,
- &lsn, 0, dbp->log_fileid, DB_CA_SPLIT, ppgno, rpgno,
+ if (found != 0 && DBC_LOGGING(my_dbc)) {
+ if ((ret = __bam_curadj_log(dbp,
+ my_dbc->txn, &lsn, 0, DB_CA_SPLIT, ppgno, rpgno,
cleft ? lpgno : PGNO_INVALID, 0, split_indx, 0)) != 0)
return (ret);
}
diff --git a/db/btree/bt_cursor.c b/db/btree/bt_cursor.c
index 84ab7c807..067da53be 100644
--- a/db/btree/bt_cursor.c
+++ b/db/btree/bt_cursor.c
@@ -1,31 +1,30 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_cursor.c,v 11.88 2001/01/11 18:19:49 bostic Exp $";
+static const char revid[] = "$Id: bt_cursor.c,v 11.169 2003/11/19 18:41:06 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
-#include <stdlib.h>
#include <string.h>
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "btree.h"
-#include "lock.h"
-#include "qam.h"
-#include "common_ext.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
+#include "dbinc/mp.h"
+static int __bam_bulk __P((DBC *, DBT *, u_int32_t));
static int __bam_c_close __P((DBC *, db_pgno_t, int *));
static int __bam_c_del __P((DBC *));
static int __bam_c_destroy __P((DBC *));
@@ -33,15 +32,16 @@ static int __bam_c_first __P((DBC *));
static int __bam_c_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
static int __bam_c_getstack __P((DBC *));
static int __bam_c_last __P((DBC *));
-static int __bam_c_next __P((DBC *, int));
+static int __bam_c_next __P((DBC *, int, int));
static int __bam_c_physdel __P((DBC *));
static int __bam_c_prev __P((DBC *));
static int __bam_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
-static void __bam_c_reset __P((BTREE_CURSOR *));
-static int __bam_c_search __P((DBC *, const DBT *, u_int32_t, int *));
+static int __bam_c_search __P((DBC *,
+ db_pgno_t, const DBT *, u_int32_t, int *));
static int __bam_c_writelock __P((DBC *));
-static int __bam_getboth_finddatum __P((DBC *, DBT *));
+static int __bam_getboth_finddatum __P((DBC *, DBT *, u_int32_t));
static int __bam_getbothc __P((DBC *, DBT *));
+static int __bam_get_prev __P((DBC *));
static int __bam_isopd __P((DBC *, db_pgno_t *));
/*
@@ -53,48 +53,64 @@ static int __bam_isopd __P((DBC *, db_pgno_t *));
* don't -- we don't duplicate locks when we duplicate cursors if we are
* running in a transaction environment as there's no point if locks are
* never discarded. This means that the cursor may or may not hold a lock.
+ * In the case where we are descending the tree we always want to
+ * unlock the held interior page so we use ACQUIRE_COUPLE.
*/
#undef ACQUIRE
-#define ACQUIRE(dbc, mode, lpgno, lock, fpgno, pagep, ret) {\
+#define ACQUIRE(dbc, mode, lpgno, lock, fpgno, pagep, ret) { \
+ DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \
if ((pagep) != NULL) { \
- ret = memp_fput((dbc)->dbp->mpf, pagep, 0); \
+ ret = __memp_fput(__mpf, pagep, 0); \
+ pagep = NULL; \
+ } else \
+ ret = 0; \
+ if ((ret) == 0 && STD_LOCKING(dbc)) \
+ ret = __db_lget(dbc, LCK_COUPLE, lpgno, mode, 0, &(lock));\
+ if ((ret) == 0) \
+ ret = __memp_fget(__mpf, &(fpgno), 0, &(pagep)); \
+}
+
+#undef ACQUIRE_COUPLE
+#define ACQUIRE_COUPLE(dbc, mode, lpgno, lock, fpgno, pagep, ret) { \
+ DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \
+ if ((pagep) != NULL) { \
+ ret = __memp_fput(__mpf, pagep, 0); \
pagep = NULL; \
} else \
ret = 0; \
if ((ret) == 0 && STD_LOCKING(dbc)) \
ret = __db_lget(dbc, \
- (lock).off == LOCK_INVALID ? 0 : LCK_COUPLE, \
- lpgno, mode, 0, &lock); \
- else \
- (lock).off = LOCK_INVALID; \
+ LCK_COUPLE_ALWAYS, lpgno, mode, 0, &(lock)); \
if ((ret) == 0) \
- ret = memp_fget((dbc)->dbp->mpf, &(fpgno), 0, &(pagep));\
+ ret = __memp_fget(__mpf, &(fpgno), 0, &(pagep)); \
}
/* Acquire a new page/lock for a cursor. */
#undef ACQUIRE_CUR
-#define ACQUIRE_CUR(dbc, mode, ret) { \
+#define ACQUIRE_CUR(dbc, mode, p, ret) { \
BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \
- ACQUIRE(dbc, mode, \
- __cp->pgno, __cp->lock, __cp->pgno, __cp->page, ret); \
- if ((ret) == 0) \
+ if (p != __cp->pgno) \
+ __cp->pgno = PGNO_INVALID; \
+ ACQUIRE(dbc, mode, p, __cp->lock, p, __cp->page, ret); \
+ if ((ret) == 0) { \
+ __cp->pgno = p; \
__cp->lock_mode = (mode); \
+ } \
}
/*
- * Acquire a new page/lock for a cursor, and move the cursor on success.
- * The reason that this is a separate macro is because we don't want to
- * set the pgno/indx fields in the cursor until we actually have the lock,
- * otherwise the cursor adjust routines will adjust the cursor even though
- * we're not really on the page.
+ * Acquire a new page/lock for a cursor and release the previous.
+ * This is typically used when descending a tree and we do not
+ * want to hold the interior nodes locked.
*/
-#undef ACQUIRE_CUR_SET
-#define ACQUIRE_CUR_SET(dbc, mode, p, ret) { \
+#undef ACQUIRE_CUR_COUPLE
+#define ACQUIRE_CUR_COUPLE(dbc, mode, p, ret) { \
BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \
- ACQUIRE(dbc, mode, p, __cp->lock, p, __cp->page, ret); \
+ if (p != __cp->pgno) \
+ __cp->pgno = PGNO_INVALID; \
+ ACQUIRE_COUPLE(dbc, mode, p, __cp->lock, p, __cp->page, ret); \
if ((ret) == 0) { \
- __cp->pgno = p; \
- __cp->indx = 0; \
+ __cp->pgno = p; \
__cp->lock_mode = (mode); \
} \
}
@@ -112,46 +128,37 @@ static int __bam_isopd __P((DBC *, db_pgno_t *));
if (STD_LOCKING(dbc) && \
__cp->lock_mode != DB_LOCK_WRITE && \
((ret) = __db_lget(dbc, \
- __cp->lock.off == LOCK_INVALID ? 0 : LCK_COUPLE, \
+ LOCK_ISSET(__cp->lock) ? LCK_COUPLE : 0, \
__cp->pgno, DB_LOCK_WRITE, 0, &__cp->lock)) == 0) \
__cp->lock_mode = DB_LOCK_WRITE; \
}
-/* Discard the current page/lock. */
-#undef DISCARD
-#define DISCARD(dbc, ldiscard, lock, pagep, ret) { \
- int __t_ret; \
- if ((pagep) != NULL) { \
- ret = memp_fput((dbc)->dbp->mpf, pagep, 0); \
- pagep = NULL; \
- } else \
- ret = 0; \
- if ((lock).off != LOCK_INVALID) { \
- __t_ret = ldiscard ? \
- __LPUT((dbc), lock): __TLPUT((dbc), lock); \
- if (__t_ret != 0 && (ret) == 0) \
- ret = __t_ret; \
- (lock).off = LOCK_INVALID; \
- } \
-}
-
/* Discard the current page/lock for a cursor. */
#undef DISCARD_CUR
#define DISCARD_CUR(dbc, ret) { \
BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \
- DISCARD(dbc, 0, __cp->lock, __cp->page, ret); \
- if ((ret) == 0) \
+ DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \
+ int __t_ret; \
+ if ((__cp->page) != NULL) { \
+ ret = __memp_fput(__mpf, __cp->page, 0); \
+ __cp->page = NULL; \
+ } else \
+ ret = 0; \
+ __t_ret = __TLPUT((dbc), __cp->lock); \
+ if (__t_ret != 0 && (ret) == 0) \
+ ret = __t_ret; \
+ if ((ret) == 0 && !LOCK_ISSET(__cp->lock)) \
__cp->lock_mode = DB_LOCK_NG; \
}
/* If on-page item is a deleted record. */
#undef IS_DELETED
-#define IS_DELETED(page, indx) \
- B_DISSET(GET_BKEYDATA(page, \
+#define IS_DELETED(dbp, page, indx) \
+ B_DISSET(GET_BKEYDATA(dbp, page, \
(indx) + (TYPE(page) == P_LBTREE ? O_INDX : 0))->type)
#undef IS_CUR_DELETED
#define IS_CUR_DELETED(dbc) \
- IS_DELETED((dbc)->internal->page, (dbc)->internal->indx)
+ IS_DELETED((dbc)->dbp, (dbc)->internal->page, (dbc)->internal->indx)
/*
* Test to see if two cursors could point to duplicates of the same key.
@@ -163,8 +170,8 @@ static int __bam_isopd __P((DBC *, db_pgno_t *));
*/
#undef IS_DUPLICATE
#define IS_DUPLICATE(dbc, i1, i2) \
- (((PAGE *)(dbc)->internal->page)->inp[i1] == \
- ((PAGE *)(dbc)->internal->page)->inp[i2])
+ (P_INP((dbc)->dbp,((PAGE *)(dbc)->internal->page))[i1] == \
+ P_INP((dbc)->dbp,((PAGE *)(dbc)->internal->page))[i2])
#undef IS_CUR_DUPLICATE
#define IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx) \
(F_ISSET(dbc, DBC_OPD) || \
@@ -172,22 +179,6 @@ static int __bam_isopd __P((DBC *, db_pgno_t *));
IS_DUPLICATE(dbc, (dbc)->internal->indx, orig_indx)))
/*
- * __bam_c_reset --
- * Initialize internal cursor structure.
- */
-static void
-__bam_c_reset(cp)
- BTREE_CURSOR *cp;
-{
- cp->csp = cp->sp;
- cp->lock.off = LOCK_INVALID;
- cp->lock_mode = DB_LOCK_NG;
- cp->recno = RECNO_OOB;
- cp->order = INVALID_ORDER;
- cp->flags = 0;
-}
-
-/*
* __bam_c_init --
* Initialize the access private portion of a cursor
*
@@ -198,35 +189,26 @@ __bam_c_init(dbc, dbtype)
DBC *dbc;
DBTYPE dbtype;
{
- BTREE *t;
- BTREE_CURSOR *cp;
- DB *dbp;
+ DB_ENV *dbenv;
int ret;
- u_int32_t minkey;
- dbp = dbc->dbp;
+ dbenv = dbc->dbp->dbenv;
/* Allocate/initialize the internal structure. */
- if (dbc->internal == NULL) {
- if ((ret = __os_malloc(dbp->dbenv,
- sizeof(BTREE_CURSOR), NULL, &cp)) != 0)
- return (ret);
- dbc->internal = (DBC_INTERNAL *)cp;
-
- cp->sp = cp->csp = cp->stack;
- cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]);
- } else
- cp = (BTREE_CURSOR *)dbc->internal;
- __bam_c_reset(cp);
+ if (dbc->internal == NULL && (ret =
+ __os_malloc(dbenv, sizeof(BTREE_CURSOR), &dbc->internal)) != 0)
+ return (ret);
/* Initialize methods. */
dbc->c_close = __db_c_close;
- dbc->c_count = __db_c_count;
- dbc->c_del = __db_c_del;
- dbc->c_dup = __db_c_dup;
- dbc->c_get = __db_c_get;
- dbc->c_put = __db_c_put;
+ dbc->c_count = __db_c_count_pp;
+ dbc->c_del = __db_c_del_pp;
+ dbc->c_dup = __db_c_dup_pp;
+ dbc->c_get = __db_c_get_pp;
+ dbc->c_pget = __db_c_pget_pp;
+ dbc->c_put = __db_c_put_pp;
if (dbtype == DB_BTREE) {
+ dbc->c_am_bulk = __bam_bulk;
dbc->c_am_close = __bam_c_close;
dbc->c_am_del = __bam_c_del;
dbc->c_am_destroy = __bam_c_destroy;
@@ -234,6 +216,7 @@ __bam_c_init(dbc, dbtype)
dbc->c_am_put = __bam_c_put;
dbc->c_am_writelock = __bam_c_writelock;
} else {
+ dbc->c_am_bulk = __bam_bulk;
dbc->c_am_close = __bam_c_close;
dbc->c_am_del = __ram_c_del;
dbc->c_am_destroy = __bam_c_destroy;
@@ -242,18 +225,6 @@ __bam_c_init(dbc, dbtype)
dbc->c_am_writelock = __bam_c_writelock;
}
- /*
- * The btree leaf page data structures require that two key/data pairs
- * (or four items) fit on a page, but other than that there's no fixed
- * requirement. The btree off-page duplicates only require two items,
- * to be exact, but requiring four for them as well seems reasonable.
- *
- * Recno uses the btree bt_ovflsize value -- it's close enough.
- */
- t = dbp->bt_internal;
- minkey = F_ISSET(dbc, DBC_OPD) ? 2 : t->bt_minkey;
- cp->ovflsize = B_MINKEY_TO_OVFLSIZE(minkey, dbp->pgsize);
-
return (0);
}
@@ -267,12 +238,13 @@ int
__bam_c_refresh(dbc)
DBC *dbc;
{
+ BTREE *t;
BTREE_CURSOR *cp;
DB *dbp;
dbp = dbc->dbp;
+ t = dbp->bt_internal;
cp = (BTREE_CURSOR *)dbc->internal;
- __bam_c_reset(cp);
/*
* If our caller set the root page number, it's because the root was
@@ -280,11 +252,32 @@ __bam_c_refresh(dbc)
* pull it out of our internal information.
*/
if (cp->root == PGNO_INVALID)
- cp->root = ((BTREE *)dbp->bt_internal)->bt_root;
+ cp->root = t->bt_root;
+
+ LOCK_INIT(cp->lock);
+ cp->lock_mode = DB_LOCK_NG;
+
+ cp->sp = cp->csp = cp->stack;
+ cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]);
+
+ /*
+ * The btree leaf page data structures require that two key/data pairs
+ * (or four items) fit on a page, but other than that there's no fixed
+ * requirement. The btree off-page duplicates only require two items,
+ * to be exact, but requiring four for them as well seems reasonable.
+ *
+ * Recno uses the btree bt_ovflsize value -- it's close enough.
+ */
+ cp->ovflsize = B_MINKEY_TO_OVFLSIZE(
+ dbp, F_ISSET(dbc, DBC_OPD) ? 2 : t->bt_minkey, dbp->pgsize);
+
+ cp->recno = RECNO_OOB;
+ cp->order = INVALID_ORDER;
+ cp->flags = 0;
/* Initialize for record numbers. */
if (F_ISSET(dbc, DBC_OPD) ||
- dbc->dbtype == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) {
+ dbc->dbtype == DB_RECNO || F_ISSET(dbp, DB_AM_RECNUM)) {
F_SET(cp, C_RECNUM);
/*
@@ -293,7 +286,7 @@ __bam_c_refresh(dbc)
* mutable record numbers.
*/
if ((F_ISSET(dbc, DBC_OPD) && dbc->dbtype == DB_RECNO) ||
- F_ISSET(dbp, DB_BT_RECNUM | DB_RE_RENUMBER))
+ F_ISSET(dbp, DB_AM_RECNUM | DB_AM_RENUMBER))
F_SET(cp, C_RENUMBER);
}
@@ -313,11 +306,12 @@ __bam_c_close(dbc, root_pgno, rmroot)
BTREE_CURSOR *cp, *cp_opd, *cp_c;
DB *dbp;
DBC *dbc_opd, *dbc_c;
+ DB_MPOOLFILE *mpf;
PAGE *h;
- u_int32_t num;
int cdb_lock, ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
cp_opd = (dbc_opd = cp->opd) == NULL ?
NULL : (BTREE_CURSOR *)dbc_opd->internal;
@@ -394,6 +388,9 @@ __bam_c_close(dbc, root_pgno, rmroot)
if (__ram_ca_delete(dbp, cp->root) == 0)
goto lock;
goto done;
+ case DB_HASH:
+ case DB_QUEUE:
+ case DB_UNKNOWN:
default:
return (__db_unknown_type(dbp->dbenv,
"__bam_c_close", dbc->dbtype));
@@ -408,10 +405,10 @@ __bam_c_close(dbc, root_pgno, rmroot)
* We will not have been provided a root page number. Acquire
* one from the primary database.
*/
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0)
+ if ((ret = __memp_fget(mpf, &cp->pgno, 0, &h)) != 0)
goto err;
- root_pgno = GET_BOVERFLOW(h, cp->indx + O_INDX)->pgno;
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ root_pgno = GET_BOVERFLOW(dbp, h, cp->indx + O_INDX)->pgno;
+ if ((ret = __memp_fput(mpf, h, 0)) != 0)
goto err;
dbc_c = dbc_opd;
@@ -425,6 +422,9 @@ __bam_c_close(dbc, root_pgno, rmroot)
if (__ram_ca_delete(dbp, cp_opd->root) == 0)
goto lock;
goto done;
+ case DB_HASH:
+ case DB_QUEUE:
+ case DB_UNKNOWN:
default:
return (__db_unknown_type(dbp->dbenv,
"__bam_c_close", dbc->dbtype));
@@ -438,35 +438,18 @@ lock: cp_c = (BTREE_CURSOR *)dbc_c->internal;
* If this is CDB, upgrade the lock if necessary. While we acquired
* the write lock to logically delete the record, we released it when
* we returned from that call, and so may not be holding a write lock
- * at the moment. NB: to get here in CDB we must either be holding a
- * write lock or be the only cursor that is permitted to acquire write
- * locks. The reason is that there can never be more than a single CDB
- * write cursor (that cursor cannot be dup'd), and so that cursor must
- * be closed and the item therefore deleted before any other cursor
- * could acquire a reference to this item.
- *
- * Note that dbc may be an off-page dup cursor; this is the sole
- * instance in which an OPD cursor does any locking, but it's necessary
- * because we may be closed by ourselves without a parent cursor
- * handy, and we have to do a lock upgrade on behalf of somebody.
- * If this is the case, the OPD has been given the parent's locking
- * info in __db_c_get--the OPD is also a WRITEDUP.
+ * at the moment.
*/
if (CDB_LOCKING(dbp->dbenv)) {
- DB_ASSERT(!F_ISSET(dbc, DBC_OPD) || F_ISSET(dbc, DBC_WRITEDUP));
- if (!F_ISSET(dbc, DBC_WRITER)) {
- if ((ret =
- lock_get(dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE,
- &dbc->lock_dbt, DB_LOCK_WRITE, &dbc->mylock)) != 0)
+ if (F_ISSET(dbc, DBC_WRITECURSOR)) {
+ if ((ret = __lock_get(dbp->dbenv,
+ dbc->locker, DB_LOCK_UPGRADE, &dbc->lock_dbt,
+ DB_LOCK_WRITE, &dbc->mylock)) != 0)
goto err;
cdb_lock = 1;
}
-
- cp_c->lock.off = LOCK_INVALID;
- if ((ret =
- memp_fget(dbp->mpf, &cp_c->pgno, 0, &cp_c->page)) != 0)
+ if ((ret = __memp_fget(mpf, &cp_c->pgno, 0, &cp_c->page)) != 0)
goto err;
-
goto delete;
}
@@ -480,9 +463,7 @@ lock: cp_c = (BTREE_CURSOR *)dbc_c->internal;
* is responsible for acquiring any necessary locks before calling us.
*/
if (F_ISSET(dbc, DBC_OPD)) {
- cp_c->lock.off = LOCK_INVALID;
- if ((ret =
- memp_fget(dbp->mpf, &cp_c->pgno, 0, &cp_c->page)) != 0)
+ if ((ret = __memp_fget(mpf, &cp_c->pgno, 0, &cp_c->page)) != 0)
goto err;
goto delete;
}
@@ -542,13 +523,13 @@ delete: /*
* in that case. So, if the off-page duplicate tree is empty at this
* point, we want to remove it.
*/
- if ((ret = memp_fget(dbp->mpf, &root_pgno, 0, &h)) != 0)
+ if ((ret = __memp_fget(mpf, &root_pgno, 0, &h)) != 0)
goto err;
- if ((num = NUM_ENT(h)) == 0) {
+ if (NUM_ENT(h) == 0) {
if ((ret = __db_free(dbc, h)) != 0)
goto err;
} else {
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ if ((ret = __memp_fput(mpf, h, 0)) != 0)
goto err;
goto done;
}
@@ -566,8 +547,7 @@ delete: /*
* the primary page.
*/
if (dbc_opd != NULL) {
- cp->lock.off = LOCK_INVALID;
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
+ if ((ret = __memp_fget(mpf, &cp->pgno, 0, &cp->page)) != 0)
goto err;
if ((ret = __bam_c_physdel(dbc)) != 0)
goto err;
@@ -604,7 +584,7 @@ __bam_c_destroy(dbc)
DBC *dbc;
{
/* Discard the structures. */
- __os_free(dbc->internal, sizeof(BTREE_CURSOR));
+ __os_free(dbc->dbp->dbenv, dbc->internal);
return (0);
}
@@ -622,22 +602,25 @@ __bam_c_count(dbc, recnop)
{
BTREE_CURSOR *cp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
db_indx_t indx, top;
db_recno_t recno;
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
/*
* Called with the top-level cursor that may reference an off-page
- * duplicates page. If it's a set of on-page duplicates, get the
- * page and count. Otherwise, get the root page of the off-page
- * duplicate tree, and use the count. We don't have to acquire any
- * new locks, we have to have a read lock to even get here.
+ * duplicates tree. We don't have to acquire any new locks, we have
+ * to have a read lock to even get here.
*/
if (cp->opd == NULL) {
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
+ /*
+ * On-page duplicates, get the page and count.
+ */
+ if ((ret = __memp_fget(mpf, &cp->pgno, 0, &cp->page)) != 0)
return (ret);
/*
@@ -648,20 +631,47 @@ __bam_c_count(dbc, recnop)
if (indx == 0 ||
!IS_DUPLICATE(dbc, indx, indx - P_INDX))
break;
- for (recno = 1, top = NUM_ENT(cp->page) - P_INDX;
- indx < top; ++recno, indx += P_INDX)
- if (!IS_DUPLICATE(dbc, indx, indx + P_INDX))
+ for (recno = 0,
+ top = NUM_ENT(cp->page) - P_INDX;; indx += P_INDX) {
+ if (!IS_DELETED(dbp, cp->page, indx))
+ ++recno;
+ if (indx == top ||
+ !IS_DUPLICATE(dbc, indx, indx + P_INDX))
break;
- *recnop = recno;
+ }
} else {
- if ((ret = memp_fget(dbp->mpf,
- &cp->opd->internal->root, 0, &cp->page)) != 0)
+ /*
+ * Off-page duplicates tree, get the root page of the off-page
+ * duplicate tree.
+ */
+ if ((ret = __memp_fget(
+ mpf, &cp->opd->internal->root, 0, &cp->page)) != 0)
return (ret);
- *recnop = RE_NREC(cp->page);
+ /*
+ * If the page is an internal page use the page's count as it's
+ * up-to-date and reflects the status of cursors in the tree.
+ * If the page is a leaf page for unsorted duplicates, use the
+ * page's count as cursors don't mark items deleted on the page
+ * and wait, cursor delete items immediately.
+ * If the page is a leaf page for sorted duplicates, there may
+ * be cursors on the page marking deleted items -- count.
+ */
+ if (TYPE(cp->page) == P_LDUP)
+ for (recno = 0, indx = 0,
+ top = NUM_ENT(cp->page) - O_INDX;; indx += O_INDX) {
+ if (!IS_DELETED(dbp, cp->page, indx))
+ ++recno;
+ if (indx == top)
+ break;
+ }
+ else
+ recno = RE_NREC(cp->page);
}
- ret = memp_fput(dbp->mpf, cp->page, 0);
+ *recnop = recno;
+
+ ret = __memp_fput(mpf, cp->page, 0);
cp->page = NULL;
return (ret);
@@ -677,9 +687,11 @@ __bam_c_del(dbc)
{
BTREE_CURSOR *cp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
int ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
ret = 0;
@@ -706,25 +718,27 @@ __bam_c_del(dbc)
goto err;
cp->page = cp->csp->page;
} else {
- ACQUIRE_CUR(dbc, DB_LOCK_WRITE, ret);
+ ACQUIRE_CUR(dbc, DB_LOCK_WRITE, cp->pgno, ret);
if (ret != 0)
goto err;
}
/* Log the change. */
- if (DB_LOGGING(dbc) &&
- (ret = __bam_cdel_log(dbp->dbenv, dbc->txn, &LSN(cp->page), 0,
- dbp->log_fileid, PGNO(cp->page), &LSN(cp->page), cp->indx)) != 0)
- goto err;
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __bam_cdel_log(dbp, dbc->txn, &LSN(cp->page), 0,
+ PGNO(cp->page), &LSN(cp->page), cp->indx)) != 0)
+ goto err;
+ } else
+ LSN_NOT_LOGGED(LSN(cp->page));
/* Set the intent-to-delete flag on the page. */
if (TYPE(cp->page) == P_LBTREE)
- B_DSET(GET_BKEYDATA(cp->page, cp->indx + O_INDX)->type);
+ B_DSET(GET_BKEYDATA(dbp, cp->page, cp->indx + O_INDX)->type);
else
- B_DSET(GET_BKEYDATA(cp->page, cp->indx)->type);
+ B_DSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type);
/* Mark the page dirty. */
- ret = memp_fset(dbp->mpf, cp->page, DB_MPOOL_DIRTY);
+ ret = __memp_fset(mpf, cp->page, DB_MPOOL_DIRTY);
err: /*
* If we've been successful so far and the tree has record numbers,
@@ -736,7 +750,7 @@ err: /*
(void)__bam_stkrel(dbc, 0);
} else
if (cp->page != NULL &&
- (t_ret = memp_fput(dbp->mpf, cp->page, 0)) != 0 && ret == 0)
+ (t_ret = __memp_fput(mpf, cp->page, 0)) != 0 && ret == 0)
ret = t_ret;
cp->page = NULL;
@@ -771,7 +785,7 @@ __bam_c_dup(orig_dbc, new_dbc)
* holding inside a transaction because all the locks are retained
* until the transaction commits or aborts.
*/
- if (orig->lock.off != LOCK_INVALID && orig_dbc->txn == NULL) {
+ if (LOCK_ISSET(orig->lock) && orig_dbc->txn == NULL) {
if ((ret = __db_lget(new_dbc,
0, new->pgno, new->lock_mode, 0, &new->lock)) != 0)
return (ret);
@@ -796,11 +810,13 @@ __bam_c_get(dbc, key, data, flags, pgnop)
{
BTREE_CURSOR *cp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
db_pgno_t orig_pgno;
db_indx_t orig_indx;
int exact, newopd, ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
orig_pgno = cp->pgno;
orig_indx = cp->indx;
@@ -820,7 +836,7 @@ __bam_c_get(dbc, key, data, flags, pgnop)
* write lock, but upgrading to a write lock has no better
* chance of succeeding now instead of later, so don't try.
*/
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
+ if ((ret = __memp_fget(mpf, &cp->pgno, 0, &cp->page)) != 0)
goto err;
break;
case DB_FIRST:
@@ -829,9 +845,10 @@ __bam_c_get(dbc, key, data, flags, pgnop)
goto err;
break;
case DB_GET_BOTH:
+ case DB_GET_BOTH_RANGE:
/*
* There are two ways to get here based on DBcursor->c_get
- * with the DB_GET_BOTH flag set:
+ * with the DB_GET_BOTH/DB_GET_BOTH_RANGE flags set:
*
* 1. Searching a sorted off-page duplicate tree: do a tree
* search.
@@ -839,20 +856,34 @@ __bam_c_get(dbc, key, data, flags, pgnop)
* 2. Searching btree: do a tree search. If it returns a
* reference to off-page duplicate tree, return immediately
* and let our caller deal with it. If the search doesn't
- * return a reference to off-page duplicate tree, start an
- * on-page search.
+ * return a reference to off-page duplicate tree, continue
+ * with an on-page search.
*/
if (F_ISSET(dbc, DBC_OPD)) {
if ((ret = __bam_c_search(
- dbc, data, DB_GET_BOTH, &exact)) != 0)
- goto err;
- if (!exact) {
- ret = DB_NOTFOUND;
+ dbc, PGNO_INVALID, data, flags, &exact)) != 0)
goto err;
+ if (flags == DB_GET_BOTH) {
+ if (!exact) {
+ ret = DB_NOTFOUND;
+ goto err;
+ }
+ break;
}
+
+ /*
+ * We didn't require an exact match, so the search may
+ * have returned an entry past the end of the page,
+ * or we may be referencing a deleted record. If so,
+ * move to the next entry.
+ */
+ if ((cp->indx == NUM_ENT(cp->page) ||
+ IS_CUR_DELETED(dbc)) &&
+ (ret = __bam_c_next(dbc, 1, 0)) != 0)
+ goto err;
} else {
if ((ret = __bam_c_search(
- dbc, key, DB_GET_BOTH, &exact)) != 0)
+ dbc, PGNO_INVALID, key, flags, &exact)) != 0)
return (ret);
if (!exact) {
ret = DB_NOTFOUND;
@@ -863,7 +894,8 @@ __bam_c_get(dbc, key, data, flags, pgnop)
newopd = 1;
break;
}
- if ((ret = __bam_getboth_finddatum(dbc, data)) != 0)
+ if ((ret =
+ __bam_getboth_finddatum(dbc, data, flags)) != 0)
goto err;
}
break;
@@ -882,11 +914,11 @@ __bam_c_get(dbc, key, data, flags, pgnop)
if ((ret = __bam_c_first(dbc)) != 0)
goto err;
} else
- if ((ret = __bam_c_next(dbc, 1)) != 0)
+ if ((ret = __bam_c_next(dbc, 1, 0)) != 0)
goto err;
break;
case DB_NEXT_DUP:
- if ((ret = __bam_c_next(dbc, 1)) != 0)
+ if ((ret = __bam_c_next(dbc, 1, 0)) != 0)
goto err;
if (!IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx)) {
ret = DB_NOTFOUND;
@@ -900,7 +932,7 @@ __bam_c_get(dbc, key, data, flags, pgnop)
goto err;
} else
do {
- if ((ret = __bam_c_next(dbc, 1)) != 0)
+ if ((ret = __bam_c_next(dbc, 1, 0)) != 0)
goto err;
} while (IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx));
break;
@@ -927,12 +959,14 @@ __bam_c_get(dbc, key, data, flags, pgnop)
case DB_SET:
case DB_SET_RECNO:
newopd = 1;
- if ((ret = __bam_c_search(dbc, key, flags, &exact)) != 0)
+ if ((ret = __bam_c_search(dbc,
+ PGNO_INVALID, key, flags, &exact)) != 0)
goto err;
break;
case DB_SET_RANGE:
newopd = 1;
- if ((ret = __bam_c_search(dbc, key, flags, &exact)) != 0)
+ if ((ret = __bam_c_search(dbc,
+ PGNO_INVALID, key, flags, &exact)) != 0)
goto err;
/*
@@ -942,7 +976,7 @@ __bam_c_get(dbc, key, data, flags, pgnop)
* the next entry.
*/
if (cp->indx == NUM_ENT(cp->page) || IS_CUR_DELETED(dbc))
- if ((ret = __bam_c_next(dbc, 0)) != 0)
+ if ((ret = __bam_c_next(dbc, 0, 0)) != 0)
goto err;
break;
default:
@@ -957,8 +991,15 @@ __bam_c_get(dbc, key, data, flags, pgnop)
if (newopd && pgnop != NULL)
(void)__bam_isopd(dbc, pgnop);
- /* Don't return the key, it was passed to us */
- if (flags == DB_SET)
+ /*
+ * Don't return the key, it was passed to us (this is true even if the
+ * application defines a compare function returning equality for more
+ * than one key value, since in that case which actual value we store
+ * in the database is undefined -- and particularly true in the case of
+ * duplicates where we only store one key value).
+ */
+ if (flags == DB_GET_BOTH ||
+ flags == DB_GET_BOTH_RANGE || flags == DB_SET)
F_SET(key, DB_DBT_ISSET);
err: /*
@@ -966,13 +1007,595 @@ err: /*
* moved, clear the delete flag, DBcursor->c_get never references
* a deleted key, if it moved at all.
*/
- if (F_ISSET(cp, C_DELETED)
- && (cp->pgno != orig_pgno || cp->indx != orig_indx))
+ if (F_ISSET(cp, C_DELETED) &&
+ (cp->pgno != orig_pgno || cp->indx != orig_indx))
F_CLR(cp, C_DELETED);
return (ret);
}
+static int
+__bam_get_prev(dbc)
+ DBC *dbc;
+{
+ BTREE_CURSOR *cp;
+ DBT key, data;
+ db_pgno_t pgno;
+ int ret;
+
+ if ((ret = __bam_c_prev(dbc)) != 0)
+ return (ret);
+
+ if (__bam_isopd(dbc, &pgno)) {
+ cp = (BTREE_CURSOR *)dbc->internal;
+ if ((ret = __db_c_newopd(dbc, pgno, cp->opd, &cp->opd)) != 0)
+ return (ret);
+ if ((ret = cp->opd->c_am_get(cp->opd,
+ &key, &data, DB_LAST, NULL)) != 0)
+ return (ret);
+ }
+
+ return (0);
+}
+
+/*
+ * __bam_bulk -- Return bulk data from a btree.
+ */
+static int
+__bam_bulk(dbc, data, flags)
+ DBC *dbc;
+ DBT *data;
+ u_int32_t flags;
+{
+ BKEYDATA *bk;
+ BOVERFLOW *bo;
+ BTREE_CURSOR *cp;
+ PAGE *pg;
+ db_indx_t *inp, indx, pg_keyoff;
+ int32_t *endp, key_off, *offp, *saveoffp;
+ u_int8_t *dbuf, *dp, *np;
+ u_int32_t key_size, pagesize, size, space;
+ int adj, is_key, need_pg, next_key, no_dup, rec_key, ret;
+
+ ret = 0;
+ key_off = 0;
+ size = 0;
+ pagesize = dbc->dbp->pgsize;
+ cp = (BTREE_CURSOR *)dbc->internal;
+
+ /*
+ * dp tracks the beginning of the page in the buffer.
+ * np is the next place to copy things into the buffer.
+ * dbuf always stays at the beginning of the buffer.
+ */
+ dbuf = data->data;
+ np = dp = dbuf;
+
+ /* Keep track of space that is left. There is a termination entry */
+ space = data->ulen;
+ space -= sizeof(*offp);
+
+ /* Build the offset/size table from the end up. */
+ endp = (int32_t *)((u_int8_t *)dbuf + data->ulen);
+ endp--;
+ offp = endp;
+
+ key_size = 0;
+
+ /*
+ * Distinguish between BTREE and RECNO.
+ * There are no keys in RECNO. If MULTIPLE_KEY is specified
+ * then we return the record numbers.
+ * is_key indicates that multiple btree keys are returned.
+ * rec_key is set if we are returning record numbers.
+ * next_key is set if we are going after the next key rather than dup.
+ */
+ if (dbc->dbtype == DB_BTREE) {
+ is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1: 0;
+ rec_key = 0;
+ next_key = is_key && LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP;
+ adj = 2;
+ } else {
+ is_key = 0;
+ rec_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0;
+ next_key = LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP;
+ adj = 1;
+ }
+ no_dup = LF_ISSET(DB_OPFLAGS_MASK) == DB_NEXT_NODUP;
+
+next_pg:
+ indx = cp->indx;
+ pg = cp->page;
+
+ inp = P_INP(dbc->dbp, pg);
+ /* The current page is not yet in the buffer. */
+ need_pg = 1;
+
+ /*
+ * Keep track of the offset of the current key on the page.
+ * If we are returning keys, set it to 0 first so we force
+ * the copy of the key to the buffer.
+ */
+ pg_keyoff = 0;
+ if (is_key == 0)
+ pg_keyoff = inp[indx];
+
+ do {
+ if (IS_DELETED(dbc->dbp, pg, indx)) {
+ if (dbc->dbtype != DB_RECNO)
+ continue;
+
+ cp->recno++;
+ /*
+ * If we are not returning recnos then we
+ * need to fill in every slot so the user
+ * can calculate the record numbers.
+ */
+ if (rec_key != 0)
+ continue;
+
+ space -= 2 * sizeof(*offp);
+ /* Check if space has underflowed. */
+ if (space > data->ulen)
+ goto back_up;
+
+ /* Just mark the empty recno slots. */
+ *offp-- = 0;
+ *offp-- = 0;
+ continue;
+ }
+
+ /*
+ * Check to see if we have a new key.
+ * If so, then see if we need to put the
+ * key on the page. If it's already there
+ * then we just point to it.
+ */
+ if (is_key && pg_keyoff != inp[indx]) {
+ bk = GET_BKEYDATA(dbc->dbp, pg, indx);
+ if (B_TYPE(bk->type) == B_OVERFLOW) {
+ bo = (BOVERFLOW *)bk;
+ size = key_size = bo->tlen;
+ if (key_size > space)
+ goto get_key_space;
+ if ((ret = __bam_bulk_overflow(dbc,
+ bo->tlen, bo->pgno, np)) != 0)
+ return (ret);
+ space -= key_size;
+ key_off = (int32_t)(np - dbuf);
+ np += key_size;
+ } else {
+ if (need_pg) {
+ dp = np;
+ size = pagesize - HOFFSET(pg);
+ if (space < size) {
+get_key_space:
+ /* Nothing added, then error. */
+ if (offp == endp) {
+ data->size =
+ ALIGN(size +
+ pagesize, 1024);
+ return (ENOMEM);
+ }
+ /*
+ * We need to back up to the
+ * last record put into the
+ * buffer so that it is
+ * CURRENT.
+ */
+ if (indx != 0)
+ indx -= P_INDX;
+ else {
+ if ((ret =
+ __bam_get_prev(
+ dbc)) != 0)
+ return (ret);
+ indx = cp->indx;
+ pg = cp->page;
+ }
+ break;
+ }
+ /*
+ * Move the data part of the page
+ * to the buffer.
+ */
+ memcpy(dp,
+ (u_int8_t *)pg + HOFFSET(pg), size);
+ need_pg = 0;
+ space -= size;
+ np += size;
+ }
+ key_size = bk->len;
+ key_off = (int32_t)((inp[indx] - HOFFSET(pg))
+ + (dp - dbuf) + SSZA(BKEYDATA, data));
+ pg_keyoff = inp[indx];
+ }
+ }
+
+ /*
+ * Reserve space for the pointers and sizes.
+ * Either key/data pair or just for a data item.
+ */
+ space -= (is_key ? 4 : 2) * sizeof(*offp);
+ if (rec_key)
+ space -= sizeof(*offp);
+
+ /* Check to see if space has underflowed. */
+ if (space > data->ulen)
+ goto back_up;
+
+ /*
+ * Determine if the next record is in the
+ * buffer already or if it needs to be copied in.
+ * If we have an off page dup, then copy as many
+ * as will fit into the buffer.
+ */
+ bk = GET_BKEYDATA(dbc->dbp, pg, indx + adj - 1);
+ if (B_TYPE(bk->type) == B_DUPLICATE) {
+ bo = (BOVERFLOW *)bk;
+ if (is_key) {
+ *offp-- = (int32_t)key_off;
+ *offp-- = (int32_t)key_size;
+ }
+ /*
+ * We pass the offset of the current key.
+ * On return we check to see if offp has
+ * moved to see if any data fit.
+ */
+ saveoffp = offp;
+ if ((ret = __bam_bulk_duplicates(dbc, bo->pgno,
+ dbuf, is_key ? offp + P_INDX : NULL,
+ &offp, &np, &space, no_dup)) != 0) {
+ if (ret == ENOMEM) {
+ size = space;
+ space = 0;
+ /* If nothing was added, then error. */
+ if (offp == saveoffp) {
+ offp += 2;
+ goto back_up;
+ }
+ goto get_space;
+ }
+ return (ret);
+ }
+ } else if (B_TYPE(bk->type) == B_OVERFLOW) {
+ bo = (BOVERFLOW *)bk;
+ size = bo->tlen;
+ if (size > space)
+ goto back_up;
+ if ((ret =
+ __bam_bulk_overflow(dbc,
+ bo->tlen, bo->pgno, np)) != 0)
+ return (ret);
+ space -= size;
+ if (is_key) {
+ *offp-- = (int32_t)key_off;
+ *offp-- = (int32_t)key_size;
+ } else if (rec_key)
+ *offp-- = (int32_t)cp->recno;
+ *offp-- = (int32_t)(np - dbuf);
+ np += size;
+ *offp-- = (int32_t)size;
+ } else {
+ if (need_pg) {
+ dp = np;
+ size = pagesize - HOFFSET(pg);
+ if (space < size) {
+back_up:
+ /*
+ * Back up the index so that the
+ * last record in the buffer is CURRENT
+ */
+ if (indx >= adj)
+ indx -= adj;
+ else {
+ if ((ret =
+ __bam_get_prev(dbc)) != 0 &&
+ ret != DB_NOTFOUND)
+ return (ret);
+ indx = cp->indx;
+ pg = cp->page;
+ }
+ if (dbc->dbtype == DB_RECNO)
+ cp->recno--;
+get_space:
+ /*
+ * See if we put anything in the
+ * buffer or, if we are doing a DBP->get,
+ * whether we got all of the data.
+ */
+ if (offp >=
+ (is_key ? &endp[-1] : endp) ||
+ F_ISSET(dbc, DBC_TRANSIENT)) {
+ data->size = ALIGN(size +
+ data->ulen - space, 1024);
+ return (ENOMEM);
+ }
+ break;
+ }
+ memcpy(dp, (u_int8_t *)pg + HOFFSET(pg), size);
+ need_pg = 0;
+ space -= size;
+ np += size;
+ }
+ /*
+ * Add the offsets and sizes to the end of the buffer.
+ * First add the key info then the data info.
+ */
+ if (is_key) {
+ *offp-- = (int32_t)key_off;
+ *offp-- = (int32_t)key_size;
+ } else if (rec_key)
+ *offp-- = (int32_t)cp->recno;
+ *offp-- = (int32_t)((inp[indx + adj - 1] - HOFFSET(pg))
+ + (dp - dbuf) + SSZA(BKEYDATA, data));
+ *offp-- = bk->len;
+ }
+ if (dbc->dbtype == DB_RECNO)
+ cp->recno++;
+ else if (no_dup) {
+ while (indx + adj < NUM_ENT(pg) &&
+ pg_keyoff == inp[indx + adj])
+ indx += adj;
+ }
+ /*
+ * Stop when we either run off the page or we
+ * move to the next key and we are not returning multiple keys.
+ */
+ } while ((indx += adj) < NUM_ENT(pg) &&
+ (next_key || pg_keyoff == inp[indx]));
+
+ /* If we are off the page then try the next page. */
+ if (ret == 0 && next_key && indx >= NUM_ENT(pg)) {
+ cp->indx = indx;
+ ret = __bam_c_next(dbc, 0, 1);
+ if (ret == 0)
+ goto next_pg;
+ if (ret != DB_NOTFOUND)
+ return (ret);
+ }
+
+ /*
+ * If we did a DBP->get we must error if we did not return
+ * all the data for the current key because there is
+ * no way to know if we did not get it all, nor any
+ * interface to fetch the balance.
+ */
+
+ if (ret == 0 && indx < pg->entries &&
+ F_ISSET(dbc, DBC_TRANSIENT) && pg_keyoff == inp[indx]) {
+ data->size = (data->ulen - space) + size;
+ return (ENOMEM);
+ }
+ /*
+ * Must leave the index pointing at the last record fetched.
+ * If we are not fetching keys, we may have stepped to the
+ * next key.
+ */
+ if (ret == ENOMEM || next_key || pg_keyoff == inp[indx])
+ cp->indx = indx;
+ else
+ cp->indx = indx - P_INDX;
+
+ if (rec_key == 1)
+ *offp = RECNO_OOB;
+ else
+ *offp = -1;
+ return (0);
+}
+
+/*
+ * __bam_bulk_overflow --
+ * Dump overflow record into the buffer.
+ * The space requirements have already been checked.
+ * PUBLIC: int __bam_bulk_overflow
+ * PUBLIC: __P((DBC *, u_int32_t, db_pgno_t, u_int8_t *));
+ */
+int
+__bam_bulk_overflow(dbc, len, pgno, dp)
+ DBC *dbc;
+ u_int32_t len;
+ db_pgno_t pgno;
+ u_int8_t *dp;
+{
+ DBT dbt;
+
+ memset(&dbt, 0, sizeof(dbt));
+ F_SET(&dbt, DB_DBT_USERMEM);
+ dbt.ulen = len;
+ dbt.data = (void *)dp;
+ return (__db_goff(dbc->dbp, &dbt, len, pgno, NULL, NULL));
+}
+
+/*
+ * __bam_bulk_duplicates --
+ * Put as many off page duplicates as will fit into the buffer.
+ * This routine will adjust the cursor to reflect the position in
+ * the overflow tree.
+ * PUBLIC: int __bam_bulk_duplicates __P((DBC *,
+ * PUBLIC: db_pgno_t, u_int8_t *, int32_t *,
+ * PUBLIC: int32_t **, u_int8_t **, u_int32_t *, int));
+ */
+int
+__bam_bulk_duplicates(dbc, pgno, dbuf, keyoff, offpp, dpp, spacep, no_dup)
+ DBC *dbc;
+ db_pgno_t pgno;
+ u_int8_t *dbuf;
+ int32_t *keyoff, **offpp;
+ u_int8_t **dpp;
+ u_int32_t *spacep;
+ int no_dup;
+{
+ DB *dbp;
+ BKEYDATA *bk;
+ BOVERFLOW *bo;
+ BTREE_CURSOR *cp;
+ DBC *opd;
+ DBT key, data;
+ PAGE *pg;
+ db_indx_t indx, *inp;
+ int32_t *offp;
+ u_int32_t pagesize, size, space;
+ u_int8_t *dp, *np;
+ int first, need_pg, ret, t_ret;
+
+ ret = 0;
+
+ dbp = dbc->dbp;
+ cp = (BTREE_CURSOR *)dbc->internal;
+ opd = cp->opd;
+
+ if (opd == NULL) {
+ if ((ret = __db_c_newopd(dbc, pgno, NULL, &opd)) != 0)
+ return (ret);
+ cp->opd = opd;
+ if ((ret = opd->c_am_get(opd,
+ &key, &data, DB_FIRST, NULL)) != 0)
+ goto close_opd;
+ }
+
+ pagesize = opd->dbp->pgsize;
+ cp = (BTREE_CURSOR *)opd->internal;
+ space = *spacep;
+ /* Get current offset slot. */
+ offp = *offpp;
+
+ /*
+ * np is the next place to put data.
+ * dp is the beginning of the current page in the buffer.
+ */
+ np = dp = *dpp;
+ first = 1;
+ indx = cp->indx;
+
+ do {
+ /* Fetch the current record. No initial move. */
+ if ((ret = __bam_c_next(opd, 0, 0)) != 0)
+ break;
+ pg = cp->page;
+ indx = cp->indx;
+ inp = P_INP(dbp, pg);
+ /* We need to copy the page to the buffer. */
+ need_pg = 1;
+
+ do {
+ if (IS_DELETED(dbp, pg, indx))
+ goto contin;
+ bk = GET_BKEYDATA(dbp, pg, indx);
+ space -= 2 * sizeof(*offp);
+ /* Allocate space for key if needed. */
+ if (first == 0 && keyoff != NULL)
+ space -= 2 * sizeof(*offp);
+
+ /* Did space underflow? */
+ if (space > *spacep) {
+ ret = ENOMEM;
+ if (first == 1) {
+ /* Get the absolute value. */
+ space = -(int32_t)space;
+ space = *spacep + space;
+ if (need_pg)
+ space += pagesize - HOFFSET(pg);
+ }
+ break;
+ }
+ if (B_TYPE(bk->type) == B_OVERFLOW) {
+ bo = (BOVERFLOW *)bk;
+ size = bo->tlen;
+ if (size > space) {
+ ret = ENOMEM;
+ space = *spacep + size;
+ break;
+ }
+ if (first == 0 && keyoff != NULL) {
+ *offp-- = keyoff[0];
+ *offp-- = keyoff[-1];
+ }
+ if ((ret = __bam_bulk_overflow(dbc,
+ bo->tlen, bo->pgno, np)) != 0)
+ return (ret);
+ space -= size;
+ *offp-- = (int32_t)(np - dbuf);
+ np += size;
+ } else {
+ if (need_pg) {
+ dp = np;
+ size = pagesize - HOFFSET(pg);
+ if (space < size) {
+ ret = ENOMEM;
+ /* Return space required. */
+ space = *spacep + size;
+ break;
+ }
+ memcpy(dp,
+ (u_int8_t *)pg + HOFFSET(pg), size);
+ need_pg = 0;
+ space -= size;
+ np += size;
+ }
+ if (first == 0 && keyoff != NULL) {
+ *offp-- = keyoff[0];
+ *offp-- = keyoff[-1];
+ }
+ size = bk->len;
+ *offp-- = (int32_t)((inp[indx] - HOFFSET(pg))
+ + (dp - dbuf) + SSZA(BKEYDATA, data));
+ }
+ *offp-- = (int32_t)size;
+ first = 0;
+ if (no_dup)
+ break;
+contin:
+ indx++;
+ if (opd->dbtype == DB_RECNO)
+ cp->recno++;
+ } while (indx < NUM_ENT(pg));
+ if (no_dup)
+ break;
+ cp->indx = indx;
+
+ } while (ret == 0);
+
+ /* Return the updated information. */
+ *spacep = space;
+ *offpp = offp;
+ *dpp = np;
+
+ /*
+ * If we ran out of space back up the pointer.
+ * If we did not return any dups or reached the end, close the opd.
+ */
+ if (ret == ENOMEM) {
+ if (opd->dbtype == DB_RECNO) {
+ if (--cp->recno == 0)
+ goto close_opd;
+ } else if (indx != 0)
+ cp->indx--;
+ else {
+ t_ret = __bam_c_prev(opd);
+ if (t_ret == DB_NOTFOUND)
+ goto close_opd;
+ if (t_ret != 0)
+ ret = t_ret;
+ }
+ } else if (keyoff == NULL && ret == DB_NOTFOUND) {
+ cp->indx--;
+ if (opd->dbtype == DB_RECNO)
+ --cp->recno;
+ } else if (indx == 0 || ret == DB_NOTFOUND) {
+close_opd:
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+ if ((t_ret = __db_c_close(opd)) != 0 && ret == 0)
+ ret = t_ret;
+ ((BTREE_CURSOR *)dbc->internal)->opd = NULL;
+ }
+ if (ret == DB_NOTFOUND)
+ ret = 0;
+
+ return (ret);
+}
+
/*
* __bam_getbothc --
* Search for a matching data item on a join.
@@ -984,9 +1607,11 @@ __bam_getbothc(dbc, data)
{
BTREE_CURSOR *cp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
int cmp, exact, ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
/*
@@ -995,7 +1620,7 @@ __bam_getbothc(dbc, data)
* write lock, but upgrading to a write lock has no better
* chance of succeeding now instead of later, so don't try.
*/
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
+ if ((ret = __memp_fget(mpf, &cp->pgno, 0, &cp->page)) != 0)
return (ret);
/*
@@ -1017,11 +1642,12 @@ __bam_getbothc(dbc, data)
return (DB_NOTFOUND);
/* Discard the current page, we're going to do a full search. */
- if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0)
+ if ((ret = __memp_fput(mpf, cp->page, 0)) != 0)
return (ret);
cp->page = NULL;
- return (__bam_c_search(dbc, data, DB_GET_BOTH, &exact));
+ return (__bam_c_search(dbc,
+ PGNO_INVALID, data, DB_GET_BOTH, &exact));
}
/*
@@ -1038,7 +1664,7 @@ __bam_getbothc(dbc, data)
return (DB_NOTFOUND);
cp->indx += P_INDX;
- return (__bam_getboth_finddatum(dbc, data));
+ return (__bam_getboth_finddatum(dbc, data, DB_GET_BOTH));
}
/*
@@ -1046,31 +1672,31 @@ __bam_getbothc(dbc, data)
* Find a matching on-page data item.
*/
static int
-__bam_getboth_finddatum(dbc, data)
+__bam_getboth_finddatum(dbc, data, flags)
DBC *dbc;
DBT *data;
+ u_int32_t flags;
{
BTREE_CURSOR *cp;
DB *dbp;
db_indx_t base, lim, top;
int cmp, ret;
+ COMPQUIET(cmp, 0);
+
dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
/*
* Called (sometimes indirectly) from DBC->get to search on-page data
- * item(s) for a matching value. If the original flag was DB_GET_BOTH,
- * the cursor argument is set to the first data item for the key. If
- * the original flag was DB_GET_BOTHC, the cursor argument is set to
- * the first data item that we can potentially return. In both cases,
- * there may or may not be additional duplicate data items to search.
+ * item(s) for a matching value. If the original flag was DB_GET_BOTH
+ * or DB_GET_BOTH_RANGE, the cursor is set to the first undeleted data
+ * item for the key. If the original flag was DB_GET_BOTHC, the cursor
+ * argument is set to the first data item we can potentially return.
+ * In both cases, there may or may not be additional duplicate data
+ * items to search.
*
* If the duplicates are not sorted, do a linear search.
- *
- * If the duplicates are sorted, do a binary search. The reason for
- * this is that large pages and small key/data pairs result in large
- * numbers of on-page duplicates before they get pushed off-page.
*/
if (dbp->dup_compare == NULL) {
for (;; cp->indx += P_INDX) {
@@ -1085,41 +1711,62 @@ __bam_getboth_finddatum(dbc, data)
!IS_DUPLICATE(dbc, cp->indx, cp->indx + P_INDX))
break;
}
- } else {
- /*
- * Find the top and bottom of the duplicate set. Binary search
- * requires at least two items, don't loop if there's only one.
- */
- for (base = top = cp->indx;
- top < NUM_ENT(cp->page); top += P_INDX)
- if (!IS_DUPLICATE(dbc, cp->indx, top))
- break;
- if (base == (top - P_INDX)) {
- if ((ret = __bam_cmp(dbp, data,
- cp->page, cp->indx + O_INDX,
- dbp->dup_compare, &cmp)) != 0)
- return (ret);
- return (cmp == 0 ? 0 : DB_NOTFOUND);
- }
+ return (DB_NOTFOUND);
+ }
- for (lim =
- (top - base) / (db_indx_t)P_INDX; lim != 0; lim >>= 1) {
- cp->indx = base + ((lim >> 1) * P_INDX);
- if ((ret = __bam_cmp(dbp, data, cp->page,
- cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
- return (ret);
- if (cmp == 0) {
- if (!IS_CUR_DELETED(dbc))
- return (0);
- break;
- }
- if (cmp > 0) {
- base = cp->indx + P_INDX;
- --lim;
- }
+ /*
+ * If the duplicates are sorted, do a binary search. The reason for
+ * this is that large pages and small key/data pairs result in large
+ * numbers of on-page duplicates before they get pushed off-page.
+ *
+ * Find the top and bottom of the duplicate set. Binary search
+ * requires at least two items, don't loop if there's only one.
+ */
+ for (base = top = cp->indx; top < NUM_ENT(cp->page); top += P_INDX)
+ if (!IS_DUPLICATE(dbc, cp->indx, top))
+ break;
+ if (base == (top - P_INDX)) {
+ if ((ret = __bam_cmp(dbp, data,
+ cp->page, cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
+ return (ret);
+ return (cmp == 0 ||
+ (cmp < 0 && flags == DB_GET_BOTH_RANGE) ? 0 : DB_NOTFOUND);
+ }
+
+ for (lim = (top - base) / (db_indx_t)P_INDX; lim != 0; lim >>= 1) {
+ cp->indx = base + ((lim >> 1) * P_INDX);
+ if ((ret = __bam_cmp(dbp, data, cp->page,
+ cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
+ return (ret);
+ if (cmp == 0) {
+ /*
+ * XXX
+ * No duplicate duplicates in sorted duplicate sets,
+ * so there can be only one.
+ */
+ if (!IS_CUR_DELETED(dbc))
+ return (0);
+ break;
+ }
+ if (cmp > 0) {
+ base = cp->indx + P_INDX;
+ --lim;
}
}
- return (DB_NOTFOUND);
+
+ /* No match found; if we're looking for an exact match, we're done. */
+ if (flags == DB_GET_BOTH)
+ return (DB_NOTFOUND);
+
+ /*
+ * Base is the smallest index greater than the data item, may be zero
+ * or a last + O_INDX index, and may be deleted. Find an undeleted
+ * item.
+ */
+ cp->indx = base;
+ while (cp->indx < top && IS_CUR_DELETED(dbc))
+ cp->indx += P_INDX;
+ return (cp->indx < top ? 0 : DB_NOTFOUND);
}
/*
@@ -1136,20 +1783,24 @@ __bam_c_put(dbc, key, data, flags, pgnop)
BTREE_CURSOR *cp;
DB *dbp;
DBT dbt;
+ DB_MPOOLFILE *mpf;
+ db_pgno_t root_pgno;
u_int32_t iiop;
- int cmp, exact, needkey, ret, stack;
+ int cmp, exact, own, ret, stack;
void *arg;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
+ root_pgno = cp->root;
-split: needkey = ret = stack = 0;
+split: ret = stack = 0;
switch (flags) {
case DB_AFTER:
case DB_BEFORE:
case DB_CURRENT:
- needkey = 1;
iiop = flags;
+ own = 1;
/*
* If the Btree has record numbers (and we're not replacing an
@@ -1182,25 +1833,33 @@ split: needkey = ret = stack = 0;
ACQUIRE_WRITE_LOCK(dbc, ret);
if (ret != 0)
goto err;
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
+ if ((ret = __memp_fget(mpf, &cp->pgno, 0, &cp->page)) != 0)
goto err;
break;
case DB_KEYFIRST:
case DB_KEYLAST:
case DB_NODUPDATA:
+ own = 0;
/*
* Searching off-page, sorted duplicate tree: do a tree search
* for the correct item; __bam_c_search returns the smallest
* slot greater than the key, use it.
+ *
+ * See comment below regarding where we can start the search.
*/
if (F_ISSET(dbc, DBC_OPD)) {
- if ((ret =
- __bam_c_search(dbc, data, flags, &exact)) != 0)
+ if ((ret = __bam_c_search(dbc,
+ F_ISSET(cp, C_RECNUM) ? cp->root : root_pgno,
+ data, flags, &exact)) != 0)
goto err;
stack = 1;
/* Disallow "sorted" duplicate duplicates. */
if (exact) {
+ if (IS_DELETED(dbp, cp->page, cp->indx)) {
+ iiop = DB_CURRENT;
+ break;
+ }
ret = __db_duperr(dbp, flags);
goto err;
}
@@ -1208,8 +1867,17 @@ split: needkey = ret = stack = 0;
break;
}
- /* Searching a btree. */
- if ((ret = __bam_c_search(dbc, key,
+ /*
+ * Searching a btree.
+ *
+ * If we've done a split, we can start the search from the
+ * parent of the split page, which __bam_split returned
+ * for us in root_pgno, unless we're in a Btree with record
+ * numbering. In that case, we'll need the true root page
+ * in order to adjust the record count.
+ */
+ if ((ret = __bam_c_search(dbc,
+ F_ISSET(cp, C_RECNUM) ? cp->root : root_pgno, key,
flags == DB_KEYFIRST || dbp->dup_compare != NULL ?
DB_KEYFIRST : DB_KEYLAST, &exact)) != 0)
goto err;
@@ -1264,8 +1932,8 @@ split: needkey = ret = stack = 0;
*/
for (;; cp->indx += P_INDX) {
if ((ret = __bam_cmp(dbp, data, cp->page,
- cp->indx + O_INDX, dbp->dup_compare, &cmp)) !=0)
- return (ret);
+ cp->indx + O_INDX, dbp->dup_compare, &cmp)) != 0)
+ goto err;
if (cmp < 0) {
iiop = DB_BEFORE;
break;
@@ -1273,7 +1941,7 @@ split: needkey = ret = stack = 0;
/* Disallow "sorted" duplicate duplicates. */
if (cmp == 0) {
- if (IS_DELETED(cp->page, cp->indx)) {
+ if (IS_DELETED(dbp, cp->page, cp->indx)) {
iiop = DB_CURRENT;
break;
}
@@ -1282,8 +1950,8 @@ split: needkey = ret = stack = 0;
}
if (cp->indx + P_INDX >= NUM_ENT(cp->page) ||
- ((PAGE *)cp->page)->inp[cp->indx] !=
- ((PAGE *)cp->page)->inp[cp->indx + P_INDX]) {
+ P_INP(dbp, ((PAGE *)cp->page))[cp->indx] !=
+ P_INP(dbp, ((PAGE *)cp->page))[cp->indx + P_INDX]) {
iiop = DB_AFTER;
break;
}
@@ -1306,7 +1974,7 @@ split: needkey = ret = stack = 0;
flags == DB_BEFORE || flags == DB_CURRENT) {
memset(&dbt, 0, sizeof(DBT));
if ((ret = __db_ret(dbp, cp->page, 0, &dbt,
- &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
+ &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0)
goto err;
arg = &dbt;
} else
@@ -1326,8 +1994,22 @@ split: needkey = ret = stack = 0;
if (ret != 0)
goto err;
+ /*
+ * SR [#6059]
+ * If we do not own a lock on the page anymore then
+ * clear the cursor so we don't point at it.
+ * Even if we call __bam_stkrel above we still
+ * may have entered the routine with the cursor
+ * positioned to a particular record. This
+ * is in the case where C_RECNUM is set.
+ */
+ if (own == 0) {
+ cp->pgno = PGNO_INVALID;
+ cp->indx = 0;
+ }
+
/* Split the tree. */
- if ((ret = __bam_split(dbc, arg)) != 0)
+ if ((ret = __bam_split(dbc, arg, &root_pgno)) != 0)
return (ret);
goto split;
@@ -1351,8 +2033,15 @@ done: /*
* flag. If we're successful, we either moved the cursor or the item
* is no longer deleted. If we're not successful, then we're just a
* copy, no need to have the flag set.
+ *
+ * We may have instantiated off-page duplicate cursors during the put,
+ * so clear the deleted bit from the off-page duplicate cursor as well.
*/
F_CLR(cp, C_DELETED);
+ if (cp->opd != NULL) {
+ cp = (BTREE_CURSOR *)cp->opd->internal;
+ F_CLR(cp, C_DELETED);
+ }
return (ret);
}
@@ -1361,22 +2050,22 @@ done: /*
* __bam_c_rget --
* Return the record number for a cursor.
*
- * PUBLIC: int __bam_c_rget __P((DBC *, DBT *, u_int32_t));
+ * PUBLIC: int __bam_c_rget __P((DBC *, DBT *));
*/
int
-__bam_c_rget(dbc, data, flags)
+__bam_c_rget(dbc, data)
DBC *dbc;
DBT *data;
- u_int32_t flags;
{
BTREE_CURSOR *cp;
DB *dbp;
DBT dbt;
+ DB_MPOOLFILE *mpf;
db_recno_t recno;
- int exact, ret;
+ int exact, ret, t_ret;
- COMPQUIET(flags, 0);
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
/*
@@ -1384,27 +2073,28 @@ __bam_c_rget(dbc, data, flags)
* Get a copy of the key.
* Release the page, making sure we don't release it twice.
*/
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
+ if ((ret = __memp_fget(mpf, &cp->pgno, 0, &cp->page)) != 0)
return (ret);
memset(&dbt, 0, sizeof(DBT));
if ((ret = __db_ret(dbp, cp->page,
- cp->indx, &dbt, &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
+ cp->indx, &dbt, &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0)
goto err;
- ret = memp_fput(dbp->mpf, cp->page, 0);
+ ret = __memp_fput(mpf, cp->page, 0);
cp->page = NULL;
if (ret != 0)
return (ret);
- if ((ret = __bam_search(dbc, &dbt,
+ if ((ret = __bam_search(dbc, PGNO_INVALID, &dbt,
F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND,
1, &recno, &exact)) != 0)
goto err;
- ret = __db_retcopy(dbp, data,
- &recno, sizeof(recno), &dbc->rdata.data, &dbc->rdata.ulen);
+ ret = __db_retcopy(dbp->dbenv, data,
+ &recno, sizeof(recno), &dbc->rdata->data, &dbc->rdata->ulen);
/* Release the stack. */
-err: __bam_stkrel(dbc, 0);
+err: if ((t_ret = __bam_stkrel(dbc, 0)) != 0 && ret == 0)
+ ret = t_ret;
return (ret);
}
@@ -1444,17 +2134,15 @@ __bam_c_first(dbc)
DBC *dbc;
{
BTREE_CURSOR *cp;
- DB *dbp;
db_pgno_t pgno;
int ret;
- dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
ret = 0;
/* Walk down the left-hand side of the tree. */
for (pgno = cp->root;;) {
- ACQUIRE_CUR_SET(dbc, DB_LOCK_READ, pgno, ret);
+ ACQUIRE_CUR_COUPLE(dbc, DB_LOCK_READ, pgno, ret);
if (ret != 0)
return (ret);
@@ -1462,7 +2150,7 @@ __bam_c_first(dbc)
if (ISLEAF(cp->page))
break;
- pgno = GET_BINTERNAL(cp->page, 0)->pgno;
+ pgno = GET_BINTERNAL(dbc->dbp, cp->page, 0)->pgno;
}
/* If we want a write lock instead of a read lock, get it now. */
@@ -1472,9 +2160,11 @@ __bam_c_first(dbc)
return (ret);
}
+ cp->indx = 0;
+
/* If on an empty page or a deleted record, move to the next one. */
if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(dbc))
- if ((ret = __bam_c_next(dbc, 0)) != 0)
+ if ((ret = __bam_c_next(dbc, 0, 0)) != 0)
return (ret);
return (0);
@@ -1489,17 +2179,15 @@ __bam_c_last(dbc)
DBC *dbc;
{
BTREE_CURSOR *cp;
- DB *dbp;
db_pgno_t pgno;
int ret;
- dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
ret = 0;
/* Walk down the right-hand side of the tree. */
for (pgno = cp->root;;) {
- ACQUIRE_CUR_SET(dbc, DB_LOCK_READ, pgno, ret);
+ ACQUIRE_CUR_COUPLE(dbc, DB_LOCK_READ, pgno, ret);
if (ret != 0)
return (ret);
@@ -1507,8 +2195,8 @@ __bam_c_last(dbc)
if (ISLEAF(cp->page))
break;
- pgno =
- GET_BINTERNAL(cp->page, NUM_ENT(cp->page) - O_INDX)->pgno;
+ pgno = GET_BINTERNAL(dbc->dbp, cp->page,
+ NUM_ENT(cp->page) - O_INDX)->pgno;
}
/* If we want a write lock instead of a read lock, get it now. */
@@ -1535,18 +2223,16 @@ __bam_c_last(dbc)
* Move to the next record.
*/
static int
-__bam_c_next(dbc, initial_move)
+__bam_c_next(dbc, initial_move, deleted_okay)
DBC *dbc;
- int initial_move;
+ int initial_move, deleted_okay;
{
BTREE_CURSOR *cp;
- DB *dbp;
db_indx_t adjust;
db_lockmode_t lock_mode;
db_pgno_t pgno;
int ret;
- dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
ret = 0;
@@ -1566,7 +2252,7 @@ __bam_c_next(dbc, initial_move)
F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE : DB_LOCK_READ;
}
if (cp->page == NULL) {
- ACQUIRE_CUR(dbc, lock_mode, ret);
+ ACQUIRE_CUR(dbc, lock_mode, cp->pgno, ret);
if (ret != 0)
return (ret);
}
@@ -1587,12 +2273,13 @@ __bam_c_next(dbc, initial_move)
= NEXT_PGNO(cp->page)) == PGNO_INVALID)
return (DB_NOTFOUND);
- ACQUIRE_CUR_SET(dbc, lock_mode, pgno, ret);
+ ACQUIRE_CUR(dbc, lock_mode, pgno, ret);
if (ret != 0)
return (ret);
+ cp->indx = 0;
continue;
}
- if (IS_CUR_DELETED(dbc)) {
+ if (!deleted_okay && IS_CUR_DELETED(dbc)) {
cp->indx += adjust;
continue;
}
@@ -1610,13 +2297,11 @@ __bam_c_prev(dbc)
DBC *dbc;
{
BTREE_CURSOR *cp;
- DB *dbp;
db_indx_t adjust;
db_lockmode_t lock_mode;
db_pgno_t pgno;
int ret;
- dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
ret = 0;
@@ -1636,7 +2321,7 @@ __bam_c_prev(dbc)
F_ISSET(dbc, DBC_RMW) ? DB_LOCK_WRITE : DB_LOCK_READ;
}
if (cp->page == NULL) {
- ACQUIRE_CUR(dbc, lock_mode, ret);
+ ACQUIRE_CUR(dbc, lock_mode, cp->pgno, ret);
if (ret != 0)
return (ret);
}
@@ -1648,7 +2333,7 @@ __bam_c_prev(dbc)
PREV_PGNO(cp->page)) == PGNO_INVALID)
return (DB_NOTFOUND);
- ACQUIRE_CUR_SET(dbc, lock_mode, pgno, ret);
+ ACQUIRE_CUR(dbc, lock_mode, pgno, ret);
if (ret != 0)
return (ret);
@@ -1671,8 +2356,9 @@ __bam_c_prev(dbc)
* Move to a specified record.
*/
static int
-__bam_c_search(dbc, key, flags, exactp)
+__bam_c_search(dbc, root_pgno, key, flags, exactp)
DBC *dbc;
+ db_pgno_t root_pgno;
const DBT *key;
u_int32_t flags;
int *exactp;
@@ -1681,7 +2367,7 @@ __bam_c_search(dbc, key, flags, exactp)
BTREE_CURSOR *cp;
DB *dbp;
PAGE *h;
- db_indx_t indx;
+ db_indx_t indx, *inp;
db_pgno_t bt_lpgno;
db_recno_t recno;
u_int32_t sflags;
@@ -1712,6 +2398,9 @@ __bam_c_search(dbc, key, flags, exactp)
case DB_GET_BOTH:
sflags = (F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND) | S_EXACT;
goto search;
+ case DB_GET_BOTH_RANGE:
+ sflags = (F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND);
+ goto search;
case DB_SET_RANGE:
sflags =
(F_ISSET(dbc, DBC_RMW) ? S_WRITE : S_READ) | S_DUPFIRST;
@@ -1753,11 +2442,12 @@ fast_search: /*
/* Lock and retrieve the page on which we last inserted. */
h = NULL;
- ACQUIRE(dbc,
- DB_LOCK_WRITE, bt_lpgno, cp->lock, bt_lpgno, h, ret);
+ ACQUIRE_CUR(dbc, DB_LOCK_WRITE, bt_lpgno, ret);
if (ret != 0)
goto fast_miss;
+ h = cp->page;
+ inp = P_INP(dbp, h);
/*
* It's okay if the page type isn't right or it's empty, it
* just means that the world changed.
@@ -1796,7 +2486,7 @@ fast_search: /*
if (flags == DB_KEYLAST)
goto fast_hit;
for (;
- indx > 0 && h->inp[indx - P_INDX] == h->inp[indx];
+ indx > 0 && inp[indx - P_INDX] == inp[indx];
indx -= P_INDX)
;
goto fast_hit;
@@ -1823,7 +2513,7 @@ try_begin: if (h->prev_pgno == PGNO_INVALID) {
goto fast_hit;
for (;
indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
- h->inp[indx] == h->inp[indx + P_INDX];
+ inp[indx] == inp[indx + P_INDX];
indx += P_INDX)
;
goto fast_hit;
@@ -1848,12 +2538,14 @@ fast_miss: /*
* This was not the right page, so we do not need to retain
* the lock even in the presence of transactions.
*/
- DISCARD(dbc, 1, cp->lock, h, ret);
+ DISCARD_CUR(dbc, ret);
+ cp->pgno = PGNO_INVALID;
+ (void)__LPUT(dbc, cp->lock);
if (ret != 0)
return (ret);
-search: if ((ret =
- __bam_search(dbc, key, sflags, 1, NULL, exactp)) != 0)
+search: if ((ret = __bam_search(dbc, root_pgno,
+ key, sflags, 1, NULL, exactp)) != 0)
return (ret);
break;
default:
@@ -1870,12 +2562,15 @@ search: if ((ret =
/*
* If we inserted a key into the first or last slot of the tree,
* remember where it was so we can do it more quickly next time.
+ * If there are duplicates and we are inserting into the last slot,
+ * the cursor will point _to_ the last item, not after it, which
+ * is why we subtract P_INDX below.
*/
if (TYPE(cp->page) == P_LBTREE &&
(flags == DB_KEYFIRST || flags == DB_KEYLAST))
t->bt_lpgno =
(NEXT_PGNO(cp->page) == PGNO_INVALID &&
- cp->indx >= NUM_ENT(cp->page)) ||
+ cp->indx >= NUM_ENT(cp->page) - P_INDX) ||
(PREV_PGNO(cp->page) == PGNO_INVALID &&
cp->indx == 0) ? cp->pgno : PGNO_INVALID;
return (0);
@@ -1893,11 +2588,14 @@ __bam_c_physdel(dbc)
DB *dbp;
DBT key;
DB_LOCK lock;
+ DB_MPOOLFILE *mpf;
PAGE *h;
db_pgno_t pgno;
int delete_page, empty_page, exact, level, ret;
dbp = dbc->dbp;
+ memset(&key, 0, sizeof(DBT));
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
delete_page = empty_page = ret = 0;
@@ -1911,7 +2609,7 @@ __bam_c_physdel(dbc)
* space will never be reused unless the exact same key is specified.
*/
if (delete_page &&
- !F_ISSET(dbc, DBC_OPD) && F_ISSET(dbp, DB_BT_REVSPLIT))
+ !F_ISSET(dbc, DBC_OPD) && F_ISSET(dbp, DB_AM_REVSPLITOFF))
delete_page = 0;
/*
@@ -1926,13 +2624,17 @@ __bam_c_physdel(dbc)
* To delete a leaf page other than an empty root page, we need a
* copy of a key from the page. Use the 0th page index since it's
* the last key the page held.
+ *
+ * !!!
+ * Note that because __bam_c_physdel is always called from a cursor
+ * close, it should be safe to use the cursor's own "my_rkey" memory
+ * to temporarily hold this key. We shouldn't own any returned-data
+ * memory of interest--if we do, we're in trouble anyway.
*/
- if (delete_page) {
- memset(&key, 0, sizeof(DBT));
+ if (delete_page)
if ((ret = __db_ret(dbp, cp->page,
- 0, &key, &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
+ 0, &key, &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0)
return (ret);
- }
/*
* Delete the items. If page isn't empty, we adjust the cursors.
@@ -1940,7 +2642,7 @@ __bam_c_physdel(dbc)
* !!!
* The following operations to delete a page may deadlock. The easy
* scenario is if we're deleting an item because we're closing cursors
- * because we've already deadlocked and want to call txn_abort(). If
+ * because we've already deadlocked and want to call txn->abort. If
* we fail due to deadlock, we'll leave a locked, possibly empty page
* in the tree, which won't be empty long because we'll undo the delete
* when we undo the transaction's modifications.
@@ -1977,8 +2679,8 @@ __bam_c_physdel(dbc)
*/
for (level = LEAFLEVEL;; ++level) {
/* Acquire a page and its parent, locked. */
- if ((ret = __bam_search(
- dbc, &key, S_WRPAIR, level, NULL, &exact)) != 0)
+ if ((ret = __bam_search(dbc, PGNO_INVALID,
+ &key, S_WRPAIR, level, NULL, &exact)) != 0)
return (ret);
/*
@@ -2031,19 +2733,19 @@ __bam_c_physdel(dbc)
*/
switch (TYPE(h)) {
case P_IBTREE:
- pgno = GET_BINTERNAL(h, 0)->pgno;
+ pgno = GET_BINTERNAL(dbp, h, 0)->pgno;
break;
case P_IRECNO:
- pgno = GET_RINTERNAL(h, 0)->pgno;
+ pgno = GET_RINTERNAL(dbp, h, 0)->pgno;
break;
default:
- return (__db_pgfmt(dbp, PGNO(h)));
+ return (__db_pgfmt(dbp->dbenv, PGNO(h)));
}
if ((ret =
__db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &lock)) != 0)
break;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if ((ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
break;
BT_STK_PUSH(dbp->dbenv, cp, h, 0, lock, DB_LOCK_WRITE, ret);
if (ret != 0)
@@ -2076,10 +2778,12 @@ __bam_c_getstack(dbc)
BTREE_CURSOR *cp;
DB *dbp;
DBT dbt;
+ DB_MPOOLFILE *mpf;
PAGE *h;
int exact, ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
/*
@@ -2087,21 +2791,22 @@ __bam_c_getstack(dbc)
* routine has to already hold a read lock on the page, so there
* is no additional lock to acquire.
*/
- if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0)
+ if ((ret = __memp_fget(mpf, &cp->pgno, 0, &h)) != 0)
return (ret);
/* Get a copy of a key from the page. */
memset(&dbt, 0, sizeof(DBT));
if ((ret = __db_ret(dbp,
- h, 0, &dbt, &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
+ h, 0, &dbt, &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0)
goto err;
/* Get a write-locked stack for the page. */
exact = 0;
- ret = __bam_search(dbc, &dbt, S_KEYFIRST, 1, NULL, &exact);
+ ret = __bam_search(dbc, PGNO_INVALID,
+ &dbt, S_KEYFIRST, 1, NULL, &exact);
err: /* Discard the key and the page. */
- if ((t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0)
+ if ((t_ret = __memp_fput(mpf, h, 0)) != 0 && ret == 0)
ret = t_ret;
return (ret);
@@ -2122,7 +2827,8 @@ __bam_isopd(dbc, pgnop)
if (TYPE(dbc->internal->page) != P_LBTREE)
return (0);
- bo = GET_BOVERFLOW(dbc->internal->page, dbc->internal->indx + O_INDX);
+ bo = GET_BOVERFLOW(dbc->dbp,
+ dbc->internal->page, dbc->internal->indx + O_INDX);
if (B_TYPE(bo->type) == B_DUPLICATE) {
*pgnop = bo->pgno;
return (1);
diff --git a/db/btree/bt_delete.c b/db/btree/bt_delete.c
index 972588788..ef6e34caf 100644
--- a/db/btree/bt_delete.c
+++ b/db/btree/bt_delete.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_delete.c,v 11.31 2001/01/17 18:48:46 bostic Exp $";
+static const char revid[] = "$Id: bt_delete.c,v 11.46 2003/06/30 17:19:29 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -53,88 +53,11 @@ static const char revid[] = "$Id: bt_delete.c,v 11.31 2001/01/17 18:48:46 bostic
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "btree.h"
-#include "lock.h"
-
-/*
- * __bam_delete --
- * Delete the items referenced by a key.
- *
- * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
- */
-int
-__bam_delete(dbp, txn, key, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key;
- u_int32_t flags;
-{
- DBC *dbc;
- DBT lkey;
- DBT data;
- u_int32_t f_init, f_next;
- int ret, t_ret;
-
- PANIC_CHECK(dbp->dbenv);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del");
- DB_CHECK_TXN(dbp, txn);
-
- /* Check for invalid flags. */
- if ((ret =
- __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
- return (ret);
-
- /* Allocate a cursor. */
- if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
- return (ret);
-
- DEBUG_LWRITE(dbc, txn, "bam_delete", key, NULL, flags);
-
- /*
- * Walk a cursor through the key/data pairs, deleting as we go. Set
- * the DB_DBT_USERMEM flag, as this might be a threaded application
- * and the flags checking will catch us. We don't actually want the
- * keys or data, so request a partial of length 0.
- */
- memset(&lkey, 0, sizeof(lkey));
- F_SET(&lkey, DB_DBT_USERMEM | DB_DBT_PARTIAL);
- memset(&data, 0, sizeof(data));
- F_SET(&data, DB_DBT_USERMEM | DB_DBT_PARTIAL);
-
- /*
- * If locking (and we haven't already acquired CDB locks), set the
- * read-modify-write flag.
- */
- f_init = DB_SET;
- f_next = DB_NEXT_DUP;
- if (STD_LOCKING(dbc)) {
- f_init |= DB_RMW;
- f_next |= DB_RMW;
- }
-
- /* Walk through the set of key/data pairs, deleting as we go. */
- if ((ret = dbc->c_get(dbc, key, &data, f_init)) != 0)
- goto err;
- for (;;) {
- if ((ret = dbc->c_del(dbc, 0)) != 0)
- goto err;
- if ((ret = dbc->c_get(dbc, &lkey, &data, f_next)) != 0) {
- if (ret == DB_NOTFOUND) {
- ret = 0;
- break;
- }
- goto err;
- }
- }
-
-err: /* Discard the cursor. */
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
-}
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
+#include "dbinc/mp.h"
/*
* __bam_ditem --
@@ -151,14 +74,18 @@ __bam_ditem(dbc, h, indx)
BINTERNAL *bi;
BKEYDATA *bk;
DB *dbp;
+ DB_MPOOLFILE *mpf;
u_int32_t nbytes;
int ret;
+ db_indx_t *inp;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
+ inp = P_INP(dbp, h);
switch (TYPE(h)) {
case P_IBTREE:
- bi = GET_BINTERNAL(h, indx);
+ bi = GET_BINTERNAL(dbp, h, indx);
switch (B_TYPE(bi->type)) {
case B_DUPLICATE:
case B_KEYDATA:
@@ -171,7 +98,7 @@ __bam_ditem(dbc, h, indx)
return (ret);
break;
default:
- return (__db_pgfmt(dbp, PGNO(h)));
+ return (__db_pgfmt(dbp->dbenv, PGNO(h)));
}
break;
case P_IRECNO:
@@ -195,7 +122,7 @@ __bam_ditem(dbc, h, indx)
* won't work!
*/
if (indx + P_INDX < (u_int32_t)NUM_ENT(h) &&
- h->inp[indx] == h->inp[indx + P_INDX])
+ inp[indx] == inp[indx + P_INDX])
return (__bam_adjindx(dbc,
h, indx, indx + O_INDX, 0));
/*
@@ -203,14 +130,14 @@ __bam_ditem(dbc, h, indx)
* doesn't matter if we delete the key item before or
* after the data item for the purposes of this one.
*/
- if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
+ if (indx > 0 && inp[indx] == inp[indx - P_INDX])
return (__bam_adjindx(dbc,
h, indx, indx - P_INDX, 0));
}
/* FALLTHROUGH */
case P_LDUP:
case P_LRECNO:
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
switch (B_TYPE(bk->type)) {
case B_DUPLICATE:
nbytes = BOVERFLOW_SIZE;
@@ -218,24 +145,24 @@ __bam_ditem(dbc, h, indx)
case B_OVERFLOW:
nbytes = BOVERFLOW_SIZE;
if ((ret = __db_doff(
- dbc, (GET_BOVERFLOW(h, indx))->pgno)) != 0)
+ dbc, (GET_BOVERFLOW(dbp, h, indx))->pgno)) != 0)
return (ret);
break;
case B_KEYDATA:
nbytes = BKEYDATA_SIZE(bk->len);
break;
default:
- return (__db_pgfmt(dbp, PGNO(h)));
+ return (__db_pgfmt(dbp->dbenv, PGNO(h)));
}
break;
default:
- return (__db_pgfmt(dbp, PGNO(h)));
+ return (__db_pgfmt(dbp->dbenv, PGNO(h)));
}
/* Delete the item and mark the page dirty. */
if ((ret = __db_ditem(dbc, h, indx, nbytes)) != 0)
return (ret);
- if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = __memp_fset(mpf, h, DB_MPOOL_DIRTY)) != 0)
return (ret);
return (0);
@@ -255,33 +182,37 @@ __bam_adjindx(dbc, h, indx, indx_copy, is_insert)
int is_insert;
{
DB *dbp;
- db_indx_t copy;
+ DB_MPOOLFILE *mpf;
+ db_indx_t copy, *inp;
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
+ inp = P_INP(dbp, h);
/* Log the change. */
- if (DB_LOGGING(dbc) &&
- (ret = __bam_adj_log(dbp->dbenv, dbc->txn, &LSN(h),
- 0, dbp->log_fileid, PGNO(h), &LSN(h), indx, indx_copy,
- (u_int32_t)is_insert)) != 0)
- return (ret);
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __bam_adj_log(dbp, dbc->txn, &LSN(h), 0,
+ PGNO(h), &LSN(h), indx, indx_copy, (u_int32_t)is_insert)) != 0)
+ return (ret);
+ } else
+ LSN_NOT_LOGGED(LSN(h));
/* Shuffle the indices and mark the page dirty. */
if (is_insert) {
- copy = h->inp[indx_copy];
+ copy = inp[indx_copy];
if (indx != NUM_ENT(h))
- memmove(&h->inp[indx + O_INDX], &h->inp[indx],
+ memmove(&inp[indx + O_INDX], &inp[indx],
sizeof(db_indx_t) * (NUM_ENT(h) - indx));
- h->inp[indx] = copy;
+ inp[indx] = copy;
++NUM_ENT(h);
} else {
--NUM_ENT(h);
if (indx != NUM_ENT(h))
- memmove(&h->inp[indx], &h->inp[indx + O_INDX],
+ memmove(&inp[indx], &inp[indx + O_INDX],
sizeof(db_indx_t) * (NUM_ENT(h) - indx));
}
- if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = __memp_fset(mpf, h, DB_MPOOL_DIRTY)) != 0)
return (ret);
return (0);
@@ -303,6 +234,7 @@ __bam_dpages(dbc, stack_epg)
DB *dbp;
DBT a, b;
DB_LOCK c_lock, p_lock;
+ DB_MPOOLFILE *mpf;
EPG *epg;
PAGE *child, *parent;
db_indx_t nitems;
@@ -311,6 +243,7 @@ __bam_dpages(dbc, stack_epg)
int done, ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
/*
@@ -328,8 +261,7 @@ __bam_dpages(dbc, stack_epg)
*/
ret = 0;
for (epg = cp->sp; epg < stack_epg; ++epg) {
- if ((t_ret =
- memp_fput(dbp->mpf, epg->page, 0)) != 0 && ret == 0)
+ if ((t_ret = __memp_fput(mpf, epg->page, 0)) != 0 && ret == 0)
ret = t_ret;
(void)__TLPUT(dbc, epg->lock);
}
@@ -364,7 +296,7 @@ __bam_dpages(dbc, stack_epg)
pgno = PGNO(epg->page);
nitems = NUM_ENT(epg->page);
- if ((ret = memp_fput(dbp->mpf, epg->page, 0)) != 0)
+ if ((ret = __memp_fput(mpf, epg->page, 0)) != 0)
goto err_inc;
(void)__TLPUT(dbc, epg->lock);
@@ -394,7 +326,7 @@ __bam_dpages(dbc, stack_epg)
err_inc: ++epg;
err: for (; epg <= cp->csp; ++epg) {
if (epg->page != NULL)
- (void)memp_fput(dbp->mpf, epg->page, 0);
+ (void)__memp_fput(mpf, epg->page, 0);
(void)__TLPUT(dbc, epg->lock);
}
BT_STK_CLR(cp);
@@ -415,14 +347,15 @@ err: for (; epg <= cp->csp; ++epg) {
for (done = 0; !done;) {
/* Initialize. */
parent = child = NULL;
- p_lock.off = c_lock.off = LOCK_INVALID;
+ LOCK_INIT(p_lock);
+ LOCK_INIT(c_lock);
/* Lock the root. */
pgno = root_pgno;
if ((ret =
__db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &p_lock)) != 0)
goto stop;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &parent)) != 0)
+ if ((ret = __memp_fget(mpf, &pgno, 0, &parent)) != 0)
goto stop;
if (NUM_ENT(parent) != 1)
@@ -434,7 +367,7 @@ err: for (; epg <= cp->csp; ++epg) {
* If this is overflow, then try to delete it.
* The child may or may not still point at it.
*/
- bi = GET_BINTERNAL(parent, 0);
+ bi = GET_BINTERNAL(dbp, parent, 0);
if (B_TYPE(bi->type) == B_OVERFLOW)
if ((ret = __db_doff(dbc,
((BOVERFLOW *)bi->data)->pgno)) != 0)
@@ -442,7 +375,7 @@ err: for (; epg <= cp->csp; ++epg) {
pgno = bi->pgno;
break;
case P_IRECNO:
- pgno = GET_RINTERNAL(parent, 0)->pgno;
+ pgno = GET_RINTERNAL(dbp, parent, 0)->pgno;
break;
default:
goto stop;
@@ -452,24 +385,24 @@ err: for (; epg <= cp->csp; ++epg) {
if ((ret =
__db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, &c_lock)) != 0)
goto stop;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &child)) != 0)
+ if ((ret = __memp_fget(mpf, &pgno, 0, &child)) != 0)
goto stop;
/* Log the change. */
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
memset(&a, 0, sizeof(a));
a.data = child;
a.size = dbp->pgsize;
memset(&b, 0, sizeof(b));
- b.data = P_ENTRY(parent, 0);
+ b.data = P_ENTRY(dbp, parent, 0);
b.size = TYPE(parent) == P_IRECNO ? RINTERNAL_SIZE :
BINTERNAL_SIZE(((BINTERNAL *)b.data)->len);
- if ((ret =
- __bam_rsplit_log(dbp->dbenv, dbc->txn, &child->lsn,
- 0, dbp->log_fileid, PGNO(child), &a, PGNO(parent),
- RE_NREC(parent), &b, &parent->lsn)) != 0)
+ if ((ret = __bam_rsplit_log(dbp, dbc->txn,
+ &child->lsn, 0, PGNO(child), &a, PGNO(parent),
+ RE_NREC(parent), &b, &parent->lsn)) != 0)
goto stop;
- }
+ } else
+ LSN_NOT_LOGGED(child->lsn);
/*
* Make the switch.
@@ -491,9 +424,9 @@ err: for (; epg <= cp->csp; ++epg) {
RE_NREC_SET(parent, rcnt);
/* Mark the pages dirty. */
- if ((ret = memp_fset(dbp->mpf, parent, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = __memp_fset(mpf, parent, DB_MPOOL_DIRTY)) != 0)
goto stop;
- if ((ret = memp_fset(dbp->mpf, child, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = __memp_fset(mpf, child, DB_MPOOL_DIRTY)) != 0)
goto stop;
/* Adjust the cursors. */
@@ -514,15 +447,13 @@ err: for (; epg <= cp->csp; ++epg) {
if (0) {
stop: done = 1;
}
- if (p_lock.off != LOCK_INVALID)
- (void)__TLPUT(dbc, p_lock);
+ (void)__TLPUT(dbc, p_lock);
if (parent != NULL &&
- (t_ret = memp_fput(dbp->mpf, parent, 0)) != 0 && ret == 0)
+ (t_ret = __memp_fput(mpf, parent, 0)) != 0 && ret == 0)
ret = t_ret;
- if (c_lock.off != LOCK_INVALID)
- (void)__TLPUT(dbc, c_lock);
+ (void)__TLPUT(dbc, c_lock);
if (child != NULL &&
- (t_ret = memp_fput(dbp->mpf, child, 0)) != 0 && ret == 0)
+ (t_ret = __memp_fput(mpf, child, 0)) != 0 && ret == 0)
ret = t_ret;
}
diff --git a/db/btree/bt_method.c b/db/btree/bt_method.c
index 5e3af27d0..84abe96a2 100644
--- a/db/btree/bt_method.c
+++ b/db/btree/bt_method.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2000
+ * Copyright (c) 1999-2003
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_method.c,v 11.20 2000/11/30 00:58:28 ubell Exp $";
+static const char revid[] = "$Id: bt_method.c,v 11.34 2003/06/30 17:19:32 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -16,19 +16,22 @@ static const char revid[] = "$Id: bt_method.c,v 11.20 2000/11/30 00:58:28 ubell
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-#include "qam.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/qam.h"
-static int __bam_set_bt_compare
- __P((DB *, int (*)(DB *, const DBT *, const DBT *)));
static int __bam_set_bt_maxkey __P((DB *, u_int32_t));
+static int __bam_get_bt_minkey __P((DB *, u_int32_t *));
static int __bam_set_bt_minkey __P((DB *, u_int32_t));
static int __bam_set_bt_prefix
__P((DB *, size_t(*)(DB *, const DBT *, const DBT *)));
+static int __ram_get_re_delim __P((DB *, int *));
static int __ram_set_re_delim __P((DB *, int));
+static int __ram_get_re_len __P((DB *, u_int32_t *));
static int __ram_set_re_len __P((DB *, u_int32_t));
+static int __ram_get_re_pad __P((DB *, int *));
static int __ram_set_re_pad __P((DB *, int));
+static int __ram_get_re_source __P((DB *, const char **));
static int __ram_set_re_source __P((DB *, const char *));
/*
@@ -55,6 +58,7 @@ __bam_db_create(dbp)
dbp->set_bt_compare = __bam_set_bt_compare;
dbp->set_bt_maxkey = __bam_set_bt_maxkey;
+ dbp->get_bt_minkey = __bam_get_bt_minkey;
dbp->set_bt_minkey = __bam_set_bt_minkey;
dbp->set_bt_prefix = __bam_set_bt_prefix;
@@ -62,9 +66,13 @@ __bam_db_create(dbp)
t->re_delim = '\n';
t->re_eof = 1;
+ dbp->get_re_delim = __ram_get_re_delim;
dbp->set_re_delim = __ram_set_re_delim;
+ dbp->get_re_len = __ram_get_re_len;
dbp->set_re_len = __ram_set_re_len;
+ dbp->get_re_pad = __ram_get_re_pad;
dbp->set_re_pad = __ram_set_re_pad;
+ dbp->get_re_source = __ram_get_re_source;
dbp->set_re_source = __ram_set_re_source;
return (0);
@@ -82,7 +90,8 @@ __bam_db_close(dbp)
{
BTREE *t;
- t = dbp->bt_internal;
+ if ((t = dbp->bt_internal) == NULL)
+ return (0);
/* Recno */
/* Close any backing source file descriptor. */
if (t->re_fp != NULL)
@@ -90,15 +99,46 @@ __bam_db_close(dbp)
/* Free any backing source file name. */
if (t->re_source != NULL)
- __os_freestr(t->re_source);
+ __os_free(dbp->dbenv, t->re_source);
- __os_free(t, sizeof(BTREE));
+ __os_free(dbp->dbenv, t);
dbp->bt_internal = NULL;
return (0);
}
/*
+ * __bam_map_flags --
+ * Map Btree specific flags from public to the internal values.
+ *
+ * PUBLIC: void __bam_map_flags __P((DB *, u_int32_t *, u_int32_t *));
+ */
+void
+__bam_map_flags(dbp, inflagsp, outflagsp)
+ DB *dbp;
+ u_int32_t *inflagsp, *outflagsp;
+{
+ COMPQUIET(dbp, NULL);
+
+ if (FLD_ISSET(*inflagsp, DB_DUP)) {
+ FLD_SET(*outflagsp, DB_AM_DUP);
+ FLD_CLR(*inflagsp, DB_DUP);
+ }
+ if (FLD_ISSET(*inflagsp, DB_DUPSORT)) {
+ FLD_SET(*outflagsp, DB_AM_DUP | DB_AM_DUPSORT);
+ FLD_CLR(*inflagsp, DB_DUPSORT);
+ }
+ if (FLD_ISSET(*inflagsp, DB_RECNUM)) {
+ FLD_SET(*outflagsp, DB_AM_RECNUM);
+ FLD_CLR(*inflagsp, DB_RECNUM);
+ }
+ if (FLD_ISSET(*inflagsp, DB_REVSPLITOFF)) {
+ FLD_SET(*outflagsp, DB_AM_REVSPLITOFF);
+ FLD_CLR(*inflagsp, DB_REVSPLITOFF);
+ }
+}
+
+/*
* __bam_set_flags --
* Set Btree specific flags.
*
@@ -112,50 +152,31 @@ __bam_set_flags(dbp, flagsp)
u_int32_t flags;
flags = *flagsp;
- if (LF_ISSET(DB_DUP | DB_DUPSORT | DB_RECNUM | DB_REVSPLITOFF)) {
+ if (LF_ISSET(DB_DUP | DB_DUPSORT | DB_RECNUM | DB_REVSPLITOFF))
DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_flags");
- /*
- * The DB_DUP and DB_DUPSORT flags are shared by the Hash
- * and Btree access methods.
- */
- if (LF_ISSET(DB_DUP | DB_DUPSORT))
- DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE | DB_OK_HASH);
-
- if (LF_ISSET(DB_RECNUM | DB_REVSPLITOFF))
- DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE);
-
- if (LF_ISSET(DB_DUP | DB_DUPSORT)) {
- /* DB_DUP/DB_DUPSORT is incompatible with DB_RECNUM. */
- if (F_ISSET(dbp, DB_BT_RECNUM))
- goto incompat;
-
- if (LF_ISSET(DB_DUPSORT)) {
- if (dbp->dup_compare == NULL)
- dbp->dup_compare = __bam_defcmp;
- F_SET(dbp, DB_AM_DUPSORT);
- }
-
- F_SET(dbp, DB_AM_DUP);
- LF_CLR(DB_DUP | DB_DUPSORT);
- }
-
- if (LF_ISSET(DB_RECNUM)) {
- /* DB_RECNUM is incompatible with DB_DUP/DB_DUPSORT. */
- if (F_ISSET(dbp, DB_AM_DUP))
- goto incompat;
-
- F_SET(dbp, DB_BT_RECNUM);
- LF_CLR(DB_RECNUM);
- }
-
- if (LF_ISSET(DB_REVSPLITOFF)) {
- F_SET(dbp, DB_BT_REVSPLIT);
- LF_CLR(DB_REVSPLITOFF);
- }
-
- *flagsp = flags;
- }
+ /*
+ * The DB_DUP and DB_DUPSORT flags are shared by the Hash
+ * and Btree access methods.
+ */
+ if (LF_ISSET(DB_DUP | DB_DUPSORT))
+ DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE | DB_OK_HASH);
+
+ if (LF_ISSET(DB_RECNUM | DB_REVSPLITOFF))
+ DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE);
+
+ /* DB_DUP/DB_DUPSORT is incompatible with DB_RECNUM. */
+ if (LF_ISSET(DB_DUP | DB_DUPSORT) && F_ISSET(dbp, DB_AM_RECNUM))
+ goto incompat;
+
+ /* DB_RECNUM is incompatible with DB_DUP/DB_DUPSORT. */
+ if (LF_ISSET(DB_RECNUM) && F_ISSET(dbp, DB_AM_DUP))
+ goto incompat;
+
+ if (LF_ISSET(DB_DUPSORT) && dbp->dup_compare == NULL)
+ dbp->dup_compare = __bam_defcmp;
+
+ __bam_map_flags(dbp, flagsp, &dbp->flags);
return (0);
incompat:
@@ -165,15 +186,18 @@ incompat:
/*
* __bam_set_bt_compare --
* Set the comparison function.
+ *
+ * PUBLIC: int __bam_set_bt_compare
+ * PUBLIC: __P((DB *, int (*)(DB *, const DBT *, const DBT *)));
*/
-static int
+int
__bam_set_bt_compare(dbp, func)
DB *dbp;
int (*func) __P((DB *, const DBT *, const DBT *));
{
BTREE *t;
- DB_ILLEGAL_AFTER_OPEN(dbp, "set_bt_compare");
+ DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_bt_compare");
DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE);
t = dbp->bt_internal;
@@ -200,7 +224,7 @@ __bam_set_bt_maxkey(dbp, bt_maxkey)
{
BTREE *t;
- DB_ILLEGAL_AFTER_OPEN(dbp, "set_bt_maxkey");
+ DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_bt_maxkey");
DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE);
t = dbp->bt_internal;
@@ -215,6 +239,24 @@ __bam_set_bt_maxkey(dbp, bt_maxkey)
}
/*
+ * __bam_get_bt_minkey --
+ * Get the minimum keys per page.
+ */
+static int
+__bam_get_bt_minkey(dbp, bt_minkeyp)
+ DB *dbp;
+ u_int32_t *bt_minkeyp;
+{
+ BTREE *t;
+
+ DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE);
+
+ t = dbp->bt_internal;
+ *bt_minkeyp = t->bt_minkey;
+ return (0);
+}
+
+/*
* __bam_set_bt_minkey --
* Set the minimum keys per page.
*/
@@ -225,7 +267,7 @@ __bam_set_bt_minkey(dbp, bt_minkey)
{
BTREE *t;
- DB_ILLEGAL_AFTER_OPEN(dbp, "set_bt_minkey");
+ DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_bt_minkey");
DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE);
t = dbp->bt_internal;
@@ -250,7 +292,7 @@ __bam_set_bt_prefix(dbp, func)
{
BTREE *t;
- DB_ILLEGAL_AFTER_OPEN(dbp, "set_bt_prefix");
+ DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_bt_prefix");
DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE);
t = dbp->bt_internal;
@@ -260,6 +302,29 @@ __bam_set_bt_prefix(dbp, func)
}
/*
+ * __ram_map_flags --
+ * Map Recno specific flags from public to the internal values.
+ *
+ * PUBLIC: void __ram_map_flags __P((DB *, u_int32_t *, u_int32_t *));
+ */
+void
+__ram_map_flags(dbp, inflagsp, outflagsp)
+ DB *dbp;
+ u_int32_t *inflagsp, *outflagsp;
+{
+ COMPQUIET(dbp, NULL);
+
+ if (FLD_ISSET(*inflagsp, DB_RENUMBER)) {
+ FLD_SET(*outflagsp, DB_AM_RENUMBER);
+ FLD_CLR(*inflagsp, DB_RENUMBER);
+ }
+ if (FLD_ISSET(*inflagsp, DB_SNAPSHOT)) {
+ FLD_SET(*outflagsp, DB_AM_SNAPSHOT);
+ FLD_CLR(*inflagsp, DB_SNAPSHOT);
+ }
+}
+
+/*
* __ram_set_flags --
* Set Recno specific flags.
*
@@ -275,21 +340,27 @@ __ram_set_flags(dbp, flagsp)
flags = *flagsp;
if (LF_ISSET(DB_RENUMBER | DB_SNAPSHOT)) {
DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_flags");
-
DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO);
+ }
- if (LF_ISSET(DB_RENUMBER)) {
- F_SET(dbp, DB_RE_RENUMBER);
- LF_CLR(DB_RENUMBER);
- }
+ __ram_map_flags(dbp, flagsp, &dbp->flags);
+ return (0);
+}
- if (LF_ISSET(DB_SNAPSHOT)) {
- F_SET(dbp, DB_RE_SNAPSHOT);
- LF_CLR(DB_SNAPSHOT);
- }
+/*
+ * __ram_get_re_delim --
+ * Get the variable-length input record delimiter.
+ */
+static int
+__ram_get_re_delim(dbp, re_delimp)
+ DB *dbp;
+ int *re_delimp;
+{
+ BTREE *t;
- *flagsp = flags;
- }
+ DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO);
+ t = dbp->bt_internal;
+ *re_delimp = t->re_delim;
return (0);
}
@@ -304,18 +375,35 @@ __ram_set_re_delim(dbp, re_delim)
{
BTREE *t;
- DB_ILLEGAL_AFTER_OPEN(dbp, "set_re_delim");
+ DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_re_delim");
DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO);
t = dbp->bt_internal;
t->re_delim = re_delim;
- F_SET(dbp, DB_RE_DELIMITER);
+ F_SET(dbp, DB_AM_DELIMITER);
return (0);
}
/*
+ * __ram_get_re_len --
+ * Get the fixed-length record length.
+ */
+static int
+__ram_get_re_len(dbp, re_lenp)
+ DB *dbp;
+ u_int32_t *re_lenp;
+{
+ BTREE *t;
+
+ DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO);
+ t = dbp->bt_internal;
+ *re_lenp = t->re_len;
+ return (0);
+}
+
+/*
* __ram_set_re_len --
* Set the variable-length input record length.
*/
@@ -327,7 +415,7 @@ __ram_set_re_len(dbp, re_len)
BTREE *t;
QUEUE *q;
- DB_ILLEGAL_AFTER_OPEN(dbp, "set_re_len");
+ DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_re_len");
DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO);
t = dbp->bt_internal;
@@ -336,12 +424,30 @@ __ram_set_re_len(dbp, re_len)
q = dbp->q_internal;
q->re_len = re_len;
- F_SET(dbp, DB_RE_FIXEDLEN);
+ F_SET(dbp, DB_AM_FIXEDLEN);
return (0);
}
/*
+ * __ram_get_re_pad --
+ * Get the fixed-length record pad character.
+ */
+static int
+__ram_get_re_pad(dbp, re_padp)
+ DB *dbp;
+ int *re_padp;
+{
+ BTREE *t;
+
+ DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO);
+
+ t = dbp->bt_internal;
+ *re_padp = t->re_pad;
+ return (0);
+}
+
+/*
* __ram_set_re_pad --
* Set the fixed-length record pad character.
*/
@@ -353,7 +459,7 @@ __ram_set_re_pad(dbp, re_pad)
BTREE *t;
QUEUE *q;
- DB_ILLEGAL_AFTER_OPEN(dbp, "set_re_pad");
+ DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_re_pad");
DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO);
t = dbp->bt_internal;
@@ -362,8 +468,26 @@ __ram_set_re_pad(dbp, re_pad)
q = dbp->q_internal;
q->re_pad = re_pad;
- F_SET(dbp, DB_RE_PAD);
+ F_SET(dbp, DB_AM_PAD);
+
+ return (0);
+}
+
+/*
+ * __ram_get_re_source --
+ * Get the backing source file name.
+ */
+static int
+__ram_get_re_source(dbp, re_sourcep)
+ DB *dbp;
+ const char **re_sourcep;
+{
+ BTREE *t;
+ DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO);
+
+ t = dbp->bt_internal;
+ *re_sourcep = t->re_source;
return (0);
}
@@ -378,7 +502,7 @@ __ram_set_re_source(dbp, re_source)
{
BTREE *t;
- DB_ILLEGAL_AFTER_OPEN(dbp, "set_re_source");
+ DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_re_source");
DB_ILLEGAL_METHOD(dbp, DB_OK_RECNO);
t = dbp->bt_internal;
diff --git a/db/btree/bt_open.c b/db/btree/bt_open.c
index 405c1880f..20f594fe5 100644
--- a/db/btree/bt_open.c
+++ b/db/btree/bt_open.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,47 +43,48 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_open.c,v 11.42 2000/11/30 00:58:28 ubell Exp $";
+static const char revid[] = "$Id: bt_open.c,v 11.87 2003/07/17 01:39:09 margo Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
-#include <limits.h>
#include <string.h>
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_swap.h"
-#include "btree.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "log.h"
-#include "mp.h"
+#include "dbinc/crypto.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_swap.h"
+#include "dbinc/btree.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/lock.h"
+#include "dbinc/log.h"
+#include "dbinc/mp.h"
+#include "dbinc/fop.h"
+
+static void __bam_init_meta __P((DB *, BTMETA *, db_pgno_t, DB_LSN *));
/*
* __bam_open --
* Open a btree.
*
- * PUBLIC: int __bam_open __P((DB *, const char *, db_pgno_t, u_int32_t));
+ * PUBLIC: int __bam_open __P((DB *,
+ * PUBLIC: DB_TXN *, const char *, db_pgno_t, u_int32_t));
*/
int
-__bam_open(dbp, name, base_pgno, flags)
+__bam_open(dbp, txn, name, base_pgno, flags)
DB *dbp;
+ DB_TXN *txn;
const char *name;
db_pgno_t base_pgno;
u_int32_t flags;
{
BTREE *t;
+ COMPQUIET(name, NULL);
t = dbp->bt_internal;
- /* Initialize the remaining fields/methods of the DB. */
- dbp->del = __bam_delete;
- dbp->key_range = __bam_key_range;
- dbp->stat = __bam_stat;
-
/*
* We don't permit the user to specify a prefix routine if they didn't
* also specify a comparison routine, they can't know enough about our
@@ -99,8 +100,8 @@ __bam_open(dbp, name, base_pgno, flags)
* Verify that the bt_minkey value specified won't cause the
* calculation of ovflsize to underflow [#2406] for this pagesize.
*/
- if (B_MINKEY_TO_OVFLSIZE(t->bt_minkey, dbp->pgsize) >
- B_MINKEY_TO_OVFLSIZE(DEFMINKEYPAGE, dbp->pgsize)) {
+ if (B_MINKEY_TO_OVFLSIZE(dbp, t->bt_minkey, dbp->pgsize) >
+ B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) {
__db_err(dbp->dbenv,
"bt_minkey value of %lu too high for page size of %lu",
(u_long)t->bt_minkey, (u_long)dbp->pgsize);
@@ -108,7 +109,7 @@ __bam_open(dbp, name, base_pgno, flags)
}
/* Start up the tree. */
- return (__bam_read_root(dbp, name, base_pgno, flags));
+ return (__bam_read_root(dbp, txn, base_pgno, flags));
}
/*
@@ -143,6 +144,7 @@ __bam_metachk(dbp, name, btm)
name, (u_long)vers);
return (DB_OLD_VERSION);
case 8:
+ case 9:
break;
default:
__db_err(dbenv,
@@ -187,13 +189,13 @@ __bam_metachk(dbp, name, btm)
if (F_ISSET(&btm->dbmeta, BTM_RECNUM)) {
if (dbp->type != DB_BTREE)
goto wrong_type;
- F_SET(dbp, DB_BT_RECNUM);
+ F_SET(dbp, DB_AM_RECNUM);
if ((ret = __db_fcchk(dbenv,
- "DB->open", dbp->flags, DB_AM_DUP, DB_BT_RECNUM)) != 0)
+ "DB->open", dbp->flags, DB_AM_DUP, DB_AM_RECNUM)) != 0)
return (ret);
} else
- if (F_ISSET(dbp, DB_BT_RECNUM)) {
+ if (F_ISSET(dbp, DB_AM_RECNUM)) {
__db_err(dbenv,
"%s: DB_RECNUM specified to open method but not set in database",
name);
@@ -203,9 +205,9 @@ __bam_metachk(dbp, name, btm)
if (F_ISSET(&btm->dbmeta, BTM_FIXEDLEN)) {
if (dbp->type != DB_RECNO)
goto wrong_type;
- F_SET(dbp, DB_RE_FIXEDLEN);
+ F_SET(dbp, DB_AM_FIXEDLEN);
} else
- if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN)) {
__db_err(dbenv,
"%s: DB_FIXEDLEN specified to open method but not set in database",
name);
@@ -215,9 +217,9 @@ __bam_metachk(dbp, name, btm)
if (F_ISSET(&btm->dbmeta, BTM_RENUMBER)) {
if (dbp->type != DB_RECNO)
goto wrong_type;
- F_SET(dbp, DB_RE_RENUMBER);
+ F_SET(dbp, DB_AM_RENUMBER);
} else
- if (F_ISSET(dbp, DB_RE_RENUMBER)) {
+ if (F_ISSET(dbp, DB_AM_RENUMBER)) {
__db_err(dbenv,
"%s: DB_RENUMBER specified to open method but not set in database",
name);
@@ -266,116 +268,137 @@ wrong_type:
/*
* __bam_read_root --
- * Check (and optionally create) a tree.
+ * Read the root page and check a tree.
*
- * PUBLIC: int __bam_read_root __P((DB *, const char *, db_pgno_t, u_int32_t));
+ * PUBLIC: int __bam_read_root __P((DB *, DB_TXN *, db_pgno_t, u_int32_t));
*/
int
-__bam_read_root(dbp, name, base_pgno, flags)
+__bam_read_root(dbp, txn, base_pgno, flags)
DB *dbp;
- const char *name;
+ DB_TXN *txn;
db_pgno_t base_pgno;
u_int32_t flags;
{
BTMETA *meta;
BTREE *t;
DBC *dbc;
- DB_LSN orig_lsn;
DB_LOCK metalock;
- PAGE *root;
- int locked, ret, t_ret;
+ DB_MPOOLFILE *mpf;
+ int ret, t_ret;
- ret = 0;
- t = dbp->bt_internal;
meta = NULL;
- root = NULL;
- locked = 0;
+ t = dbp->bt_internal;
+ LOCK_INIT(metalock);
+ mpf = dbp->mpf;
+ ret = 0;
- /*
- * Get a cursor. If DB_CREATE is specified, we may be creating
- * the root page, and to do that safely in CDB we need a write
- * cursor. In STD_LOCKING mode, we'll synchronize using the
- * meta page lock instead.
- */
- if ((ret = dbp->cursor(dbp, dbp->open_txn,
- &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbp->dbenv) ?
- DB_WRITECURSOR : 0)) != 0)
+ /* Get a cursor. */
+ if ((ret = __db_cursor(dbp, txn, &dbc, 0)) != 0)
return (ret);
- /* Get, and optionally create the metadata page. */
+ /* Get the metadata page. */
if ((ret =
__db_lget(dbc, 0, base_pgno, DB_LOCK_READ, 0, &metalock)) != 0)
goto err;
- if ((ret = memp_fget(
- dbp->mpf, &base_pgno, DB_MPOOL_CREATE, (PAGE **)&meta)) != 0)
+ if ((ret = __memp_fget(mpf, &base_pgno, 0, &meta)) != 0)
goto err;
/*
- * If the magic number is correct, we're not creating the tree.
- * Correct any fields that may not be right. Note, all of the
- * local flags were set by DB->open.
+ * If the magic number is set, the tree has been created. Correct
+ * any fields that may not be right. Note, all of the local flags
+ * were set by DB->open.
+ *
+ * Otherwise, we'd better be in recovery or abort, in which case the
+ * metadata page will be created/initialized elsewhere.
*/
-again: if (meta->dbmeta.magic != 0) {
+ if (meta->dbmeta.magic == DB_BTREEMAGIC) {
t->bt_maxkey = meta->maxkey;
t->bt_minkey = meta->minkey;
- t->re_pad = meta->re_pad;
+ t->re_pad = (int)meta->re_pad;
t->re_len = meta->re_len;
t->bt_meta = base_pgno;
t->bt_root = meta->root;
-
- (void)memp_fput(dbp->mpf, meta, 0);
- meta = NULL;
- goto done;
+ } else {
+ DB_ASSERT(IS_RECOVERING(dbp->dbenv) ||
+ F_ISSET(dbp, DB_AM_RECOVER));
}
- /* In recovery if it's not there it will be created elsewhere.*/
- if (IS_RECOVERING(dbp->dbenv))
- goto done;
-
- /* If we're doing CDB; we now have to get the write lock. */
- if (CDB_LOCKING(dbp->dbenv)) {
- /*
- * We'd better have DB_CREATE set if we're actually doing
- * the create.
- */
- DB_ASSERT(LF_ISSET(DB_CREATE));
- if ((ret = lock_get(dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE,
- &dbc->lock_dbt, DB_LOCK_WRITE, &dbc->mylock)) != 0)
- goto err;
- }
+ /*
+ * !!!
+ * If creating a subdatabase, we've already done an insert when
+ * we put the subdatabase's entry into the master database, so
+ * our last-page-inserted value is wrongly initialized for the
+ * master database, not the subdatabase we're creating. I'm not
+ * sure where the *right* place to clear this value is, it's not
+ * intuitively obvious that it belongs here.
+ */
+ t->bt_lpgno = PGNO_INVALID;
/*
- * If we are doing locking, relase the read lock and get a write lock.
- * We want to avoid deadlock.
+ * We must initialize last_pgno, it could be stale.
+ * We update this without holding the meta page write
+ * locked. This is ok since two threads in the code
+ * must be setting it to the same value. SR #7159.
*/
- if (locked == 0 && STD_LOCKING(dbc)) {
- if ((ret = __LPUT(dbc, metalock)) != 0)
- goto err;
- if ((ret = __db_lget(dbc,
- 0, base_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
- goto err;
- locked = 1;
- goto again;
- }
+ if (!LF_ISSET(DB_RDONLY) && dbp->meta_pgno == PGNO_BASE_MD) {
+ __memp_last_pgno(mpf, &meta->dbmeta.last_pgno);
+ ret = __memp_fput(mpf, meta, DB_MPOOL_DIRTY);
+ } else
+ ret = __memp_fput(mpf, meta, 0);
+ meta = NULL;
+
+err: /* Put the metadata page back. */
+ if (meta != NULL &&
+ (t_ret = __memp_fput(mpf, meta, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
+ ret = t_ret;
+
+ if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
+ ret = t_ret;
+ return (ret);
+}
+
+/*
+ * __bam_init_meta --
+ *
+ * Initialize a btree meta-data page. The following fields may need
+ * to be updated later: last_pgno, root.
+ */
+static void
+__bam_init_meta(dbp, meta, pgno, lsnp)
+ DB *dbp;
+ BTMETA *meta;
+ db_pgno_t pgno;
+ DB_LSN *lsnp;
+{
+ BTREE *t;
- /* Initialize the tree structure metadata information. */
- orig_lsn = meta->dbmeta.lsn;
memset(meta, 0, sizeof(BTMETA));
- meta->dbmeta.lsn = orig_lsn;
- meta->dbmeta.pgno = base_pgno;
+ meta->dbmeta.lsn = *lsnp;
+ meta->dbmeta.pgno = pgno;
meta->dbmeta.magic = DB_BTREEMAGIC;
meta->dbmeta.version = DB_BTREEVERSION;
meta->dbmeta.pagesize = dbp->pgsize;
+ if (F_ISSET(dbp, DB_AM_CHKSUM))
+ FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM);
+ if (F_ISSET(dbp, DB_AM_ENCRYPT)) {
+ meta->dbmeta.encrypt_alg =
+ ((DB_CIPHER *)dbp->dbenv->crypto_handle)->alg;
+ DB_ASSERT(meta->dbmeta.encrypt_alg != 0);
+ meta->crypto_magic = meta->dbmeta.magic;
+ }
meta->dbmeta.type = P_BTREEMETA;
meta->dbmeta.free = PGNO_INVALID;
+ meta->dbmeta.last_pgno = pgno;
if (F_ISSET(dbp, DB_AM_DUP))
F_SET(&meta->dbmeta, BTM_DUP);
- if (F_ISSET(dbp, DB_RE_FIXEDLEN))
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN))
F_SET(&meta->dbmeta, BTM_FIXEDLEN);
- if (F_ISSET(dbp, DB_BT_RECNUM))
+ if (F_ISSET(dbp, DB_AM_RECNUM))
F_SET(&meta->dbmeta, BTM_RECNUM);
- if (F_ISSET(dbp, DB_RE_RENUMBER))
+ if (F_ISSET(dbp, DB_AM_RENUMBER))
F_SET(&meta->dbmeta, BTM_RENUMBER);
if (F_ISSET(dbp, DB_AM_SUBDB))
F_SET(&meta->dbmeta, BTM_SUBDB);
@@ -385,14 +408,170 @@ again: if (meta->dbmeta.magic != 0) {
F_SET(&meta->dbmeta, BTM_RECNO);
memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN);
+ t = dbp->bt_internal;
meta->maxkey = t->bt_maxkey;
meta->minkey = t->bt_minkey;
meta->re_len = t->re_len;
- meta->re_pad = t->re_pad;
+ meta->re_pad = (u_int32_t)t->re_pad;
+}
+
+/*
+ * __bam_new_file --
+ * Create the necessary pages to begin a new database file.
+ *
+ * This code appears more complex than it is because of the two cases (named
+ * and unnamed). The way to read the code is that for each page being created,
+ * there are three parts: 1) a "get page" chunk (which either uses malloc'd
+ * memory or calls __memp_fget), 2) the initialization, and 3) the "put page"
+ * chunk which either does a fop write or an __memp_fput.
+ *
+ * PUBLIC: int __bam_new_file __P((DB *, DB_TXN *, DB_FH *, const char *));
+ */
+int
+__bam_new_file(dbp, txn, fhp, name)
+ DB *dbp;
+ DB_TXN *txn;
+ DB_FH *fhp;
+ const char *name;
+{
+ BTMETA *meta;
+ DB_ENV *dbenv;
+ DB_LSN lsn;
+ DB_MPOOLFILE *mpf;
+ DB_PGINFO pginfo;
+ DBT pdbt;
+ PAGE *root;
+ db_pgno_t pgno;
+ int ret;
+ void *buf;
- /* If necessary, log the meta-data and root page creates. */
- if ((ret = __db_log_page(dbp,
- name, &orig_lsn, base_pgno, (PAGE *)meta)) != 0)
+ dbenv = dbp->dbenv;
+ mpf = dbp->mpf;
+ root = NULL;
+ meta = NULL;
+ memset(&pdbt, 0, sizeof(pdbt));
+ buf = NULL;
+
+ /* Build meta-data page. */
+
+ if (name == NULL) {
+ pgno = PGNO_BASE_MD;
+ ret = __memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &meta);
+ } else {
+ pginfo.db_pagesize = dbp->pgsize;
+ pginfo.flags =
+ F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP));
+ pginfo.type = dbp->type;
+ pdbt.data = &pginfo;
+ pdbt.size = sizeof(pginfo);
+ ret = __os_calloc(dbp->dbenv, 1, dbp->pgsize, &buf);
+ meta = (BTMETA *)buf;
+ }
+ if (ret != 0)
+ return (ret);
+
+ LSN_NOT_LOGGED(lsn);
+ __bam_init_meta(dbp, meta, PGNO_BASE_MD, &lsn);
+ meta->root = 1;
+ meta->dbmeta.last_pgno = 1;
+
+ if (name == NULL)
+ ret = __memp_fput(mpf, meta, DB_MPOOL_DIRTY);
+ else {
+ if ((ret = __db_pgout(dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0)
+ goto err;
+ ret = __fop_write(dbenv, txn, name,
+ DB_APP_DATA, fhp, dbp->pgsize, 0, 0, buf, dbp->pgsize, 1,
+ F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0);
+ }
+ if (ret != 0)
+ goto err;
+ meta = NULL;
+
+ /* Now build root page. */
+ if (name == NULL) {
+ pgno = 1;
+ if ((ret =
+ __memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &root)) != 0)
+ goto err;
+ } else {
+#ifdef DIAGNOSTIC
+ memset(buf, CLEAR_BYTE, dbp->pgsize);
+#endif
+ root = (PAGE *)buf;
+ }
+
+ P_INIT(root, dbp->pgsize, 1, PGNO_INVALID, PGNO_INVALID,
+ LEAFLEVEL, dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE);
+ LSN_NOT_LOGGED(root->lsn);
+
+ if (name == NULL)
+ ret = __memp_fput(mpf, root, DB_MPOOL_DIRTY);
+ else {
+ if ((ret = __db_pgout(dbenv, root->pgno, root, &pdbt)) != 0)
+ goto err;
+ ret = __fop_write(dbenv, txn, name,
+ DB_APP_DATA, fhp, dbp->pgsize, 1, 0, buf, dbp->pgsize, 1,
+ F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0);
+ }
+ if (ret != 0)
+ goto err;
+ root = NULL;
+
+err: if (buf != NULL)
+ __os_free(dbenv, buf);
+ else {
+ if (meta != NULL)
+ (void)__memp_fput(mpf, meta, 0);
+ if (root != NULL)
+ (void)__memp_fput(mpf, root, 0);
+ }
+ return (ret);
+}
+
+/*
+ * __bam_new_subdb --
+ * Create a metadata page and a root page for a new btree.
+ *
+ * PUBLIC: int __bam_new_subdb __P((DB *, DB *, DB_TXN *));
+ */
+int
+__bam_new_subdb(mdbp, dbp, txn)
+ DB *mdbp, *dbp;
+ DB_TXN *txn;
+{
+ BTMETA *meta;
+ DBC *dbc;
+ DB_ENV *dbenv;
+ DB_LOCK metalock;
+ DB_LSN lsn;
+ DB_MPOOLFILE *mpf;
+ PAGE *root;
+ int ret, t_ret;
+
+ dbenv = mdbp->dbenv;
+ mpf = mdbp->mpf;
+ dbc = NULL;
+ meta = NULL;
+ root = NULL;
+
+ if ((ret = __db_cursor(mdbp, txn,
+ &dbc, CDB_LOCKING(dbenv) ? DB_WRITECURSOR : 0)) != 0)
+ return (ret);
+
+ /* Get, and optionally create the metadata page. */
+ if ((ret = __db_lget(dbc,
+ 0, dbp->meta_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
+ goto err;
+ if ((ret =
+ __memp_fget(mpf, &dbp->meta_pgno, DB_MPOOL_CREATE, &meta)) != 0)
+ goto err;
+
+ /* Build meta-data page. */
+ lsn = meta->dbmeta.lsn;
+ __bam_init_meta(dbp, meta, dbp->meta_pgno, &lsn);
+ if ((ret = __db_log_page(mdbp,
+ txn, &meta->dbmeta.lsn, dbp->meta_pgno, (PAGE *)meta)) != 0)
goto err;
/* Create and initialize a root page. */
@@ -401,68 +580,35 @@ again: if (meta->dbmeta.magic != 0) {
goto err;
root->level = LEAFLEVEL;
- if (dbp->open_txn != NULL && (ret = __bam_root_log(dbp->dbenv,
- dbp->open_txn, &meta->dbmeta.lsn, 0, dbp->log_fileid,
+ if (DBENV_LOGGING(dbenv) &&
+ (ret = __bam_root_log(mdbp, txn, &meta->dbmeta.lsn, 0,
meta->dbmeta.pgno, root->pgno, &meta->dbmeta.lsn)) != 0)
goto err;
meta->root = root->pgno;
-
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOGMETA, ret, name);
- if ((ret = __db_log_page(dbp,
- name, &root->lsn, root->pgno, root)) != 0)
+ if ((ret =
+ __db_log_page(mdbp, txn, &root->lsn, root->pgno, root)) != 0)
goto err;
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name);
-
- t->bt_meta = base_pgno;
- t->bt_root = root->pgno;
/* Release the metadata and root pages. */
- if ((ret = memp_fput(dbp->mpf, meta, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = __memp_fput(mpf, meta, DB_MPOOL_DIRTY)) != 0)
goto err;
meta = NULL;
- if ((ret = memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = __memp_fput(mpf, root, DB_MPOOL_DIRTY)) != 0)
goto err;
root = NULL;
-
- /*
- * Flush the metadata and root pages to disk.
- *
- * !!!
- * It's not useful to return not-yet-flushed here -- convert it to
- * an error.
- */
- if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE) {
- __db_err(dbp->dbenv, "Metapage flush failed");
- ret = EINVAL;
- }
- DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name);
-
-done: /*
- * !!!
- * We already did an insert and so the last-page-inserted has been
- * set. I'm not sure where the *right* place to clear this value
- * is, it's not intuitively obvious that it belongs here.
- */
- t->bt_lpgno = PGNO_INVALID;
-
err:
-DB_TEST_RECOVERY_LABEL
- /* Put any remaining pages back. */
if (meta != NULL)
- if ((t_ret = memp_fput(dbp->mpf, meta, 0)) != 0 &&
- ret == 0)
+ if ((t_ret = __memp_fput(mpf, meta, 0)) != 0 && ret == 0)
ret = t_ret;
if (root != NULL)
- if ((t_ret = memp_fput(dbp->mpf, root, 0)) != 0 &&
- ret == 0)
+ if ((t_ret = __memp_fput(mpf, root, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if (LOCK_ISSET(metalock))
+ if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
+ ret = t_ret;
+ if (dbc != NULL)
+ if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
ret = t_ret;
-
- /* We can release the metapage lock when we are done. */
- if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0)
- ret = t_ret;
-
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
return (ret);
}
diff --git a/db/btree/bt_put.c b/db/btree/bt_put.c
index 19a04526d..b98c6c579 100644
--- a/db/btree/bt_put.c
+++ b/db/btree/bt_put.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_put.c,v 11.46 2001/01/17 18:48:46 bostic Exp $";
+static const char revid[] = "$Id: bt_put.c,v 11.78 2003/10/31 15:07:40 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -53,12 +53,18 @@ static const char revid[] = "$Id: bt_put.c,v 11.46 2001/01/17 18:48:46 bostic Ex
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/btree.h"
+#include "dbinc/mp.h"
+static int __bam_build
+ __P((DBC *, u_int32_t, DBT *, PAGE *, u_int32_t, u_int32_t));
static int __bam_dup_convert __P((DBC *, PAGE *, u_int32_t));
static int __bam_ovput
__P((DBC *, u_int32_t, db_pgno_t, PAGE *, u_int32_t, DBT *));
+static u_int32_t
+ __bam_partsize __P((DB *, u_int32_t, DBT *, PAGE *, u_int32_t));
/*
* __bam_iitem --
@@ -72,11 +78,13 @@ __bam_iitem(dbc, key, data, op, flags)
DBT *key, *data;
u_int32_t op, flags;
{
+ DB_ENV *dbenv;
BKEYDATA *bk, bk_tmp;
BTREE *t;
BTREE_CURSOR *cp;
DB *dbp;
DBT bk_hdr, tdbt;
+ DB_MPOOLFILE *mpf;
PAGE *h;
db_indx_t indx;
u_int32_t data_size, have_bytes, need_bytes, needed;
@@ -85,6 +93,8 @@ __bam_iitem(dbc, key, data, op, flags)
COMPQUIET(bk, NULL);
dbp = dbc->dbp;
+ dbenv = dbp->dbenv;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
t = dbp->bt_internal;
h = cp->page;
@@ -95,11 +105,9 @@ __bam_iitem(dbc, key, data, op, flags)
* Fixed-length records with partial puts: it's an error to specify
* anything other simple overwrite.
*/
- if (F_ISSET(dbp, DB_RE_FIXEDLEN) &&
- F_ISSET(data, DB_DBT_PARTIAL) && data->dlen != data->size) {
- data_size = data->size;
- goto len_err;
- }
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN) &&
+ F_ISSET(data, DB_DBT_PARTIAL) && data->size != data->dlen)
+ return (__db_rec_repl(dbenv, data->size, data->dlen));
/*
* Figure out how much space the data will take, including if it's a
@@ -110,16 +118,14 @@ __bam_iitem(dbc, key, data, op, flags)
* the fixed-length record size.
*/
data_size = F_ISSET(data, DB_DBT_PARTIAL) ?
- __bam_partsize(op, data, h, indx) : data->size;
+ __bam_partsize(dbp, op, data, h, indx) : data->size;
padrec = 0;
- if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
- if (data_size > t->re_len) {
-len_err: __db_err(dbp->dbenv,
- "Length improper for fixed length record %lu",
- (u_long)data_size);
- return (EINVAL);
- }
- if (data_size < t->re_len) {
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN)) {
+ if (data_size > t->re_len)
+ return (__db_rec_toobig(dbenv, data_size, t->re_len));
+
+ /* Records that are deleted anyway needn't be padded out. */
+ if (!LF_ISSET(BI_DELETED) && data_size < t->re_len) {
padrec = 1;
data_size = t->re_len;
}
@@ -146,12 +152,12 @@ len_err: __db_err(dbp->dbenv,
*/
if (op == DB_CURRENT && dbp->dup_compare != NULL) {
if ((ret = __bam_cmp(dbp, data, h,
- indx + (TYPE(h) == P_LBTREE ? O_INDX : 0),
- dbp->dup_compare, &cmp)) != 0)
+ indx + (TYPE(h) == P_LBTREE ? O_INDX : 0),
+ dbp->dup_compare, &cmp)) != 0)
return (ret);
if (cmp != 0) {
- __db_err(dbp->dbenv,
- "Current data differs from put data");
+ __db_err(dbenv,
+ "Existing data sorts differently from put data");
return (EINVAL);
}
}
@@ -190,7 +196,7 @@ len_err: __db_err(dbp->dbenv,
*/
bigkey = 0;
if (op == DB_CURRENT) {
- bk = GET_BKEYDATA(h,
+ bk = GET_BKEYDATA(dbp, h,
indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
if (B_TYPE(bk->type) == B_KEYDATA)
have_bytes = BKEYDATA_PSIZE(bk->len);
@@ -210,19 +216,14 @@ len_err: __db_err(dbp->dbenv,
needed += need_bytes - have_bytes;
break;
default:
- return (__db_unknown_flag(dbp->dbenv, "__bam_iitem", op));
+ return (__db_unknown_flag(dbenv, "DB->put", op));
}
/*
* If there's not enough room, or the user has put a ceiling on the
* number of keys permitted in the page, split the page.
- *
- * XXX
- * The t->bt_maxkey test here may be insufficient -- do we have to
- * check in the btree split code, so we don't undo it there!?!?
*/
- if (P_FREESPACE(h) < needed ||
- (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey))
+ if (P_FREESPACE(dbp, h) < needed)
return (DB_NEEDSPLIT);
/*
@@ -286,23 +287,24 @@ len_err: __db_err(dbp->dbenv,
* we deadlock or fail while deleting the overflow item or
* replacing the non-overflow item, a subsequent cursor close
* will try and remove the item because the cursor's delete
- * flag is set
+ * flag is set.
*/
(void)__bam_ca_delete(dbp, PGNO(h), indx, 0);
if (TYPE(h) == P_LBTREE) {
++indx;
dupadjust = 1;
+ }
- /*
- * In a Btree deleted records aren't counted (deleted
- * records are counted in a Recno because all accesses
- * are based on record number). If it's a Btree and
- * it's a DB_CURRENT operation overwriting a previously
- * deleted record, increment the record count.
- */
+ /*
+ * In a Btree deleted records aren't counted (deleted records
+ * are counted in a Recno because all accesses are based on
+ * record number). If it's a Btree and it's a DB_CURRENT
+ * operation overwriting a previously deleted record, increment
+ * the record count.
+ */
+ if (TYPE(h) == P_LBTREE || TYPE(h) == P_LDUP)
was_deleted = B_DISSET(bk->type);
- }
/*
* 4. Delete and re-add the data item.
@@ -323,11 +325,16 @@ len_err: __db_err(dbp->dbenv,
replace = 1;
break;
default:
- return (__db_unknown_flag(dbp->dbenv, "__bam_iitem", op));
+ return (__db_unknown_flag(dbenv, "DB->put", op));
}
/* Add the data. */
if (bigdata) {
+ /*
+ * We do not have to handle deleted (BI_DELETED) records
+ * in this case; the actual records should never be created.
+ */
+ DB_ASSERT(!LF_ISSET(BI_DELETED));
if ((ret = __bam_ovput(dbc,
B_OVERFLOW, PGNO_INVALID, h, indx, data)) != 0)
return (ret);
@@ -347,7 +354,7 @@ len_err: __db_err(dbp->dbenv,
if (ret != 0)
return (ret);
}
- if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = __memp_fset(mpf, h, DB_MPOOL_DIRTY)) != 0)
return (ret);
/*
@@ -375,7 +382,7 @@ len_err: __db_err(dbp->dbenv,
* up at least 25% of the space on the page. If it does, move it onto
* its own page.
*/
- if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) {
+ if (dupadjust && P_FREESPACE(dbp, h) <= dbp->pgsize / 2) {
if ((ret = __bam_dup_convert(dbc, h, indx - O_INDX)) != 0)
return (ret);
}
@@ -390,11 +397,10 @@ len_err: __db_err(dbp->dbenv,
/*
* __bam_partsize --
* Figure out how much space a partial data item is in total.
- *
- * PUBLIC: u_int32_t __bam_partsize __P((u_int32_t, DBT *, PAGE *, u_int32_t));
*/
-u_int32_t
-__bam_partsize(op, data, h, indx)
+static u_int32_t
+__bam_partsize(dbp, op, data, h, indx)
+ DB *dbp;
u_int32_t op, indx;
DBT *data;
PAGE *h;
@@ -413,38 +419,18 @@ __bam_partsize(op, data, h, indx)
* Otherwise, it's the data provided plus any already existing data
* that we're not replacing.
*/
- bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+ bk = GET_BKEYDATA(dbp, h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
nbytes =
B_TYPE(bk->type) == B_OVERFLOW ? ((BOVERFLOW *)bk)->tlen : bk->len;
- /*
- * There are really two cases here:
- *
- * Case 1: We are replacing some bytes that do not exist (i.e., they
- * are past the end of the record). In this case the number of bytes
- * we are replacing is irrelevant and all we care about is how many
- * bytes we are going to add from offset. So, the new record length
- * is going to be the size of the new bytes (size) plus wherever those
- * new bytes begin (doff).
- *
- * Case 2: All the bytes we are replacing exist. Therefore, the new
- * size is the oldsize (nbytes) minus the bytes we are replacing (dlen)
- * plus the bytes we are adding (size).
- */
- if (nbytes < data->doff + data->dlen) /* Case 1 */
- return (data->doff + data->size);
-
- return (nbytes + data->size - data->dlen); /* Case 2 */
+ return (__db_partsize(nbytes, data));
}
/*
* __bam_build --
* Build the real record for a partial put, or short fixed-length record.
- *
- * PUBLIC: int __bam_build __P((DBC *, u_int32_t,
- * PUBLIC: DBT *, PAGE *, u_int32_t, u_int32_t));
*/
-int
+static int
__bam_build(dbc, op, dbt, h, indx, nbytes)
DBC *dbc;
u_int32_t op, indx, nbytes;
@@ -454,9 +440,8 @@ __bam_build(dbc, op, dbt, h, indx, nbytes)
BKEYDATA *bk, tbk;
BOVERFLOW *bo;
BTREE *t;
- BTREE_CURSOR *cp;
DB *dbp;
- DBT copy;
+ DBT copy, *rdata;
u_int32_t len, tlen;
u_int8_t *p;
int ret;
@@ -464,26 +449,26 @@ __bam_build(dbc, op, dbt, h, indx, nbytes)
COMPQUIET(bo, NULL);
dbp = dbc->dbp;
- cp = (BTREE_CURSOR *) dbc->internal;
t = dbp->bt_internal;
/* We use the record data return memory, it's only a short-term use. */
- if (dbc->rdata.ulen < nbytes) {
+ rdata = &dbc->my_rdata;
+ if (rdata->ulen < nbytes) {
if ((ret = __os_realloc(dbp->dbenv,
- nbytes, NULL, &dbc->rdata.data)) != 0) {
- dbc->rdata.ulen = 0;
- dbc->rdata.data = NULL;
+ nbytes, &rdata->data)) != 0) {
+ rdata->ulen = 0;
+ rdata->data = NULL;
return (ret);
}
- dbc->rdata.ulen = nbytes;
+ rdata->ulen = nbytes;
}
/*
* We use nul or pad bytes for any part of the record that isn't
* specified; get it over with.
*/
- memset(dbc->rdata.data,
- F_ISSET(dbp, DB_RE_FIXEDLEN) ? t->re_pad : 0, nbytes);
+ memset(rdata->data,
+ F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_pad : 0, nbytes);
/*
* In the next clauses, we need to do three things: a) set p to point
@@ -495,14 +480,15 @@ __bam_build(dbc, op, dbt, h, indx, nbytes)
* the chase.
*/
if (!F_ISSET(dbt, DB_DBT_PARTIAL) || op != DB_CURRENT) {
- p = (u_int8_t *)dbc->rdata.data + dbt->doff;
+ p = (u_int8_t *)rdata->data + dbt->doff;
tlen = dbt->doff;
goto user_copy;
}
/* Find the current record. */
if (indx < NUM_ENT(h)) {
- bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+ bk = GET_BKEYDATA(dbp, h, indx + (TYPE(h) == P_LBTREE ?
+ O_INDX : 0));
bo = (BOVERFLOW *)bk;
} else {
bk = &tbk;
@@ -516,12 +502,12 @@ __bam_build(dbc, op, dbt, h, indx, nbytes)
*/
memset(&copy, 0, sizeof(copy));
if ((ret = __db_goff(dbp, &copy, bo->tlen,
- bo->pgno, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
+ bo->pgno, &rdata->data, &rdata->ulen)) != 0)
return (ret);
/* Skip any leading data from the original record. */
tlen = dbt->doff;
- p = (u_int8_t *)dbc->rdata.data + dbt->doff;
+ p = (u_int8_t *)rdata->data + dbt->doff;
/*
* Copy in any trailing data from the original record.
@@ -542,10 +528,10 @@ __bam_build(dbc, op, dbt, h, indx, nbytes)
}
} else {
/* Copy in any leading data from the original record. */
- memcpy(dbc->rdata.data,
+ memcpy(rdata->data,
bk->data, dbt->doff > bk->len ? bk->len : dbt->doff);
tlen = dbt->doff;
- p = (u_int8_t *)dbc->rdata.data + dbt->doff;
+ p = (u_int8_t *)rdata->data + dbt->doff;
/* Copy in any trailing data from the original record. */
len = dbt->doff + dbt->dlen;
@@ -564,11 +550,11 @@ user_copy:
tlen += dbt->size;
/* Set the DBT to reference our new record. */
- dbc->rdata.size = F_ISSET(dbp, DB_RE_FIXEDLEN) ? t->re_len : tlen;
- dbc->rdata.dlen = 0;
- dbc->rdata.doff = 0;
- dbc->rdata.flags = 0;
- *dbt = dbc->rdata;
+ rdata->size = F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_len : tlen;
+ rdata->dlen = 0;
+ rdata->doff = 0;
+ rdata->flags = 0;
+ *dbt = *rdata;
return (0);
}
@@ -591,6 +577,7 @@ __bam_ritem(dbc, h, indx, data)
db_indx_t cnt, lo, ln, min, off, prefix, suffix;
int32_t nbytes;
int ret;
+ db_indx_t *inp;
u_int8_t *p, *t;
dbp = dbc->dbp;
@@ -600,10 +587,10 @@ __bam_ritem(dbc, h, indx, data)
* to insert and whether it fits is handled in the caller. All we do
* here is manage the page shuffling.
*/
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
/* Log the change. */
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
/*
* We might as well check to see if the two data items share
* a common prefix and suffix -- it can save us a lot of log
@@ -627,17 +614,18 @@ __bam_ritem(dbc, h, indx, data)
orig.size = bk->len - (prefix + suffix);
repl.data = (u_int8_t *)data->data + prefix;
repl.size = data->size - (prefix + suffix);
- if ((ret = __bam_repl_log(dbp->dbenv, dbc->txn,
- &LSN(h), 0, dbp->log_fileid, PGNO(h), &LSN(h),
- (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type),
+ if ((ret = __bam_repl_log(dbp, dbc->txn, &LSN(h), 0, PGNO(h),
+ &LSN(h), (u_int32_t)indx, (u_int32_t)B_DISSET(bk->type),
&orig, &repl, (u_int32_t)prefix, (u_int32_t)suffix)) != 0)
return (ret);
- }
+ } else
+ LSN_NOT_LOGGED(LSN(h));
/*
* Set references to the first in-use byte on the page and the
* first byte of the item being replaced.
*/
+ inp = P_INP(dbp, h);
p = (u_int8_t *)h + HOFFSET(h);
t = (u_int8_t *)bk;
@@ -648,19 +636,19 @@ __bam_ritem(dbc, h, indx, data)
* the regions overlap.
*/
lo = BKEYDATA_SIZE(bk->len);
- ln = BKEYDATA_SIZE(data->size);
+ ln = (db_indx_t)BKEYDATA_SIZE(data->size);
if (lo != ln) {
nbytes = lo - ln; /* Signed difference. */
if (p == t) /* First index is fast. */
- h->inp[indx] += nbytes;
+ inp[indx] += nbytes;
else { /* Else, shift the page. */
- memmove(p + nbytes, p, t - p);
+ memmove(p + nbytes, p, (size_t)(t - p));
/* Adjust the indices' offsets. */
- off = h->inp[indx];
+ off = inp[indx];
for (cnt = 0; cnt < NUM_ENT(h); ++cnt)
- if (h->inp[cnt] <= off)
- h->inp[cnt] += nbytes;
+ if (inp[cnt] <= off)
+ inp[cnt] += nbytes;
}
/* Clean up the page and adjust the item's reference. */
@@ -688,30 +676,35 @@ __bam_dup_convert(dbc, h, indx)
PAGE *h;
u_int32_t indx;
{
- BTREE_CURSOR *cp;
BKEYDATA *bk;
DB *dbp;
DBT hdr;
+ DB_MPOOLFILE *mpf;
PAGE *dp;
- db_indx_t cnt, cpindx, dindx, first, sz;
+ db_indx_t cnt, cpindx, dindx, first, *inp, sz;
int ret;
dbp = dbc->dbp;
- cp = (BTREE_CURSOR *)dbc->internal;
+ mpf = dbp->mpf;
+ inp = P_INP(dbp, h);
/*
* Count the duplicate records and calculate how much room they're
* using on the page.
*/
- while (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
+ while (indx > 0 && inp[indx] == inp[indx - P_INDX])
indx -= P_INDX;
- for (cnt = 0, sz = 0, first = indx;; ++cnt, indx += P_INDX) {
- if (indx >= NUM_ENT(h) || h->inp[first] != h->inp[indx])
- break;
- bk = GET_BKEYDATA(h, indx);
- sz += B_TYPE(bk->type) == B_KEYDATA ?
- BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
- bk = GET_BKEYDATA(h, indx + O_INDX);
+
+ /* Count the key once. */
+ bk = GET_BKEYDATA(dbp, h, indx);
+ sz = B_TYPE(bk->type) == B_KEYDATA ?
+ BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
+
+ /* Sum up all the data items. */
+ for (cnt = 0, first = indx;
+ inp[first] == inp[indx] && indx < NUM_ENT(h);
+ ++cnt, indx += P_INDX) {
+ bk = GET_BKEYDATA(dbp, h, indx + O_INDX);
sz += B_TYPE(bk->type) == B_KEYDATA ?
BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
}
@@ -766,7 +759,7 @@ __bam_dup_convert(dbc, h, indx)
* deleted entries are discarded (if the deleted entry is
* overflow, then free up those pages).
*/
- bk = GET_BKEYDATA(h, dindx + 1);
+ bk = GET_BKEYDATA(dbp, h, dindx + 1);
hdr.data = bk;
hdr.size = B_TYPE(bk->type) == B_KEYDATA ?
BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE;
@@ -778,7 +771,7 @@ __bam_dup_convert(dbc, h, indx)
*/
if (B_TYPE(bk->type) == B_OVERFLOW &&
(ret = __db_doff(dbc,
- (GET_BOVERFLOW(h, dindx + 1))->pgno)) != 0)
+ (GET_BOVERFLOW(dbp, h, dindx + 1))->pgno)) != 0)
goto err;
} else {
if ((ret = __db_pitem(
@@ -802,17 +795,17 @@ __bam_dup_convert(dbc, h, indx)
/* Put in a new data item that points to the duplicates page. */
if ((ret = __bam_ovput(dbc,
- B_DUPLICATE, dp->pgno, h, first + 1, NULL)) != 0)
+ B_DUPLICATE, dp->pgno, h, first + 1, NULL)) != 0)
goto err;
- /* Adjust cursors for all the above movments. */
+ /* Adjust cursors for all the above movements. */
if ((ret = __bam_ca_di(dbc,
- PGNO(h), first + P_INDX, first + P_INDX - indx)) != 0)
+ PGNO(h), first + P_INDX, (int)(first + P_INDX - indx))) != 0)
goto err;
- return (memp_fput(dbp->mpf, dp, DB_MPOOL_DIRTY));
+ return (__memp_fput(mpf, dp, DB_MPOOL_DIRTY));
-err: (void)__db_free(dbc, dp);
+err: (void)__memp_fput(mpf, dp, 0);
return (ret);
}
diff --git a/db/btree/bt_rec.c b/db/btree/bt_rec.c
index 24dc9bc6a..1587028b3 100644
--- a/db/btree/bt_rec.c
+++ b/db/btree/bt_rec.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_rec.c,v 11.35 2001/01/10 16:24:47 ubell Exp $";
+static const char revid[] = "$Id: bt_rec.c,v 11.64 2003/09/13 18:48:58 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -18,287 +18,18 @@ static const char revid[] = "$Id: bt_rec.c,v 11.35 2001/01/10 16:24:47 ubell Exp
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "hash.h"
-#include "btree.h"
-#include "log.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
+#include "dbinc/log.h"
+#include "dbinc/mp.h"
#define IS_BTREE_PAGE(pagep) \
(TYPE(pagep) == P_IBTREE || \
TYPE(pagep) == P_LBTREE || TYPE(pagep) == P_LDUP)
/*
- * __bam_pg_alloc_recover --
- * Recovery function for pg_alloc.
- *
- * PUBLIC: int __bam_pg_alloc_recover
- * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__bam_pg_alloc_recover(dbenv, dbtp, lsnp, op, info)
- DB_ENV *dbenv;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __bam_pg_alloc_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DBMETA *meta;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- db_pgno_t pgno;
- int cmp_n, cmp_p, level, modified, ret;
-
- REC_PRINT(__bam_pg_alloc_print);
- REC_INTRO(__bam_pg_alloc_read, 0);
-
- /*
- * Fix up the allocated page. If we're redoing the operation, we have
- * to get the page (creating it if it doesn't exist), and update its
- * LSN. If we're undoing the operation, we have to reset the page's
- * LSN and put it on the free list.
- *
- * Fix up the metadata page. If we're redoing the operation, we have
- * to get the metadata page and update its LSN and its free pointer.
- * If we're undoing the operation and the page was ever created, we put
- * it on the freelist.
- */
- pgno = PGNO_BASE_MD;
- meta = NULL;
- if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) {
- /* The metadata page must always exist on redo. */
- if (DB_REDO(op)) {
- (void)__db_pgerr(file_dbp, pgno);
- goto out;
- } else
- goto done;
- }
- if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) {
- /*
- * We specify creation and check for it later, because this
- * operation was supposed to create the page, and even in
- * the undo case it's going to get linked onto the freelist
- * which we're also fixing up.
- */
- (void)__db_pgerr(file_dbp, argp->pgno);
- goto err;
- }
-
- /* Fix up the allocated page. */
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &argp->page_lsn);
-
- /*
- * If an inital allocation is aborted and then reallocated
- * during an archival restore the log record will have
- * an LSN for the page but the page will be empty.
- */
- if (IS_ZERO_LSN(LSN(pagep)))
- cmp_p = 0;
- CHECK_LSN(op, cmp_p, &LSN(pagep), &argp->page_lsn);
- /*
- * If we we rolled back this allocation previously during an
- * archive restore, the page may have the LSN of the meta page
- * at the point of the roll back. This will be no more
- * than the LSN of the metadata page at the time of this allocation.
- */
- if (DB_REDO(op) &&
- (cmp_p == 0 ||
- (IS_ZERO_LSN(argp->page_lsn) &&
- log_compare(&LSN(pagep), &argp->meta_lsn) <= 0))) {
- /* Need to redo update described. */
- switch (argp->ptype) {
- case P_LBTREE:
- case P_LRECNO:
- case P_LDUP:
- level = LEAFLEVEL;
- break;
- default:
- level = 0;
- break;
- }
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype);
-
- pagep->lsn = *lsnp;
- modified = 1;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /*
- * Undo the allocation, reinitialize the page and
- * link its next pointer to the free list.
- */
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
-
- pagep->lsn = argp->page_lsn;
- modified = 1;
- }
-
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
- goto err;
- }
-
- /*
- * If the page was newly created, put it on the limbo list.
- */
- if (IS_ZERO_LSN(LSN(pagep)) &&
- IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) {
- /* Put the page in limbo.*/
- if ((ret = __db_add_limbo(dbenv,
- info, argp->fileid, argp->pgno, 1)) != 0)
- goto err;
- }
-
- /* Fix up the metadata page. */
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(meta));
- cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
- CHECK_LSN(op, cmp_p, &LSN(meta), &argp->meta_lsn);
- if (cmp_p == 0 && DB_REDO(op)) {
- /* Need to redo update described. */
- LSN(meta) = *lsnp;
- meta->free = argp->next;
- modified = 1;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /* Need to undo update described. */
- LSN(meta) = argp->meta_lsn;
-
- /*
- * If the page has a zero LSN then its newly created
- * and will go into limbo rather than directly on the
- * free list.
- */
- if (!IS_ZERO_LSN(argp->page_lsn))
- meta->free = argp->pgno;
- modified = 1;
- }
- if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
- /*
- * This could be the metapage from a subdb which is read from disk
- * to recover its creation.
- */
- if (F_ISSET(file_dbp, DB_AM_SUBDB))
- switch (argp->type) {
- case P_BTREEMETA:
- case P_HASHMETA:
- case P_QAMMETA:
- file_dbp->sync(file_dbp, 0);
- break;
- }
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
- if (0) {
-err:
- if (meta != NULL)
- (void)memp_fput(mpf, meta, 0);
- }
-out: REC_CLOSE;
-}
-
-/*
- * __bam_pg_free_recover --
- * Recovery function for pg_free.
- *
- * PUBLIC: int __bam_pg_free_recover
- * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
- */
-int
-__bam_pg_free_recover(dbenv, dbtp, lsnp, op, info)
- DB_ENV *dbenv;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops op;
- void *info;
-{
- __bam_pg_free_args *argp;
- DB *file_dbp;
- DBC *dbc;
- DBMETA *meta;
- DB_LSN copy_lsn;
- DB_MPOOLFILE *mpf;
- PAGE *pagep;
- db_pgno_t pgno;
- int cmp_n, cmp_p, modified, ret;
-
- COMPQUIET(info, NULL);
- REC_PRINT(__bam_pg_free_print);
- REC_INTRO(__bam_pg_free_read, 1);
-
- /*
- * Fix up the freed page. If we're redoing the operation we get the
- * page and explicitly discard its contents, then update its LSN. If
- * we're undoing the operation, we get the page and restore its header.
- * Create the page if necessary, we may be freeing an aborted
- * create.
- */
- if ((ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
- goto out;
- modified = 0;
- __ua_memcpy(&copy_lsn, &LSN(argp->header.data), sizeof(DB_LSN));
- cmp_n = log_compare(lsnp, &LSN(pagep));
- cmp_p = log_compare(&LSN(pagep), &copy_lsn);
- CHECK_LSN(op, cmp_p, &LSN(pagep), &copy_lsn);
- if (DB_REDO(op) &&
- (cmp_p == 0 ||
- (IS_ZERO_LSN(copy_lsn) &&
- log_compare(&LSN(pagep), &argp->meta_lsn) <= 0))) {
- /* Need to redo update described. */
- P_INIT(pagep, file_dbp->pgsize,
- argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID);
- pagep->lsn = *lsnp;
-
- modified = 1;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /* Need to undo update described. */
- memcpy(pagep, argp->header.data, argp->header.size);
-
- modified = 1;
- }
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
- /*
- * Fix up the metadata page. If we're redoing or undoing the operation
- * we get the page and update its LSN and free pointer.
- */
- pgno = PGNO_BASE_MD;
- if ((ret = memp_fget(mpf, &pgno, 0, &meta)) != 0) {
- /* The metadata page must always exist. */
- (void)__db_pgerr(file_dbp, pgno);
- goto out;
- }
-
- modified = 0;
- cmp_n = log_compare(lsnp, &LSN(meta));
- cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
- CHECK_LSN(op, cmp_p, &LSN(meta), &argp->meta_lsn);
- if (cmp_p == 0 && DB_REDO(op)) {
- /* Need to redo the deallocation. */
- meta->free = argp->pgno;
- LSN(meta) = *lsnp;
- modified = 1;
- } else if (cmp_n == 0 && DB_UNDO(op)) {
- /* Need to undo the deallocation. */
- meta->free = argp->next;
- LSN(meta) = argp->meta_lsn;
- modified = 1;
- }
- if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0)
- goto out;
-
-done: *lsnp = argp->prev_lsn;
- ret = 0;
-
-out: REC_CLOSE;
-}
-
-/*
* __bam_split_recover --
* Recovery function for split.
*
@@ -320,7 +51,7 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
db_pgno_t pgno, root_pgno;
u_int32_t ptype;
- int cmp, l_update, p_update, r_update, rc, ret, rootsplit, t_ret;
+ int cmp, l_update, p_update, r_update, rc, ret, ret_l, rootsplit, t_ret;
COMPQUIET(info, NULL);
REC_PRINT(__bam_split_print);
@@ -345,16 +76,16 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
* so it's got to be aligned. Copying it into allocated memory is
* the only way to guarantee this.
*/
- if ((ret = __os_malloc(dbenv, argp->pg.size, NULL, &sp)) != 0)
+ if ((ret = __os_malloc(dbenv, argp->pg.size, &sp)) != 0)
goto out;
memcpy(sp, argp->pg.data, argp->pg.size);
pgno = PGNO(sp);
root_pgno = argp->root_pgno;
- rootsplit = pgno == root_pgno;
- if (memp_fget(mpf, &argp->left, 0, &lp) != 0)
+ rootsplit = root_pgno != PGNO_INVALID;
+ if ((ret_l = __memp_fget(mpf, &argp->left, 0, &lp)) != 0)
lp = NULL;
- if (memp_fget(mpf, &argp->right, 0, &rp) != 0)
+ if (__memp_fget(mpf, &argp->right, 0, &rp) != 0)
rp = NULL;
if (DB_REDO(op)) {
@@ -368,8 +99,8 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
* same reason.
*/
if (rootsplit) {
- if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
- (void)__db_pgerr(file_dbp, pgno);
+ if ((ret = __memp_fget(mpf, &pgno, 0, &pp)) != 0) {
+ ret = __db_pgerr(file_dbp, pgno, ret);
pp = NULL;
goto out;
}
@@ -377,7 +108,7 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
CHECK_LSN(op, cmp, &LSN(pp), &LSN(argp->pg.data));
p_update = cmp == 0;
} else if (lp == NULL) {
- (void)__db_pgerr(file_dbp, argp->left);
+ ret = __db_pgerr(file_dbp, argp->left, ret_l);
goto out;
}
@@ -400,10 +131,8 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
goto check_next;
/* Allocate and initialize new left/right child pages. */
- if ((ret =
- __os_malloc(dbenv, file_dbp->pgsize, NULL, &_lp)) != 0
- || (ret =
- __os_malloc(dbenv, file_dbp->pgsize, NULL, &_rp)) != 0)
+ if ((ret = __os_malloc(dbenv, file_dbp->pgsize, &_lp)) != 0 ||
+ (ret = __os_malloc(dbenv, file_dbp->pgsize, &_rp)) != 0)
goto out;
if (rootsplit) {
P_INIT(_lp, file_dbp->pgsize, argp->left,
@@ -431,31 +160,31 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
goto out;
/* If the left child is wrong, update it. */
- if (lp == NULL && (ret =
- memp_fget(mpf, &argp->left, DB_MPOOL_CREATE, &lp)) != 0) {
- (void)__db_pgerr(file_dbp, argp->left);
+ if (lp == NULL && (ret = __memp_fget(
+ mpf, &argp->left, DB_MPOOL_CREATE, &lp)) != 0) {
+ ret = __db_pgerr(file_dbp, argp->left, ret);
lp = NULL;
goto out;
}
if (l_update) {
memcpy(lp, _lp, file_dbp->pgsize);
lp->lsn = *lsnp;
- if ((ret = memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = __memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
goto out;
lp = NULL;
}
/* If the right child is wrong, update it. */
- if (rp == NULL && (ret = memp_fget(mpf,
- &argp->right, DB_MPOOL_CREATE, &rp)) != 0) {
- (void)__db_pgerr(file_dbp, argp->right);
+ if (rp == NULL && (ret = __memp_fget(
+ mpf, &argp->right, DB_MPOOL_CREATE, &rp)) != 0) {
+ ret = __db_pgerr(file_dbp, argp->right, ret);
rp = NULL;
goto out;
}
if (r_update) {
memcpy(rp, _rp, file_dbp->pgsize);
rp->lsn = *lsnp;
- if ((ret = memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = __memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
goto out;
rp = NULL;
}
@@ -477,11 +206,11 @@ __bam_split_recover(dbenv, dbtp, lsnp, op, info)
P_INIT(pp, file_dbp->pgsize, root_pgno,
PGNO_INVALID, PGNO_INVALID, _lp->level + 1, ptype);
- RE_NREC_SET(pp,
- rc ? __bam_total(_lp) + __bam_total(_rp) : 0);
+ RE_NREC_SET(pp, rc ? __bam_total(file_dbp, _lp) +
+ __bam_total(file_dbp, _rp) : 0);
pp->lsn = *lsnp;
- if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = __memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
goto out;
pp = NULL;
}
@@ -494,8 +223,9 @@ check_next: /*
* page must exist because we're redoing the operation.
*/
if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
- if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
- (void)__db_pgerr(file_dbp, argp->npgno);
+ if ((ret =
+ __memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
+ ret = __db_pgerr(file_dbp, argp->npgno, ret);
np = NULL;
goto out;
}
@@ -505,7 +235,7 @@ check_next: /*
PREV_PGNO(np) = argp->right;
np->lsn = *lsnp;
if ((ret =
- memp_fput(mpf, np, DB_MPOOL_DIRTY)) != 0)
+ __memp_fput(mpf, np, DB_MPOOL_DIRTY)) != 0)
goto out;
np = NULL;
}
@@ -518,13 +248,13 @@ check_next: /*
* the adds onto the page that caused the split, and there's
* really no undo-ing to be done.
*/
- if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
+ if ((ret = __memp_fget(mpf, &pgno, 0, &pp)) != 0) {
pp = NULL;
goto lrundo;
}
if (log_compare(lsnp, &LSN(pp)) == 0) {
memcpy(pp, argp->pg.data, argp->pg.size);
- if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = __memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
goto out;
pp = NULL;
}
@@ -542,7 +272,7 @@ lrundo: if ((rootsplit && lp != NULL) || rp != NULL) {
log_compare(lsnp, &LSN(lp)) == 0) {
lp->lsn = argp->llsn;
if ((ret =
- memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
+ __memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
goto out;
lp = NULL;
}
@@ -550,7 +280,7 @@ lrundo: if ((rootsplit && lp != NULL) || rp != NULL) {
log_compare(lsnp, &LSN(rp)) == 0) {
rp->lsn = argp->rlsn;
if ((ret =
- memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
+ __memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
goto out;
rp = NULL;
}
@@ -565,14 +295,15 @@ lrundo: if ((rootsplit && lp != NULL) || rp != NULL) {
* if there's nothing to undo.
*/
if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
- if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
+ if ((ret =
+ __memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
np = NULL;
goto done;
}
if (log_compare(lsnp, &LSN(np)) == 0) {
PREV_PGNO(np) = argp->left;
np->lsn = argp->nlsn;
- if (memp_fput(mpf, np, DB_MPOOL_DIRTY))
+ if (__memp_fput(mpf, np, DB_MPOOL_DIRTY))
goto out;
np = NULL;
}
@@ -583,22 +314,22 @@ done: *lsnp = argp->prev_lsn;
ret = 0;
out: /* Free any pages that weren't dirtied. */
- if (pp != NULL && (t_ret = memp_fput(mpf, pp, 0)) != 0 && ret == 0)
+ if (pp != NULL && (t_ret = __memp_fput(mpf, pp, 0)) != 0 && ret == 0)
ret = t_ret;
- if (lp != NULL && (t_ret = memp_fput(mpf, lp, 0)) != 0 && ret == 0)
+ if (lp != NULL && (t_ret = __memp_fput(mpf, lp, 0)) != 0 && ret == 0)
ret = t_ret;
- if (np != NULL && (t_ret = memp_fput(mpf, np, 0)) != 0 && ret == 0)
+ if (np != NULL && (t_ret = __memp_fput(mpf, np, 0)) != 0 && ret == 0)
ret = t_ret;
- if (rp != NULL && (t_ret = memp_fput(mpf, rp, 0)) != 0 && ret == 0)
+ if (rp != NULL && (t_ret = __memp_fput(mpf, rp, 0)) != 0 && ret == 0)
ret = t_ret;
/* Free any allocated space. */
if (_lp != NULL)
- __os_free(_lp, file_dbp->pgsize);
+ __os_free(dbenv, _lp);
if (_rp != NULL)
- __os_free(_rp, file_dbp->pgsize);
+ __os_free(dbenv, _rp);
if (sp != NULL)
- __os_free(sp, argp->pg.size);
+ __os_free(dbenv, sp);
REC_CLOSE;
}
@@ -627,23 +358,24 @@ __bam_rsplit_recover(dbenv, dbtp, lsnp, op, info)
db_pgno_t pgno, root_pgno;
int cmp_n, cmp_p, modified, ret;
+ pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__bam_rsplit_print);
REC_INTRO(__bam_rsplit_read, 1);
/* Fix the root page. */
pgno = root_pgno = argp->root_pgno;
- if ((ret = memp_fget(mpf, &pgno, 0, &pagep)) != 0) {
+ if ((ret = __memp_fget(mpf, &pgno, 0, &pagep)) != 0) {
/* The root page must always exist if we are going forward. */
if (DB_REDO(op)) {
- __db_pgerr(file_dbp, pgno);
+ ret = __db_pgerr(file_dbp, pgno, ret);
goto out;
}
/* This must be the root of an OPD tree. */
DB_ASSERT(root_pgno !=
((BTREE *)file_dbp->bt_internal)->bt_root);
ret = 0;
- goto done;
+ goto do_page;
}
modified = 0;
cmp_n = log_compare(lsnp, &LSN(pagep));
@@ -666,22 +398,23 @@ __bam_rsplit_recover(dbenv, dbtp, lsnp, op, info)
pagep->lsn = argp->rootlsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = __memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+do_page:
/*
* Fix the page copied over the root page. It's possible that the
* page never made it to disk, so if we're undo-ing and the page
* doesn't exist, it's okay and there's nothing further to do.
*/
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = __memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op))
goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
+ ret = __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
modified = 0;
- __ua_memcpy(&copy_lsn, &LSN(argp->pgdbt.data), sizeof(DB_LSN));
+ (void)__ua_memcpy(&copy_lsn, &LSN(argp->pgdbt.data), sizeof(DB_LSN));
cmp_n = log_compare(lsnp, &LSN(pagep));
cmp_p = log_compare(&LSN(pagep), &copy_lsn);
CHECK_LSN(op, cmp_p, &LSN(pagep), &copy_lsn);
@@ -694,13 +427,16 @@ __bam_rsplit_recover(dbenv, dbtp, lsnp, op, info)
memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = __memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: REC_CLOSE;
+out: if (pagep != NULL)
+ (void)__memp_fput(mpf, pagep, 0);
+ REC_CLOSE;
}
/*
@@ -725,15 +461,16 @@ __bam_adj_recover(dbenv, dbtp, lsnp, op, info)
PAGE *pagep;
int cmp_n, cmp_p, modified, ret;
+ pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__bam_adj_print);
REC_INTRO(__bam_adj_read, 1);
/* Get the page; if it never existed and we're undoing, we're done. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = __memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op))
goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
+ ret = __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
@@ -745,7 +482,7 @@ __bam_adj_recover(dbenv, dbtp, lsnp, op, info)
/* Need to redo update described. */
if ((ret = __bam_adjindx(dbc,
pagep, argp->indx, argp->indx_copy, argp->is_insert)) != 0)
- goto err;
+ goto out;
LSN(pagep) = *lsnp;
modified = 1;
@@ -753,21 +490,21 @@ __bam_adj_recover(dbenv, dbtp, lsnp, op, info)
/* Need to undo update described. */
if ((ret = __bam_adjindx(dbc,
pagep, argp->indx, argp->indx_copy, !argp->is_insert)) != 0)
- goto err;
+ goto out;
LSN(pagep) = argp->lsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = __memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
- if (0) {
-err: (void)memp_fput(mpf, pagep, 0);
- }
-out: REC_CLOSE;
+out: if (pagep != NULL)
+ (void)__memp_fput(mpf, pagep, 0);
+ REC_CLOSE;
}
/*
@@ -793,15 +530,16 @@ __bam_cadjust_recover(dbenv, dbtp, lsnp, op, info)
PAGE *pagep;
int cmp_n, cmp_p, modified, ret;
+ pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__bam_cadjust_print);
REC_INTRO(__bam_cadjust_read, 1);
/* Get the page; if it never existed and we're undoing, we're done. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = __memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op))
goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
+ ret = __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
@@ -812,11 +550,13 @@ __bam_cadjust_recover(dbenv, dbtp, lsnp, op, info)
if (cmp_p == 0 && DB_REDO(op)) {
/* Need to redo update described. */
if (IS_BTREE_PAGE(pagep)) {
- GET_BINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
+ GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs +=
+ argp->adjust;
if (argp->opflags & CAD_UPDATEROOT)
RE_NREC_ADJ(pagep, argp->adjust);
} else {
- GET_RINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
+ GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs +=
+ argp->adjust;
if (argp->opflags & CAD_UPDATEROOT)
RE_NREC_ADJ(pagep, argp->adjust);
}
@@ -826,24 +566,29 @@ __bam_cadjust_recover(dbenv, dbtp, lsnp, op, info)
} else if (cmp_n == 0 && DB_UNDO(op)) {
/* Need to undo update described. */
if (IS_BTREE_PAGE(pagep)) {
- GET_BINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
+ GET_BINTERNAL(file_dbp, pagep, argp->indx)->nrecs -=
+ argp->adjust;
if (argp->opflags & CAD_UPDATEROOT)
RE_NREC_ADJ(pagep, -(argp->adjust));
} else {
- GET_RINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
+ GET_RINTERNAL(file_dbp, pagep, argp->indx)->nrecs -=
+ argp->adjust;
if (argp->opflags & CAD_UPDATEROOT)
RE_NREC_ADJ(pagep, -(argp->adjust));
}
LSN(pagep) = argp->lsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = __memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: REC_CLOSE;
+out: if (pagep != NULL)
+ (void)__memp_fput(mpf, pagep, 0);
+ REC_CLOSE;
}
/*
@@ -869,15 +614,16 @@ __bam_cdel_recover(dbenv, dbtp, lsnp, op, info)
u_int32_t indx;
int cmp_n, cmp_p, modified, ret;
+ pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__bam_cdel_print);
REC_INTRO(__bam_cdel_read, 1);
/* Get the page; if it never existed and we're undoing, we're done. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = __memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op))
goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
+ ret = __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
@@ -888,27 +634,30 @@ __bam_cdel_recover(dbenv, dbtp, lsnp, op, info)
if (cmp_p == 0 && DB_REDO(op)) {
/* Need to redo update described. */
indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0);
- B_DSET(GET_BKEYDATA(pagep, indx)->type);
+ B_DSET(GET_BKEYDATA(file_dbp, pagep, indx)->type);
LSN(pagep) = *lsnp;
modified = 1;
} else if (cmp_n == 0 && DB_UNDO(op)) {
/* Need to undo update described. */
indx = argp->indx + (TYPE(pagep) == P_LBTREE ? O_INDX : 0);
- B_DCLR(GET_BKEYDATA(pagep, indx)->type);
+ B_DCLR(GET_BKEYDATA(file_dbp, pagep, indx)->type);
(void)__bam_ca_delete(file_dbp, argp->pgno, argp->indx, 0);
LSN(pagep) = argp->lsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = __memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: REC_CLOSE;
+out: if (pagep != NULL)
+ (void)__memp_fput(mpf, pagep, 0);
+ REC_CLOSE;
}
/*
@@ -936,18 +685,19 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info)
int cmp_n, cmp_p, modified, ret;
u_int8_t *p;
+ pagep = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__bam_repl_print);
REC_INTRO(__bam_repl_read, 1);
/* Get the page; if it never existed and we're undoing, we're done. */
- if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+ if ((ret = __memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
if (DB_UNDO(op))
goto done;
- (void)__db_pgerr(file_dbp, argp->pgno);
+ ret = __db_pgerr(file_dbp, argp->pgno, ret);
goto out;
}
- bk = GET_BKEYDATA(pagep, argp->indx);
+ bk = GET_BKEYDATA(file_dbp, pagep, argp->indx);
modified = 0;
cmp_n = log_compare(lsnp, &LSN(pagep));
@@ -961,8 +711,8 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info)
*/
memset(&dbt, 0, sizeof(dbt));
dbt.size = argp->prefix + argp->suffix + argp->repl.size;
- if ((ret = __os_malloc(dbenv, dbt.size, NULL, &dbt.data)) != 0)
- goto err;
+ if ((ret = __os_malloc(dbenv, dbt.size, &dbt.data)) != 0)
+ goto out;
p = dbt.data;
memcpy(p, bk->data, argp->prefix);
p += argp->prefix;
@@ -971,9 +721,9 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info)
memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix);
ret = __bam_ritem(dbc, pagep, argp->indx, &dbt);
- __os_free(dbt.data, dbt.size);
+ __os_free(dbenv, dbt.data);
if (ret != 0)
- goto err;
+ goto out;
LSN(pagep) = *lsnp;
modified = 1;
@@ -985,8 +735,8 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info)
*/
memset(&dbt, 0, sizeof(dbt));
dbt.size = argp->prefix + argp->suffix + argp->orig.size;
- if ((ret = __os_malloc(dbenv, dbt.size, NULL, &dbt.data)) != 0)
- goto err;
+ if ((ret = __os_malloc(dbenv, dbt.size, &dbt.data)) != 0)
+ goto out;
p = dbt.data;
memcpy(p, bk->data, argp->prefix);
p += argp->prefix;
@@ -995,27 +745,27 @@ __bam_repl_recover(dbenv, dbtp, lsnp, op, info)
memcpy(p, bk->data + (bk->len - argp->suffix), argp->suffix);
ret = __bam_ritem(dbc, pagep, argp->indx, &dbt);
- __os_free(dbt.data, dbt.size);
+ __os_free(dbenv, dbt.data);
if (ret != 0)
- goto err;
+ goto out;
/* Reset the deleted flag, if necessary. */
if (argp->isdeleted)
- B_DSET(GET_BKEYDATA(pagep, argp->indx)->type);
+ B_DSET(GET_BKEYDATA(file_dbp, pagep, argp->indx)->type);
LSN(pagep) = argp->lsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = __memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+ pagep = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
- if (0) {
-err: (void)memp_fput(mpf, pagep, 0);
- }
-out: REC_CLOSE;
+out: if (pagep != NULL)
+ (void)__memp_fput(mpf, pagep, 0);
+ REC_CLOSE;
}
/*
@@ -1040,14 +790,15 @@ __bam_root_recover(dbenv, dbtp, lsnp, op, info)
DB_MPOOLFILE *mpf;
int cmp_n, cmp_p, modified, ret;
+ meta = NULL;
COMPQUIET(info, NULL);
REC_PRINT(__bam_root_print);
REC_INTRO(__bam_root_read, 0);
- if ((ret = memp_fget(mpf, &argp->meta_pgno, 0, &meta)) != 0) {
+ if ((ret = __memp_fget(mpf, &argp->meta_pgno, 0, &meta)) != 0) {
/* The metadata page must always exist on redo. */
if (DB_REDO(op)) {
- (void)__db_pgerr(file_dbp, argp->meta_pgno);
+ ret = __db_pgerr(file_dbp, argp->meta_pgno, ret);
goto out;
} else
goto done;
@@ -1068,13 +819,16 @@ __bam_root_recover(dbenv, dbtp, lsnp, op, info)
meta->dbmeta.lsn = argp->meta_lsn;
modified = 1;
}
- if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0)
+ if ((ret = __memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0)
goto out;
+ meta = NULL;
done: *lsnp = argp->prev_lsn;
ret = 0;
-out: REC_CLOSE;
+out: if (meta != NULL)
+ (void)__memp_fput(mpf, meta, 0);
+ REC_CLOSE;
}
/*
@@ -1100,6 +854,7 @@ __bam_curadj_recover(dbenv, dbtp, lsnp, op, info)
int ret;
COMPQUIET(info, NULL);
+ COMPQUIET(mpf, NULL);
REC_PRINT(__bam_curadj_print);
REC_INTRO(__bam_curadj_read, 0);
@@ -1108,7 +863,7 @@ __bam_curadj_recover(dbenv, dbtp, lsnp, op, info)
if (op != DB_TXN_ABORT)
goto done;
- switch(argp->mode) {
+ switch (argp->mode) {
case DB_CA_DI:
if ((ret = __bam_ca_di(dbc, argp->from_pgno,
argp->from_indx, -(int)argp->first_indx)) != 0)
@@ -1116,7 +871,7 @@ __bam_curadj_recover(dbenv, dbtp, lsnp, op, info)
break;
case DB_CA_DUP:
if ((ret = __bam_ca_undodup(file_dbp, argp->first_indx,
- argp->from_pgno, argp->from_indx, argp->to_indx)) != 0)
+ argp->from_pgno, argp->from_indx, argp->to_indx)) != 0)
goto out;
break;
@@ -1160,6 +915,7 @@ __bam_rcuradj_recover(dbenv, dbtp, lsnp, op, info)
int ret, t_ret;
COMPQUIET(info, NULL);
+ COMPQUIET(mpf, NULL);
rdbc = NULL;
REC_PRINT(__bam_rcuradj_print);
@@ -1180,15 +936,15 @@ __bam_rcuradj_recover(dbenv, dbtp, lsnp, op, info)
* state into __ram_ca, and this way we don't need to make
* this function know anything about how offpage dups work.
*/
- if ((ret =
- __db_icursor(file_dbp, NULL, DB_RECNO, argp->root, 0, &rdbc)) != 0)
+ if ((ret = __db_cursor_int(file_dbp,
+ NULL, DB_RECNO, argp->root, 0, DB_LOCK_INVALIDID, &rdbc)) != 0)
goto out;
cp = (BTREE_CURSOR *)rdbc->internal;
F_SET(cp, C_RENUMBER);
cp->recno = argp->recno;
- switch(argp->mode) {
+ switch (argp->mode) {
case CA_DELETE:
/*
* The way to undo a delete is with an insert. Since
@@ -1197,7 +953,7 @@ __bam_rcuradj_recover(dbenv, dbtp, lsnp, op, info)
F_SET(cp, C_DELETED);
F_SET(cp, C_RENUMBER); /* Just in case. */
cp->order = argp->order;
- __ram_ca(rdbc, CA_ICURRENT);
+ (void)__ram_ca(rdbc, CA_ICURRENT);
break;
case CA_IAFTER:
case CA_IBEFORE:
@@ -1208,12 +964,12 @@ __bam_rcuradj_recover(dbenv, dbtp, lsnp, op, info)
*/
F_CLR(cp, C_DELETED);
cp->order = INVALID_ORDER;
- __ram_ca(rdbc, CA_DELETE);
+ (void)__ram_ca(rdbc, CA_DELETE);
break;
}
done: *lsnp = argp->prev_lsn;
-out: if (rdbc != NULL && (t_ret = rdbc->c_close(rdbc)) != 0 && ret == 0)
+out: if (rdbc != NULL && (t_ret = __db_c_close(rdbc)) != 0 && ret == 0)
ret = t_ret;
REC_CLOSE;
}
diff --git a/db/btree/bt_reclaim.c b/db/btree/bt_reclaim.c
index 538d837c2..bc85bd2d3 100644
--- a/db/btree/bt_reclaim.c
+++ b/db/btree/bt_reclaim.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1998, 1999, 2000
+ * Copyright (c) 1998-2003
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_reclaim.c,v 11.5 2000/03/22 04:21:01 ubell Exp $";
+static const char revid[] = "$Id: bt_reclaim.c,v 11.14 2003/06/30 17:19:33 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -18,10 +18,8 @@ static const char revid[] = "$Id: bt_reclaim.c,v 11.5 2000/03/22 04:21:01 ubell
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
/*
* __bam_reclaim --
@@ -38,7 +36,7 @@ __bam_reclaim(dbp, txn)
int ret, t_ret;
/* Acquire a cursor. */
- if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0)
+ if ((ret = __db_cursor(dbp, txn, &dbc, 0)) != 0)
return (ret);
/* Walk the tree, freeing pages. */
@@ -46,8 +44,34 @@ __bam_reclaim(dbp, txn)
DB_LOCK_WRITE, dbc->internal->root, __db_reclaim_callback, dbc);
/* Discard the cursor. */
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
+ if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
ret = t_ret;
return (ret);
}
+
+/*
+ * __bam_truncate --
+ * Truncate a database.
+ *
+ * Walks the tree rooted at dbc->internal->root with __bam_traverse,
+ * requesting DB_LOCK_WRITE and handing __db_truncate_callback a
+ * db_trunc_param cookie whose count starts at zero and whose dbc
+ * field is the caller's cursor.
+ *
+ * dbc: cursor used for the traversal.
+ * countp: out parameter, dereferenced unconditionally, so it must not
+ * be NULL. It receives the cookie's final count (presumably the
+ * number of items the callback discarded; the callback is not
+ * defined in this file -- confirm against __db_truncate_callback).
+ *
+ * Returns the __bam_traverse return value unchanged.
+ *
+ * PUBLIC: int __bam_truncate __P((DBC *, u_int32_t *));
+ */
+int
+__bam_truncate(dbc, countp)
+ DBC *dbc;
+ u_int32_t *countp;
+{
+ db_trunc_param trunc;
+ int ret;
+
+ trunc.count = 0;
+ trunc.dbc = dbc;
+
+ /* Walk the tree, freeing pages. */
+ ret = __bam_traverse(dbc,
+ DB_LOCK_WRITE, dbc->internal->root, __db_truncate_callback, &trunc);
+
+ *countp = trunc.count; /* Stored whether or not the traversal failed. */
+
+ return (ret);
+}
diff --git a/db/btree/bt_recno.c b/db/btree/bt_recno.c
index 6ac0cac35..2098e4d94 100644
--- a/db/btree/bt_recno.c
+++ b/db/btree/bt_recno.c
@@ -1,36 +1,29 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1997, 1998, 1999, 2000
+ * Copyright (c) 1997-2003
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_recno.c,v 11.65 2001/01/18 14:33:22 bostic Exp $";
+static const char revid[] = "$Id: bt_recno.c,v 11.113 2003/06/30 17:19:34 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
-#include <limits.h>
#include <string.h>
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-#include "db_ext.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "lock_ext.h"
-#include "qam.h"
-#include "txn.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/lock.h"
static int __ram_add __P((DBC *, db_recno_t *, DBT *, u_int32_t, u_int32_t));
-static int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
-static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
static int __ram_source __P((DB *));
static int __ram_sread __P((DBC *, db_recno_t));
static int __ram_update __P((DBC *, db_recno_t, int));
@@ -63,7 +56,7 @@ static int __ram_update __P((DBC *, db_recno_t, int));
} \
}
#define CD_ISSET(cp) \
- (F_ISSET(cp, C_RENUMBER) && F_ISSET(cp, C_DELETED))
+ (F_ISSET(cp, C_RENUMBER) && F_ISSET(cp, C_DELETED) ? 1 : 0)
/*
* Macros for comparing the ordering of two cursors.
@@ -90,17 +83,32 @@ static int __ram_update __P((DBC *, db_recno_t, int));
* Do we need to log the current cursor adjustment?
*/
#define CURADJ_LOG(dbc) \
- (DB_LOGGING((dbc)) && (dbc)->txn != NULL && (dbc)->txn->parent != NULL)
+ (DBC_LOGGING((dbc)) && (dbc)->txn != NULL && (dbc)->txn->parent != NULL)
+
+/*
+ * After a search, copy the found page into the cursor, discarding any
+ * currently held lock.
+ *
+ * The page, page number, index, lock and lock mode are all taken from
+ * the entry (cp)->csp points at. The cursor's previous lock is passed
+ * to __TLPUT before being overwritten; the result of that call is
+ * explicitly discarded.
+ *
+ * NOTE: the expansion refers to a variable named dbc that is not a
+ * macro parameter, so one must be in scope wherever the macro is used.
+ * cp is evaluated several times and should have no side effects.
+ */
+#define STACK_TO_CURSOR(cp) { \
+ (cp)->page = (cp)->csp->page; \
+ (cp)->pgno = (cp)->csp->page->pgno; \
+ (cp)->indx = (cp)->csp->indx; \
+ (void)__TLPUT(dbc, (cp)->lock); \
+ (cp)->lock = (cp)->csp->lock; \
+ (cp)->lock_mode = (cp)->csp->lock_mode; \
+}
/*
* __ram_open --
* Recno open function.
*
- * PUBLIC: int __ram_open __P((DB *, const char *, db_pgno_t, u_int32_t));
+ * PUBLIC: int __ram_open __P((DB *,
+ * PUBLIC: DB_TXN *, const char *, db_pgno_t, u_int32_t));
*/
int
-__ram_open(dbp, name, base_pgno, flags)
+__ram_open(dbp, txn, name, base_pgno, flags)
DB *dbp;
+ DB_TXN *txn;
const char *name;
db_pgno_t base_pgno;
u_int32_t flags;
@@ -109,15 +117,11 @@ __ram_open(dbp, name, base_pgno, flags)
DBC *dbc;
int ret, t_ret;
+ COMPQUIET(name, NULL);
t = dbp->bt_internal;
- /* Initialize the remaining fields/methods of the DB. */
- dbp->del = __ram_delete;
- dbp->put = __ram_put;
- dbp->stat = __bam_stat;
-
/* Start up the tree. */
- if ((ret = __bam_read_root(dbp, name, base_pgno, flags)) != 0)
+ if ((ret = __bam_read_root(dbp, txn, base_pgno, flags)) != 0)
return (ret);
/*
@@ -132,9 +136,9 @@ __ram_open(dbp, name, base_pgno, flags)
return (ret);
/* If we're snapshotting an underlying source file, do it now. */
- if (F_ISSET(dbp, DB_RE_SNAPSHOT)) {
+ if (F_ISSET(dbp, DB_AM_SNAPSHOT)) {
/* Allocate a cursor. */
- if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
+ if ((ret = __db_cursor(dbp, NULL, &dbc, 0)) != 0)
return (ret);
/* Do the snapshot. */
@@ -143,108 +147,42 @@ __ram_open(dbp, name, base_pgno, flags)
ret = 0;
/* Discard the cursor. */
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
+ if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
ret = t_ret;
}
- return (0);
-}
-
-/*
- * __ram_delete --
- * Recno db->del function.
- */
-static int
-__ram_delete(dbp, txn, key, flags)
- DB *dbp;
- DB_TXN *txn;
- DBT *key;
- u_int32_t flags;
-{
- BTREE_CURSOR *cp;
- DBC *dbc;
- db_recno_t recno;
- int ret, t_ret;
-
- PANIC_CHECK(dbp->dbenv);
-
- /* Check for invalid flags. */
- if ((ret = __db_delchk(dbp,
- key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
- return (ret);
-
- /* Acquire a cursor. */
- if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
- return (ret);
-
- DEBUG_LWRITE(dbc, txn, "ram_delete", key, NULL, flags);
-
- /* Check the user's record number and fill in as necessary. */
- if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0)
- goto err;
-
- /* Do the delete. */
- cp = (BTREE_CURSOR *)dbc->internal;
- cp->recno = recno;
-
- ret = __ram_c_del(dbc);
-
- /* Release the cursor. */
-err: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
return (ret);
}
/*
- * __ram_put --
- * Recno db->put function.
+ * __ram_append --
+ * Recno append function.
+ *
+ * Adds "data" through the caller's cursor by calling __ram_add with
+ * DB_APPEND, then, on success, copies the record number held in the
+ * cursor (cp->recno) back to the caller through "key".  The cursor
+ * is supplied by the caller and is not closed here.  Returns 0 or
+ * the error value from the failing call.
+ *
+ * PUBLIC: int __ram_append __P((DBC *, DBT *, DBT *));
*/
-static int
-__ram_put(dbp, txn, key, data, flags)
- DB *dbp;
- DB_TXN *txn;
+int
+__ram_append(dbc, key, data)
+ DBC *dbc;
DBT *key, *data;
- u_int32_t flags;
{
- DBC *dbc;
- db_recno_t recno;
- int ret, t_ret;
-
- PANIC_CHECK(dbp->dbenv);
-
- /* Check for invalid flags. */
- if ((ret = __db_putchk(dbp,
- key, data, flags, F_ISSET(dbp, DB_AM_RDONLY), 0)) != 0)
- return (ret);
-
- /* Allocate a cursor. */
- if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
- return (ret);
+ BTREE_CURSOR *cp;
+ int ret;
- DEBUG_LWRITE(dbc, txn, "ram_put", key, data, flags);
+ cp = (BTREE_CURSOR *)dbc->internal;
/*
- * If we're appending to the tree, make sure we've read in all of
- * the backing source file. Otherwise, check the user's record
- * number and fill in as necessary. If we found the record or it
- * simply didn't exist, add the user's record.
+ * Make sure we've read in all of the backing source file. If
+ * we found the record or it simply didn't exist, add the
+ * user's record.
+ *
+ * A DB_NOTFOUND return from __ram_update is not treated as an
+ * error here; the append still goes ahead.  Any other non-zero
+ * return skips the append and is returned to the caller.
*/
- if (flags == DB_APPEND)
- ret = __ram_update(dbc, DB_MAX_RECORDS, 0);
- else
- ret = __ram_getno(dbc, key, &recno, 1);
+ ret = __ram_update(dbc, DB_MAX_RECORDS, 0);
if (ret == 0 || ret == DB_NOTFOUND)
- ret = __ram_add(dbc, &recno, data, flags, 0);
-
- /* Discard the cursor. */
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
+ ret = __ram_add(dbc, &cp->recno, data, DB_APPEND, 0);
- /* Return the record number if we're appending to the tree. */
- if (ret == 0 && flags == DB_APPEND)
- ret = __db_retcopy(dbp, key, &recno, sizeof(recno),
- &dbc->rkey.data, &dbc->rkey.ulen);
+ /* Return the record number. */
+ if (ret == 0)
+ ret = __db_retcopy(dbc->dbp->dbenv, key, &cp->recno,
+ sizeof(cp->recno), &dbc->rkey->data, &dbc->rkey->ulen);
return (ret);
}
@@ -266,7 +204,7 @@ __ram_c_del(dbc)
DB_LSN lsn;
DBT hdr, data;
EPG *epg;
- int exact, ret, stack;
+ int exact, ret, stack, t_ret;
dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
@@ -295,9 +233,9 @@ __ram_c_del(dbc)
goto err;
}
stack = 1;
- cp->page = cp->csp->page;
- cp->pgno = cp->csp->page->pgno;
- cp->indx = cp->csp->indx;
+
+ /* Copy the page into the cursor. */
+ STACK_TO_CURSOR(cp);
/*
* If re-numbering records, the on-page deleted flag can only mean
@@ -310,7 +248,7 @@ __ram_c_del(dbc)
* delete records they never created, the latter is an error because
* if the record was "deleted", we could never have found it.
*/
- if (B_DISSET(GET_BKEYDATA(cp->page, cp->indx)->type)) {
+ if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type)) {
ret = DB_KEYEMPTY;
goto err;
}
@@ -319,11 +257,11 @@ __ram_c_del(dbc)
/* Delete the item, adjust the counts, adjust the cursors. */
if ((ret = __bam_ditem(dbc, cp->page, cp->indx)) != 0)
goto err;
- __bam_adjust(dbc, -1);
+ if ((ret = __bam_adjust(dbc, -1)) != 0)
+ goto err;
if (__ram_ca(dbc, CA_DELETE) > 0 &&
- CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(dbp->dbenv,
- dbc->txn, &lsn, 0, dbp->log_fileid, CA_DELETE,
- cp->root, cp->recno, cp->order)) != 0)
+ CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(dbp, dbc->txn,
+ &lsn, 0, CA_DELETE, cp->root, cp->recno, cp->order)) != 0)
goto err;
/*
@@ -346,15 +284,15 @@ __ram_c_del(dbc)
* going to be emptied by removing the single reference
* to the emptied page (or one of its parents).
*/
- for (epg = cp->sp; epg <= cp->csp; ++epg)
- if (NUM_ENT(epg->page) <= 1)
+ for (epg = cp->csp; epg >= cp->sp; --epg)
+ if (NUM_ENT(epg->page) > 1)
break;
/*
* We want to delete a single item out of the last page
- * that we're not deleting, back up to that page.
+ * that we're not deleting.
*/
- ret = __bam_dpages(dbc, --epg);
+ ret = __bam_dpages(dbc, epg);
/*
* Regardless of the return from __bam_dpages, it will
@@ -383,8 +321,8 @@ __ram_c_del(dbc)
t->re_modified = 1;
-err: if (stack)
- __bam_stkrel(dbc, STK_CLRDBC);
+err: if (stack && (t_ret = __bam_stkrel(dbc, STK_CLRDBC)) != 0 && ret == 0)
+ ret = t_ret;
return (ret);
}
@@ -412,6 +350,7 @@ __ram_c_get(dbc, key, data, flags, pgnop)
dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
+ LF_CLR(DB_MULTIPLE|DB_MULTIPLE_KEY);
retry: switch (flags) {
case DB_CURRENT:
/*
@@ -445,8 +384,13 @@ retry: switch (flags) {
* we have to avoid incrementing the record number so that we
* return the right record by virtue of renumbering the tree.
*/
- if (CD_ISSET(cp))
+ if (CD_ISSET(cp)) {
+ /*
+ * Clear the flag, we've moved off the deleted record.
+ */
+ CD_CLR(cp);
break;
+ }
if (cp->recno != RECNO_OOB) {
++cp->recno;
@@ -504,6 +448,7 @@ retry: switch (flags) {
goto err;
/* NOTREACHED */
case DB_GET_BOTH:
+ case DB_GET_BOTH_RANGE:
/*
* If we're searching a set of off-page dups, we start
* a new linear search from the first record. Otherwise,
@@ -531,6 +476,8 @@ retry: switch (flags) {
* read from the backing source file. Do it now for DB_CURRENT (if
* the current record was deleted we may need more records from the
* backing file for a DB_CURRENT operation), DB_FIRST and DB_NEXT.
+ * (We don't have to test for flags == DB_FIRST, because the switch
+ * statement above re-set flags to DB_NEXT in that case.)
*/
if ((flags == DB_NEXT || flags == DB_CURRENT) && ((ret =
__ram_update(dbc, cp->recno, 0)) != 0) && ret != DB_NOTFOUND)
@@ -547,16 +494,8 @@ retry: switch (flags) {
goto err;
}
- /*
- * Copy the page into the cursor, discarding any lock we
- * are currently holding.
- */
- cp->page = cp->csp->page;
- cp->pgno = cp->csp->page->pgno;
- cp->indx = cp->csp->indx;
- (void)__TLPUT(dbc, cp->lock);
- cp->lock = cp->csp->lock;
- cp->lock_mode = cp->csp->lock_mode;
+ /* Copy the page into the cursor. */
+ STACK_TO_CURSOR(cp);
/*
* If re-numbering records, the on-page deleted flag means this
@@ -567,21 +506,34 @@ retry: switch (flags) {
* walking through off-page duplicates, and fail if they were
* requested explicitly by the application.
*/
- if (B_DISSET(GET_BKEYDATA(cp->page, cp->indx)->type))
+ if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type))
switch (flags) {
case DB_NEXT:
case DB_PREV:
(void)__bam_stkrel(dbc, STK_CLRDBC);
goto retry;
case DB_GET_BOTH:
- (void)__bam_stkrel(dbc, STK_CLRDBC);
- continue;
+ case DB_GET_BOTH_RANGE:
+ /*
+ * If we're an OPD tree, we don't care about
+ * matching a record number on a DB_GET_BOTH
+ * -- everything belongs to the same tree. A
+ * normal recno should give up and return
+ * DB_NOTFOUND if the matching recno is deleted.
+ */
+ if (F_ISSET(dbc, DBC_OPD)) {
+ (void)__bam_stkrel(dbc, STK_CLRDBC);
+ continue;
+ }
+ ret = DB_NOTFOUND;
+ goto err;
default:
ret = DB_KEYEMPTY;
goto err;
}
- if (flags == DB_GET_BOTH || flags == DB_GET_BOTHC) {
+ if (flags == DB_GET_BOTH ||
+ flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) {
if ((ret = __bam_cmp(dbp, data,
cp->page, cp->indx, __bam_defcmp, &cmp)) != 0)
return (ret);
@@ -598,10 +550,11 @@ retry: switch (flags) {
/* Return the key if the user didn't give us one. */
if (!F_ISSET(dbc, DBC_OPD)) {
- if (flags != DB_SET && flags != DB_SET_RANGE)
- ret = __db_retcopy(dbp,
- key, &cp->recno, sizeof(cp->recno),
- &dbc->rkey.data, &dbc->rkey.ulen);
+ if (flags != DB_GET_BOTH && flags != DB_GET_BOTH_RANGE &&
+ flags != DB_SET && flags != DB_SET_RANGE)
+ ret = __db_retcopy(dbp->dbenv,
+ key, &cp->recno, sizeof(cp->recno),
+ &dbc->rkey->data, &dbc->rkey->ulen);
F_SET(key, DB_DBT_ISSET);
}
@@ -637,23 +590,45 @@ __ram_c_put(dbc, key, data, flags, pgnop)
cp = (BTREE_CURSOR *)dbc->internal;
/*
- * DB_KEYFIRST and DB_KEYLAST will only be set if we're dealing with
- * an off-page duplicate tree, they can't be specified at user level.
- * Translate them into something else.
+ * DB_KEYFIRST and DB_KEYLAST mean different things if they're
+ * used in an off-page duplicate tree. If we're an off-page
+ * duplicate tree, they really mean "put at the beginning of the
+ * tree" and "put at the end of the tree" respectively, so translate
+ * them to something else.
*/
- switch (flags) {
- case DB_KEYFIRST:
- cp->recno = 1;
- flags = DB_BEFORE;
- break;
- case DB_KEYLAST:
- if ((ret = __ram_add(dbc, &cp->recno, data, DB_APPEND, 0)) != 0)
- return (ret);
- if (CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(dbp->dbenv,
- dbc->txn, &lsn, 0, dbp->log_fileid, CA_ICURRENT,
- cp->root, cp->recno, cp->order)))
- return (ret);
- return (0);
+ if (F_ISSET(dbc, DBC_OPD))
+ switch (flags) {
+ case DB_KEYFIRST:
+ cp->recno = 1;
+ flags = DB_BEFORE;
+ break;
+ case DB_KEYLAST:
+ if ((ret = __ram_add(dbc,
+ &cp->recno, data, DB_APPEND, 0)) != 0)
+ return (ret);
+ if (CURADJ_LOG(dbc) &&
+ (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0,
+ CA_ICURRENT, cp->root, cp->recno, cp->order)) != 0)
+ return (ret);
+ return (0);
+ default:
+ break;
+ }
+
+ /*
+ * Handle normal DB_KEYFIRST/DB_KEYLAST; for a recno, which has
+ * no duplicates, these are identical and mean "put the given
+ * datum at the given recno".
+ *
+ * Note that the code here used to be in __ram_put; now, we
+ * go through the access-method-common __db_put function, which
+ * handles DB_NOOVERWRITE, so we and __ram_add don't have to.
+ */
+ if (flags == DB_KEYFIRST || flags == DB_KEYLAST) {
+ ret = __ram_getno(dbc, key, &cp->recno, 1);
+ if (ret == 0 || ret == DB_NOTFOUND)
+ ret = __ram_add(dbc, &cp->recno, data, 0, 0);
+ return (ret);
}
/*
@@ -677,9 +652,8 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
*/
DB_ASSERT(exact || CD_ISSET(cp));
- cp->page = cp->csp->page;
- cp->pgno = cp->csp->page->pgno;
- cp->indx = cp->csp->indx;
+ /* Copy the page into the cursor. */
+ STACK_TO_CURSOR(cp);
ret = __bam_iitem(dbc, key, data, iiflags, 0);
t_ret = __bam_stkrel(dbc, STK_CLRDBC);
@@ -688,7 +662,7 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
ret = t_ret;
else if (ret == DB_NEEDSPLIT) {
arg = &cp->recno;
- if ((ret = __bam_split(dbc, arg)) != 0)
+ if ((ret = __bam_split(dbc, arg, NULL)) != 0)
goto err;
goto split;
}
@@ -709,8 +683,7 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
/* Only log if __ram_ca found any relevant cursors. */
if (nc > 0 && CURADJ_LOG(dbc) &&
- (ret = __bam_rcuradj_log(dbp->dbenv,
- dbc->txn, &lsn, 0, dbp->log_fileid, CA_IAFTER,
+ (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_IAFTER,
cp->root, cp->recno, cp->order)) != 0)
goto err;
break;
@@ -720,8 +693,7 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
/* Only log if __ram_ca found any relevant cursors. */
if (nc > 0 && CURADJ_LOG(dbc) &&
- (ret = __bam_rcuradj_log(dbp->dbenv,
- dbc->txn, &lsn, 0, dbp->log_fileid, CA_IBEFORE,
+ (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_IBEFORE,
cp->root, cp->recno, cp->order)) != 0)
goto err;
break;
@@ -734,17 +706,19 @@ split: if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0)
* Only log if __ram_ca found any relevant cursors.
*/
if (CD_ISSET(cp) && __ram_ca(dbc, CA_ICURRENT) > 0 &&
- CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(
- dbp->dbenv, dbc->txn, &lsn, 0, dbp->log_fileid,
+ CURADJ_LOG(dbc) &&
+ (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0,
CA_ICURRENT, cp->root, cp->recno, cp->order)) != 0)
goto err;
break;
+ default:
+ break;
}
/* Return the key if we've created a new record. */
if (!F_ISSET(dbc, DBC_OPD) && (flags == DB_AFTER || flags == DB_BEFORE))
- ret = __db_retcopy(dbp, key, &cp->recno,
- sizeof(cp->recno), &dbc->rkey.data, &dbc->rkey.ulen);
+ ret = __db_retcopy(dbp->dbenv, key, &cp->recno,
+ sizeof(cp->recno), &dbc->rkey->data, &dbc->rkey->ulen);
/* The cursor was reset, no further delete adjustment is necessary. */
err: CD_CLR(cp);
@@ -940,13 +914,12 @@ __ram_update(dbc, recno, can_create)
int can_create;
{
BTREE *t;
- BTREE_CURSOR *cp;
DB *dbp;
+ DBT *rdata;
db_recno_t nrecs;
int ret;
dbp = dbc->dbp;
- cp = (BTREE_CURSOR *)dbc->internal;
t = dbp->bt_internal;
/*
@@ -976,27 +949,13 @@ __ram_update(dbc, recno, can_create)
if (!can_create || recno <= nrecs + 1)
return (0);
- dbc->rdata.dlen = 0;
- dbc->rdata.doff = 0;
- dbc->rdata.flags = 0;
- if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
- if (dbc->rdata.ulen < t->re_len) {
- if ((ret = __os_realloc(dbp->dbenv,
- t->re_len, NULL, &dbc->rdata.data)) != 0) {
- dbc->rdata.ulen = 0;
- dbc->rdata.data = NULL;
- return (ret);
- }
- dbc->rdata.ulen = t->re_len;
- }
- dbc->rdata.size = t->re_len;
- memset(dbc->rdata.data, t->re_pad, t->re_len);
- } else
- dbc->rdata.size = 0;
+ rdata = &dbc->my_rdata;
+ rdata->flags = 0;
+ rdata->size = 0;
while (recno > ++nrecs)
if ((ret = __ram_add(dbc,
- &nrecs, &dbc->rdata, 0, BI_DELETED)) != 0)
+ &nrecs, rdata, 0, BI_DELETED)) != 0)
return (ret);
return (0);
}
@@ -1017,9 +976,9 @@ __ram_source(dbp)
/* Find the real name, and swap out the one we had before. */
if ((ret = __db_appname(dbp->dbenv,
- DB_APP_DATA, NULL, t->re_source, 0, NULL, &source)) != 0)
+ DB_APP_DATA, t->re_source, 0, NULL, &source)) != 0)
return (ret);
- __os_freestr(t->re_source);
+ __os_free(dbp->dbenv, t->re_source);
t->re_source = source;
/*
@@ -1060,6 +1019,7 @@ __ram_writeback(dbp)
t = dbp->bt_internal;
dbenv = dbp->dbenv;
fp = NULL;
+ pad = NULL;
/* If the file wasn't modified, we're done. */
if (!t->re_modified)
@@ -1072,7 +1032,7 @@ __ram_writeback(dbp)
}
/* Allocate a cursor. */
- if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
+ if ((ret = __db_cursor(dbp, NULL, &dbc, 0)) != 0)
return (ret);
/*
@@ -1119,40 +1079,45 @@ __ram_writeback(dbp)
/*
* We step through the records, writing each one out. Use the record
* number and the dbp->get() function, instead of a cursor, so we find
- * and write out "deleted" or non-existent records.
+ * and write out "deleted" or non-existent records. The DB handle may
+ * be threaded, so allocate memory as we go.
*/
memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
key.size = sizeof(db_recno_t);
key.data = &keyno;
+ memset(&data, 0, sizeof(data));
+ F_SET(&data, DB_DBT_REALLOC);
/*
* We'll need the delimiter if we're doing variable-length records,
* and the pad character if we're doing fixed-length records.
*/
delim = t->re_delim;
- if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
- if ((ret = __os_malloc(dbenv, t->re_len, NULL, &pad)) != 0)
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN)) {
+ if ((ret = __os_malloc(dbenv, t->re_len, &pad)) != 0)
goto err;
memset(pad, t->re_pad, t->re_len);
- } else
- COMPQUIET(pad, NULL);
+ }
for (keyno = 1;; ++keyno) {
- switch (ret = dbp->get(dbp, NULL, &key, &data, 0)) {
+ switch (ret = __db_get(dbp, NULL, &key, &data, 0)) {
case 0:
- if (fwrite(data.data, 1, data.size, fp) != data.size)
+ if (data.size != 0 && (u_int32_t)fwrite(
+ data.data, 1, data.size, fp) != data.size)
goto write_err;
break;
case DB_KEYEMPTY:
- if (F_ISSET(dbp, DB_RE_FIXEDLEN) &&
- fwrite(pad, 1, t->re_len, fp) != t->re_len)
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN) &&
+ (u_int32_t)fwrite(pad, 1, t->re_len, fp) !=
+ t->re_len)
goto write_err;
break;
case DB_NOTFOUND:
ret = 0;
goto done;
+ default:
+ goto err;
}
- if (!F_ISSET(dbp, DB_RE_FIXEDLEN) &&
+ if (!F_ISSET(dbp, DB_AM_FIXEDLEN) &&
fwrite(&delim, 1, 1, fp) != 1) {
write_err: ret = errno;
__db_err(dbp->dbenv,
@@ -1171,9 +1136,15 @@ done: /* Close the file descriptor. */
}
/* Discard the cursor. */
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
+ if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
ret = t_ret;
+ /* Discard memory allocated to hold the data items. */
+ if (data.data != NULL)
+ __os_ufree(dbenv, data.data);
+ if (pad != NULL)
+ __os_free(dbenv, pad);
+
if (ret == 0)
t->re_modified = 0;
@@ -1191,7 +1162,7 @@ __ram_sread(dbc, top)
{
BTREE *t;
DB *dbp;
- DBT data;
+ DBT data, *rdata;
db_recno_t recno;
size_t len;
int ch, ret, was_modified;
@@ -1203,45 +1174,56 @@ __ram_sread(dbc, top)
if ((ret = __bam_nrecs(dbc, &recno)) != 0)
return (ret);
- /* Use the record data return memory, it's only a short-term use. */
- len = F_ISSET(dbp, DB_RE_FIXEDLEN) ? t->re_len : 256;
- if (dbc->rdata.ulen < len) {
+ /*
+ * Use the record key return memory, it's only a short-term use.
+ * The record data return memory is used by __bam_iitem, which
+ * we'll indirectly call, so use the key so as not to collide.
+ */
+ len = F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_len : 256;
+ rdata = &dbc->my_rkey;
+ if (rdata->ulen < len) {
if ((ret = __os_realloc(
- dbp->dbenv, len, NULL, &dbc->rdata.data)) != 0) {
- dbc->rdata.ulen = 0;
- dbc->rdata.data = NULL;
+ dbp->dbenv, len, &rdata->data)) != 0) {
+ rdata->ulen = 0;
+ rdata->data = NULL;
return (ret);
}
- dbc->rdata.ulen = len;
+ rdata->ulen = (u_int32_t)len;
}
memset(&data, 0, sizeof(data));
while (recno < top) {
- data.data = dbc->rdata.data;
+ data.data = rdata->data;
data.size = 0;
- if (F_ISSET(dbp, DB_RE_FIXEDLEN))
+ if (F_ISSET(dbp, DB_AM_FIXEDLEN))
for (len = t->re_len; len > 0; --len) {
- if ((ch = getc(t->re_fp)) == EOF)
- goto eof;
+ if ((ch = getc(t->re_fp)) == EOF) {
+ if (data.size == 0)
+ goto eof;
+ break;
+ }
((u_int8_t *)data.data)[data.size++] = ch;
}
else
for (;;) {
- if ((ch = getc(t->re_fp)) == EOF)
- goto eof;
+ if ((ch = getc(t->re_fp)) == EOF) {
+ if (data.size == 0)
+ goto eof;
+ break;
+ }
if (ch == t->re_delim)
break;
((u_int8_t *)data.data)[data.size++] = ch;
- if (data.size == dbc->rdata.ulen) {
+ if (data.size == rdata->ulen) {
if ((ret = __os_realloc(dbp->dbenv,
- dbc->rdata.ulen *= 2,
- NULL, &dbc->rdata.data)) != 0) {
- dbc->rdata.ulen = 0;
- dbc->rdata.data = NULL;
+ rdata->ulen *= 2,
+ &rdata->data)) != 0) {
+ rdata->ulen = 0;
+ rdata->data = NULL;
return (ret);
} else
- data.data = dbc->rdata.data;
+ data.data = rdata->data;
}
}
@@ -1281,9 +1263,8 @@ __ram_add(dbc, recnop, data, flags, bi_flags)
DBT *data;
u_int32_t flags, bi_flags;
{
- BKEYDATA *bk;
BTREE_CURSOR *cp;
- int exact, ret, stack;
+ int exact, ret, stack, t_ret;
cp = (BTREE_CURSOR *)dbc->internal;
@@ -1292,9 +1273,9 @@ retry: /* Find the slot for insertion. */
S_INSERT | (flags == DB_APPEND ? S_APPEND : 0), 1, &exact)) != 0)
return (ret);
stack = 1;
- cp->page = cp->csp->page;
- cp->pgno = cp->csp->page->pgno;
- cp->indx = cp->csp->indx;
+
+ /* Copy the page into the cursor. */
+ STACK_TO_CURSOR(cp);
/*
* The application may modify the data based on the selected record
@@ -1305,24 +1286,6 @@ retry: /* Find the slot for insertion. */
goto err;
/*
- * If re-numbering records, the on-page deleted flag means this record
- * was implicitly created. If not re-numbering records, the on-page
- * deleted flag means this record was implicitly created, or, it was
- * deleted at some time.
- *
- * If DB_NOOVERWRITE is set and the item already exists in the tree,
- * return an error unless the item was either marked for deletion or
- * only implicitly created.
- */
- if (exact) {
- bk = GET_BKEYDATA(cp->page, cp->indx);
- if (!B_DISSET(bk->type) && flags == DB_NOOVERWRITE) {
- ret = DB_KEYEXIST;
- goto err;
- }
- }
-
- /*
* Select the arguments for __bam_iitem() and do the insert. If the
* key is an exact match, or we're replacing the data item with a
* new data item, replace the current item. If the key isn't an exact
@@ -1353,7 +1316,7 @@ retry: /* Find the slot for insertion. */
(void)__bam_stkrel(dbc, STK_CLRDBC);
stack = 0;
- if ((ret = __bam_split(dbc, recnop)) != 0)
+ if ((ret = __bam_split(dbc, recnop, NULL)) != 0)
goto err;
goto retry;
@@ -1362,8 +1325,8 @@ retry: /* Find the slot for insertion. */
goto err;
}
-err: if (stack)
- __bam_stkrel(dbc, STK_CLRDBC);
+err: if (stack && (t_ret = __bam_stkrel(dbc, STK_CLRDBC)) != 0 && ret == 0)
+ ret = t_ret;
return (ret);
}
diff --git a/db/btree/bt_rsearch.c b/db/btree/bt_rsearch.c
index 7102cd715..92eb82144 100644
--- a/db/btree/bt_rsearch.c
+++ b/db/btree/bt_rsearch.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
/*
@@ -40,7 +40,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_rsearch.c,v 11.21 2000/03/28 21:50:04 ubell Exp $";
+static const char revid[] = "$Id: bt_rsearch.c,v 11.37 2003/06/30 17:19:34 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -48,10 +48,11 @@ static const char revid[] = "$Id: bt_rsearch.c,v 11.21 2000/03/28 21:50:04 ubell
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "btree.h"
-#include "db_shash.h"
-#include "lock.h"
+#include "dbinc/db_page.h"
+#include "dbinc/btree.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/lock.h"
+#include "dbinc/mp.h"
/*
* __bam_rsearch --
@@ -70,6 +71,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
BTREE_CURSOR *cp;
DB *dbp;
DB_LOCK lock;
+ DB_MPOOLFILE *mpf;
PAGE *h;
RINTERNAL *ri;
db_indx_t adjust, deloffset, indx, top;
@@ -79,6 +81,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
int ret, stack;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
BT_STK_CLR(cp);
@@ -99,11 +102,11 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
* Retrieve the root page.
*/
pg = cp->root;
- stack = LF_ISSET(S_STACK);
+ stack = LF_ISSET(S_STACK) ? 1 : 0;
lock_mode = stack ? DB_LOCK_WRITE : DB_LOCK_READ;
if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
+ if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0) {
/* Did not read it, so we can release the lock */
(void)__LPUT(dbc, lock);
return (ret);
@@ -120,12 +123,12 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
if (!stack &&
((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
(LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
(void)__LPUT(dbc, lock);
lock_mode = DB_LOCK_WRITE;
if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
+ if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0) {
/* Did not read it, so we can release the lock */
(void)__LPUT(dbc, lock);
return (ret);
@@ -164,7 +167,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
* eliminate any concurrency. A possible fix
* would be to lock the last leaf page instead.
*/
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
(void)__TLPUT(dbc, lock);
return (DB_NOTFOUND);
}
@@ -198,12 +201,14 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
*exactp = 0;
if (!LF_ISSET(S_PAST_EOF) ||
recno > t_recno + 1) {
+ (void)__memp_fput(mpf, h, 0);
+ (void)__TLPUT(dbc, lock);
ret = DB_NOTFOUND;
goto err;
}
}
- if (!B_DISSET(
- GET_BKEYDATA(h, indx + deloffset)->type) &&
+ if (!B_DISSET(GET_BKEYDATA(dbp, h,
+ indx + deloffset)->type) &&
++t_recno == recno)
break;
}
@@ -216,7 +221,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
return (0);
case P_IBTREE:
for (indx = 0, top = NUM_ENT(h);;) {
- bi = GET_BINTERNAL(h, indx);
+ bi = GET_BINTERNAL(dbp, h, indx);
if (++indx == top || total + bi->nrecs >= recno)
break;
total += bi->nrecs;
@@ -235,7 +240,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
return (0);
case P_IRECNO:
for (indx = 0, top = NUM_ENT(h);;) {
- ri = GET_RINTERNAL(h, indx);
+ ri = GET_RINTERNAL(dbp, h, indx);
if (++indx == top || total + ri->nrecs >= recno)
break;
total += ri->nrecs;
@@ -243,7 +248,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
pg = ri->pgno;
break;
default:
- return (__db_pgfmt(dbp, h->pgno));
+ return (__db_pgfmt(dbp->dbenv, h->pgno));
}
--indx;
@@ -276,12 +281,12 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
(h->level - 1) == LEAFLEVEL)
stack = 1;
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
lock_mode = stack &&
LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ;
if ((ret = __db_lget(dbc,
- LCK_COUPLE, pg, lock_mode, 0, &lock)) != 0) {
+ LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
/*
* If we fail, discard the lock we held. This
* is OK because this only happens when we are
@@ -292,7 +297,7 @@ __bam_rsearch(dbc, recnop, flags, stop, exactp)
}
}
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0)
+ if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0)
goto err;
}
/* NOTREACHED */
@@ -315,12 +320,14 @@ __bam_adjust(dbc, adjust)
{
BTREE_CURSOR *cp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
EPG *epg;
PAGE *h;
db_pgno_t root_pgno;
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
root_pgno = cp->root;
@@ -328,22 +335,27 @@ __bam_adjust(dbc, adjust)
for (epg = cp->sp; epg <= cp->csp; ++epg) {
h = epg->page;
if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) {
- if (DB_LOGGING(dbc) &&
- (ret = __bam_cadjust_log(dbp->dbenv,
- dbc->txn, &LSN(h), 0, dbp->log_fileid,
- PGNO(h), &LSN(h), (u_int32_t)epg->indx, adjust,
- PGNO(h) == root_pgno ? CAD_UPDATEROOT : 0)) != 0)
- return (ret);
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __bam_cadjust_log(dbp, dbc->txn,
+ &LSN(h), 0, PGNO(h), &LSN(h),
+ (u_int32_t)epg->indx, adjust,
+ PGNO(h) == root_pgno ?
+ CAD_UPDATEROOT : 0)) != 0)
+ return (ret);
+ } else
+ LSN_NOT_LOGGED(LSN(h));
if (TYPE(h) == P_IBTREE)
- GET_BINTERNAL(h, epg->indx)->nrecs += adjust;
+ GET_BINTERNAL(dbp, h, epg->indx)->nrecs +=
+ adjust;
else
- GET_RINTERNAL(h, epg->indx)->nrecs += adjust;
+ GET_RINTERNAL(dbp, h, epg->indx)->nrecs +=
+ adjust;
if (PGNO(h) == root_pgno)
RE_NREC_ADJ(h, adjust);
- if ((ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
+ if ((ret = __memp_fset(mpf, h, DB_MPOOL_DIRTY)) != 0)
return (ret);
}
}
@@ -363,21 +375,23 @@ __bam_nrecs(dbc, rep)
{
DB *dbp;
DB_LOCK lock;
+ DB_MPOOLFILE *mpf;
PAGE *h;
db_pgno_t pgno;
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
pgno = dbc->internal->root;
if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lock)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if ((ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
return (ret);
*rep = RE_NREC(h);
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
(void)__TLPUT(dbc, lock);
return (0);
@@ -387,10 +401,11 @@ __bam_nrecs(dbc, rep)
* __bam_total --
* Return the number of records below a page.
*
- * PUBLIC: db_recno_t __bam_total __P((PAGE *));
+ * PUBLIC: db_recno_t __bam_total __P((DB *, PAGE *));
*/
db_recno_t
-__bam_total(h)
+__bam_total(dbp, h)
+ DB *dbp;
PAGE *h;
{
db_recno_t nrecs;
@@ -403,25 +418,26 @@ __bam_total(h)
case P_LBTREE:
/* Check for logically deleted records. */
for (indx = 0; indx < top; indx += P_INDX)
- if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type))
+ if (!B_DISSET(
+ GET_BKEYDATA(dbp, h, indx + O_INDX)->type))
++nrecs;
break;
case P_LDUP:
/* Check for logically deleted records. */
for (indx = 0; indx < top; indx += O_INDX)
- if (!B_DISSET(GET_BKEYDATA(h, indx)->type))
+ if (!B_DISSET(GET_BKEYDATA(dbp, h, indx)->type))
++nrecs;
break;
case P_IBTREE:
for (indx = 0; indx < top; indx += O_INDX)
- nrecs += GET_BINTERNAL(h, indx)->nrecs;
+ nrecs += GET_BINTERNAL(dbp, h, indx)->nrecs;
break;
case P_LRECNO:
nrecs = NUM_ENT(h);
break;
case P_IRECNO:
for (indx = 0; indx < top; indx += O_INDX)
- nrecs += GET_RINTERNAL(h, indx)->nrecs;
+ nrecs += GET_RINTERNAL(dbp, h, indx)->nrecs;
break;
}
diff --git a/db/btree/bt_search.c b/db/btree/bt_search.c
index d822198f2..dc35c7c68 100644
--- a/db/btree/bt_search.c
+++ b/db/btree/bt_search.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
/*
@@ -43,7 +43,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_search.c,v 11.32 2001/01/17 20:19:46 bostic Exp $";
+static const char revid[] = "$Id: bt_search.c,v 11.47 2003/06/30 17:19:35 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -53,21 +53,23 @@ static const char revid[] = "$Id: bt_search.c,v 11.32 2001/01/17 20:19:46 bostic
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "btree.h"
-#include "lock.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
+#include "dbinc/mp.h"
/*
* __bam_search --
* Search a btree for a key.
*
- * PUBLIC: int __bam_search __P((DBC *,
+ * PUBLIC: int __bam_search __P((DBC *, db_pgno_t,
* PUBLIC: const DBT *, u_int32_t, int, db_recno_t *, int *));
*/
int
-__bam_search(dbc, key, flags, stop, recnop, exactp)
+__bam_search(dbc, root_pgno, key, flags, stop, recnop, exactp)
DBC *dbc;
+ db_pgno_t root_pgno;
const DBT *key;
u_int32_t flags;
int stop, *exactp;
@@ -77,8 +79,9 @@ __bam_search(dbc, key, flags, stop, recnop, exactp)
BTREE_CURSOR *cp;
DB *dbp;
DB_LOCK lock;
+ DB_MPOOLFILE *mpf;
PAGE *h;
- db_indx_t base, i, indx, lim;
+ db_indx_t base, i, indx, *inp, lim;
db_lockmode_t lock_mode;
db_pgno_t pg;
db_recno_t recno;
@@ -86,6 +89,7 @@ __bam_search(dbc, key, flags, stop, recnop, exactp)
int (*func) __P((DB *, const DBT *, const DBT *));
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
t = dbp->bt_internal;
recno = 0;
@@ -109,12 +113,12 @@ __bam_search(dbc, key, flags, stop, recnop, exactp)
* Retrieve the root page.
*/
try_again:
- pg = cp->root;
+ pg = root_pgno == PGNO_INVALID ? cp->root : root_pgno;
stack = LF_ISSET(S_STACK) && F_ISSET(cp, C_RECNUM);
lock_mode = stack ? DB_LOCK_WRITE : DB_LOCK_READ;
if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
+ if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0) {
/* Did not read it, so we can release the lock */
(void)__LPUT(dbc, lock);
return (ret);
@@ -131,21 +135,21 @@ try_again:
if (!stack &&
((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
(LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
(void)__LPUT(dbc, lock);
lock_mode = DB_LOCK_WRITE;
if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
+ if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0) {
/* Did not read it, so we can release the lock */
(void)__LPUT(dbc, lock);
return (ret);
}
- if (!((LF_ISSET(S_PARENT)
- && (u_int8_t)(stop + 1) >= h->level) ||
+ if (!((LF_ISSET(S_PARENT) &&
+ (u_int8_t)(stop + 1) >= h->level) ||
(LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
/* Someone else split the root, start over. */
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
(void)__LPUT(dbc, lock);
goto try_again;
}
@@ -158,6 +162,7 @@ try_again:
t->bt_compare;
for (;;) {
+ inp = P_INP(dbp, h);
/*
* Do a binary search on the current page. If we're searching
* a Btree leaf page, we have to walk the indices in groups of
@@ -199,7 +204,7 @@ try_again:
if (LF_ISSET(S_STK_ONLY)) {
BT_STK_NUM(dbp->dbenv, cp, h, base, ret);
__LPUT(dbc, lock);
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
return (ret);
}
@@ -232,21 +237,21 @@ try_again:
*/
next: if (recnop != NULL)
for (i = 0; i < indx; ++i)
- recno += GET_BINTERNAL(h, i)->nrecs;
+ recno += GET_BINTERNAL(dbp, h, i)->nrecs;
- pg = GET_BINTERNAL(h, indx)->pgno;
+ pg = GET_BINTERNAL(dbp, h, indx)->pgno;
if (LF_ISSET(S_STK_ONLY)) {
if (stop == h->level) {
BT_STK_NUM(dbp->dbenv, cp, h, indx, ret);
__LPUT(dbc, lock);
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
return (ret);
}
BT_STK_NUMPUSH(dbp->dbenv, cp, h, indx, ret);
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
if ((ret = __db_lget(dbc,
- LCK_COUPLE, pg, lock_mode, 0, &lock)) != 0) {
+ LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
/*
* Discard our lock and return on failure. This
* is OK because it only happens when descending
@@ -284,12 +289,12 @@ next: if (recnop != NULL)
(h->level - 1) == LEAFLEVEL)
stack = 1;
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
lock_mode = stack &&
LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ;
if ((ret = __db_lget(dbc,
- LCK_COUPLE, pg, lock_mode, 0, &lock)) != 0) {
+ LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
/*
* If we fail, discard the lock we held. This
* is OK because this only happens when we are
@@ -299,7 +304,7 @@ next: if (recnop != NULL)
goto err;
}
}
- if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0)
+ if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0)
goto err;
}
/* NOTREACHED */
@@ -307,14 +312,6 @@ next: if (recnop != NULL)
found: *exactp = 1;
/*
- * If we're trying to calculate the record number, add in the
- * offset on this page and correct for the fact that records
- * in the tree are 0-based.
- */
- if (recnop != NULL)
- *recnop = recno + (indx / P_INDX) + 1;
-
- /*
* If we got here, we know that we have a Btree leaf or off-page
* duplicates page. If it's a Btree leaf page, we have to handle
* on-page duplicates.
@@ -327,11 +324,11 @@ found: *exactp = 1;
if (TYPE(h) == P_LBTREE) {
if (LF_ISSET(S_DUPLAST))
while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
- h->inp[indx] == h->inp[indx + P_INDX])
+ inp[indx] == inp[indx + P_INDX])
indx += P_INDX;
else
while (indx > 0 &&
- h->inp[indx] == h->inp[indx - P_INDX])
+ inp[indx] == inp[indx - P_INDX])
indx -= P_INDX;
}
@@ -341,32 +338,51 @@ found: *exactp = 1;
* not move from the original found key on the basis of the S_DELNO
* flag.)
*/
+ DB_ASSERT(recnop == NULL || LF_ISSET(S_DELNO));
if (LF_ISSET(S_DELNO)) {
deloffset = TYPE(h) == P_LBTREE ? O_INDX : 0;
if (LF_ISSET(S_DUPLAST))
- while (B_DISSET(GET_BKEYDATA(
+ while (B_DISSET(GET_BKEYDATA(dbp,
h, indx + deloffset)->type) && indx > 0 &&
- h->inp[indx] == h->inp[indx - adjust])
+ inp[indx] == inp[indx - adjust])
indx -= adjust;
else
- while (B_DISSET(GET_BKEYDATA(
+ while (B_DISSET(GET_BKEYDATA(dbp,
h, indx + deloffset)->type) &&
indx < (db_indx_t)(NUM_ENT(h) - adjust) &&
- h->inp[indx] == h->inp[indx + adjust])
+ inp[indx] == inp[indx + adjust])
indx += adjust;
/*
* If we weren't able to find a non-deleted duplicate, return
* DB_NOTFOUND.
*/
- if (B_DISSET(GET_BKEYDATA(h, indx + deloffset)->type))
+ if (B_DISSET(GET_BKEYDATA(dbp, h, indx + deloffset)->type))
goto notfound;
+
+ /*
+ * Increment the record counter to point to the found element.
+ * Ignore any deleted key/data pairs. There doesn't need to
+ * be any correction for duplicates, as Btree doesn't support
+ * duplicates and record numbers in the same tree.
+ */
+ if (recnop != NULL) {
+ DB_ASSERT(TYPE(h) == P_LBTREE);
+
+ for (i = 0; i < indx; i += P_INDX)
+ if (!B_DISSET(
+ GET_BKEYDATA(dbp, h, i + O_INDX)->type))
+ ++recno;
+
+ /* Correct the number for a 0-base. */
+ *recnop = recno + 1;
+ }
}
if (LF_ISSET(S_STK_ONLY)) {
BT_STK_NUM(dbp->dbenv, cp, h, indx, ret);
__LPUT(dbc, lock);
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
} else {
BT_STK_ENTER(dbp->dbenv, cp, h, indx, lock, lock_mode, ret);
if (ret != 0)
@@ -376,7 +392,7 @@ found: *exactp = 1;
notfound:
/* Keep the page locked for serializability. */
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
(void)__TLPUT(dbc, lock);
ret = DB_NOTFOUND;
@@ -398,10 +414,12 @@ __bam_stkrel(dbc, flags)
{
BTREE_CURSOR *cp;
DB *dbp;
+ DB_MPOOLFILE *mpf;
EPG *epg;
int ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
/*
@@ -414,10 +432,10 @@ __bam_stkrel(dbc, flags)
if (epg->page != NULL) {
if (LF_ISSET(STK_CLRDBC) && cp->page == epg->page) {
cp->page = NULL;
- cp->lock.off = LOCK_INVALID;
+ LOCK_INIT(cp->lock);
}
- if ((t_ret = memp_fput(
- dbp->mpf, epg->page, 0)) != 0 && ret == 0)
+ if ((t_ret =
+ __memp_fput(mpf, epg->page, 0)) != 0 && ret == 0)
ret = t_ret;
/*
* XXX
@@ -428,12 +446,10 @@ __bam_stkrel(dbc, flags)
*/
epg->page = NULL;
}
- if (epg->lock.off != LOCK_INVALID) {
- if (LF_ISSET(STK_NOLOCK))
- (void)__LPUT(dbc, epg->lock);
- else
- (void)__TLPUT(dbc, epg->lock);
- }
+ if (LF_ISSET(STK_NOLOCK))
+ (void)__LPUT(dbc, epg->lock);
+ else
+ (void)__TLPUT(dbc, epg->lock);
}
/* Clear the stack, all pages have been released. */
@@ -463,7 +479,7 @@ __bam_stkgrow(dbenv, cp)
return (ret);
memcpy(p, cp->sp, entries * sizeof(EPG));
if (cp->sp != cp->stack)
- __os_free(cp->sp, entries * sizeof(EPG));
+ __os_free(dbenv, cp->sp);
cp->sp = p;
cp->csp = p + entries;
cp->esp = p + entries * 2;
diff --git a/db/btree/bt_split.c b/db/btree/bt_split.c
index f76337b19..8c5066aed 100644
--- a/db/btree/bt_split.c
+++ b/db/btree/bt_split.c
@@ -1,7 +1,7 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
/*
@@ -40,7 +40,7 @@
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_split.c,v 11.31 2000/12/22 19:08:27 bostic Exp $";
+static const char revid[] = "$Id: bt_split.c,v 11.60 2003/06/30 17:19:35 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -51,10 +51,11 @@ static const char revid[] = "$Id: bt_split.c,v 11.31 2000/12/22 19:08:27 bostic
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/lock.h"
+#include "dbinc/mp.h"
+#include "dbinc/btree.h"
static int __bam_broot __P((DBC *, PAGE *, PAGE *, PAGE *));
static int __bam_page __P((DBC *, EPG *, EPG *));
@@ -67,21 +68,19 @@ static int __ram_root __P((DBC *, PAGE *, PAGE *, PAGE *));
* __bam_split --
* Split a page.
*
- * PUBLIC: int __bam_split __P((DBC *, void *));
+ * PUBLIC: int __bam_split __P((DBC *, void *, db_pgno_t *));
*/
int
-__bam_split(dbc, arg)
+__bam_split(dbc, arg, root_pgnop)
DBC *dbc;
void *arg;
+ db_pgno_t *root_pgnop;
{
- BTREE *t;
BTREE_CURSOR *cp;
- DB *dbp;
enum { UP, DOWN } dir;
db_pgno_t root_pgno;
int exact, level, ret;
- dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
root_pgno = cp->root;
@@ -112,17 +111,20 @@ __bam_split(dbc, arg)
* split. This would be an easy change for this code, but I have no
* numbers that indicate it's worthwhile.
*/
- t = dbp->bt_internal;
for (dir = UP, level = LEAFLEVEL;; dir == UP ? ++level : --level) {
/*
* Acquire a page and its parent, locked.
*/
if ((ret = (dbc->dbtype == DB_BTREE ?
- __bam_search(dbc, arg, S_WRPAIR, level, NULL, &exact) :
+ __bam_search(dbc, PGNO_INVALID,
+ arg, S_WRPAIR, level, NULL, &exact) :
__bam_rsearch(dbc,
(db_recno_t *)arg, S_WRPAIR, level, &exact))) != 0)
return (ret);
+ if (root_pgnop != NULL)
+ *root_pgnop = cp->csp[0].page->pgno == root_pgno ?
+ root_pgno : cp->csp[-1].page->pgno;
/*
* Split the page if it still needs it (it's possible another
* thread of control has already split the page). If we are
@@ -130,7 +132,7 @@ __bam_split(dbc, arg)
* is no longer necessary.
*/
if (2 * B_MAXSIZEONPAGE(cp->ovflsize)
- <= (db_indx_t)P_FREESPACE(cp->csp[0].page)) {
+ <= (db_indx_t)P_FREESPACE(dbc->dbp, cp->csp[0].page)) {
__bam_stkrel(dbc, STK_NOLOCK);
return (0);
}
@@ -178,12 +180,14 @@ __bam_root(dbc, cp)
DB *dbp;
DBT log_dbt;
DB_LSN log_lsn;
+ DB_MPOOLFILE *mpf;
PAGE *lp, *rp;
db_indx_t split;
u_int32_t opflags;
int ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
/* Yeah, right. */
if (cp->page->level >= MAXBTREELEVEL) {
@@ -210,21 +214,22 @@ __bam_root(dbc, cp)
goto err;
/* Log the change. */
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
memset(&log_dbt, 0, sizeof(log_dbt));
log_dbt.data = cp->page;
log_dbt.size = dbp->pgsize;
ZERO_LSN(log_lsn);
opflags = F_ISSET(
(BTREE_CURSOR *)dbc->internal, C_RECNUM) ? SPL_NRECS : 0;
- if ((ret = __bam_split_log(dbp->dbenv, dbc->txn,
- &LSN(cp->page), 0, dbp->log_fileid, PGNO(lp), &LSN(lp),
- PGNO(rp), &LSN(rp), (u_int32_t)NUM_ENT(lp), 0, &log_lsn,
+ if ((ret = __bam_split_log(dbp,
+ dbc->txn, &LSN(cp->page), 0, PGNO(lp), &LSN(lp), PGNO(rp),
+ &LSN(rp), (u_int32_t)NUM_ENT(lp), 0, &log_lsn,
dbc->internal->root, &log_dbt, opflags)) != 0)
goto err;
- LSN(lp) = LSN(cp->page);
- LSN(rp) = LSN(cp->page);
- }
+ } else
+ LSN_NOT_LOGGED(LSN(cp->page));
+ LSN(lp) = LSN(cp->page);
+ LSN(rp) = LSN(cp->page);
/* Clean up the new root page. */
if ((ret = (dbc->dbtype == DB_RECNO ?
@@ -238,18 +243,18 @@ __bam_root(dbc, cp)
goto err;
/* Success -- write the real pages back to the store. */
- (void)memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY);
+ (void)__memp_fput(mpf, cp->page, DB_MPOOL_DIRTY);
(void)__TLPUT(dbc, cp->lock);
- (void)memp_fput(dbp->mpf, lp, DB_MPOOL_DIRTY);
- (void)memp_fput(dbp->mpf, rp, DB_MPOOL_DIRTY);
+ (void)__memp_fput(mpf, lp, DB_MPOOL_DIRTY);
+ (void)__memp_fput(mpf, rp, DB_MPOOL_DIRTY);
return (0);
err: if (lp != NULL)
- (void)__db_free(dbc, lp);
+ (void)__memp_fput(mpf, lp, 0);
if (rp != NULL)
- (void)__db_free(dbc, rp);
- (void)memp_fput(dbp->mpf, cp->page, 0);
+ (void)__memp_fput(mpf, rp, 0);
+ (void)__memp_fput(mpf, cp->page, 0);
(void)__TLPUT(dbc, cp->lock);
return (ret);
}
@@ -267,7 +272,8 @@ __bam_page(dbc, pp, cp)
DBT log_dbt;
DB_LSN log_lsn;
DB *dbp;
- DB_LOCK tplock;
+ DB_LOCK rplock, tplock;
+ DB_MPOOLFILE *mpf;
DB_LSN save_lsn;
PAGE *lp, *rp, *alloc_rp, *tp;
db_indx_t split;
@@ -275,8 +281,10 @@ __bam_page(dbc, pp, cp)
int ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
alloc_rp = lp = rp = tp = NULL;
- tplock.off = LOCK_INVALID;
+ LOCK_INIT(rplock);
+ LOCK_INIT(tplock);
ret = -1;
/*
@@ -296,7 +304,7 @@ __bam_page(dbc, pp, cp)
* up the tree badly, because we've violated the rule of always locking
* down the tree, and never up.
*/
- if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &rp)) != 0)
+ if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &rp)) != 0)
goto err;
P_INIT(rp, dbp->pgsize, 0,
ISINTERNAL(cp->page) ? PGNO_INVALID : PGNO(cp->page),
@@ -307,7 +315,7 @@ __bam_page(dbc, pp, cp)
* Create new left page for the split, and fill in everything
* except its LSN and next-page page number.
*/
- if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &lp)) != 0)
+ if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &lp)) != 0)
goto err;
P_INIT(lp, dbp->pgsize, PGNO(cp->page),
ISINTERNAL(cp->page) ? PGNO_INVALID : PREV_PGNO(cp->page),
@@ -351,8 +359,7 @@ __bam_page(dbc, pp, cp)
if ((ret = __db_lget(dbc,
0, NEXT_PGNO(cp->page), DB_LOCK_WRITE, 0, &tplock)) != 0)
goto err;
- if ((ret =
- memp_fget(dbp->mpf, &NEXT_PGNO(cp->page), 0, &tp)) != 0)
+ if ((ret = __memp_fget(mpf, &NEXT_PGNO(cp->page), 0, &tp)) != 0)
goto err;
}
@@ -364,6 +371,15 @@ __bam_page(dbc, pp, cp)
goto err;
/*
+ * Lock the new page. We need to do this because someone
+ * could get here through bt_lpgno if this page was recently
+ * deallocated. They can't look at it before we commit.
+ */
+ if ((ret = __db_lget(dbc,
+ 0, PGNO(alloc_rp), DB_LOCK_WRITE, 0, &rplock)) != 0)
+ goto err;
+
+ /*
* Fix up the page numbers we didn't have before. We have to do this
* before calling __bam_pinsert because it may copy a page number onto
* the parent page and it takes the page number from its page argument.
@@ -376,29 +392,30 @@ __bam_page(dbc, pp, cp)
bc = (BTREE_CURSOR *)dbc->internal;
/* Log the change. */
- if (DB_LOGGING(dbc)) {
+ if (DBC_LOGGING(dbc)) {
memset(&log_dbt, 0, sizeof(log_dbt));
log_dbt.data = cp->page;
log_dbt.size = dbp->pgsize;
if (tp == NULL)
ZERO_LSN(log_lsn);
opflags = F_ISSET(bc, C_RECNUM) ? SPL_NRECS : 0;
- if ((ret = __bam_split_log(dbp->dbenv, dbc->txn,
- &LSN(cp->page), 0, dbp->log_fileid, PGNO(cp->page),
- &LSN(cp->page), PGNO(alloc_rp), &LSN(alloc_rp),
- (u_int32_t)NUM_ENT(lp),
+ if ((ret = __bam_split_log(dbp, dbc->txn, &LSN(cp->page), 0,
+ PGNO(cp->page), &LSN(cp->page), PGNO(alloc_rp),
+ &LSN(alloc_rp), (u_int32_t)NUM_ENT(lp),
tp == NULL ? 0 : PGNO(tp),
tp == NULL ? &log_lsn : &LSN(tp),
- bc->root, &log_dbt, opflags)) != 0)
+ PGNO_INVALID, &log_dbt, opflags)) != 0)
goto err;
- /* Update the LSNs for all involved pages. */
- LSN(alloc_rp) = LSN(cp->page);
- LSN(lp) = LSN(cp->page);
- LSN(rp) = LSN(cp->page);
- if (tp != NULL)
- LSN(tp) = LSN(cp->page);
- }
+ } else
+ LSN_NOT_LOGGED(LSN(cp->page));
+
+ /* Update the LSNs for all involved pages. */
+ LSN(alloc_rp) = LSN(cp->page);
+ LSN(lp) = LSN(cp->page);
+ LSN(rp) = LSN(cp->page);
+ if (tp != NULL)
+ LSN(tp) = LSN(cp->page);
/*
* Copy the left and right pages into place. There are two paths
@@ -411,13 +428,13 @@ __bam_page(dbc, pp, cp)
* do the copy.
*/
save_lsn = alloc_rp->lsn;
- memcpy(alloc_rp, rp, LOFFSET(rp));
+ memcpy(alloc_rp, rp, LOFFSET(dbp, rp));
memcpy((u_int8_t *)alloc_rp + HOFFSET(rp),
(u_int8_t *)rp + HOFFSET(rp), dbp->pgsize - HOFFSET(rp));
alloc_rp->lsn = save_lsn;
save_lsn = cp->page->lsn;
- memcpy(cp->page, lp, LOFFSET(lp));
+ memcpy(cp->page, lp, LOFFSET(dbp, lp));
memcpy((u_int8_t *)cp->page + HOFFSET(lp),
(u_int8_t *)lp + HOFFSET(lp), dbp->pgsize - HOFFSET(lp));
cp->page->lsn = save_lsn;
@@ -431,8 +448,8 @@ __bam_page(dbc, pp, cp)
PGNO(cp->page), PGNO(cp->page), PGNO(rp), split, 0)) != 0)
goto err;
- __os_free(lp, dbp->pgsize);
- __os_free(rp, dbp->pgsize);
+ __os_free(dbp->dbenv, lp);
+ __os_free(dbp->dbenv, rp);
/*
* Success -- write the real pages back to the store. As we never
@@ -441,44 +458,45 @@ __bam_page(dbc, pp, cp)
* modifying the page so it's not really necessary, but it's neater.
*/
if ((t_ret =
- memp_fput(dbp->mpf, alloc_rp, DB_MPOOL_DIRTY)) != 0 && ret == 0)
+ __memp_fput(mpf, alloc_rp, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
+ (void)__TLPUT(dbc, rplock);
if ((t_ret =
- memp_fput(dbp->mpf, pp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
+ __memp_fput(mpf, pp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
(void)__TLPUT(dbc, pp->lock);
if ((t_ret =
- memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
+ __memp_fput(mpf, cp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
(void)__TLPUT(dbc, cp->lock);
if (tp != NULL) {
if ((t_ret =
- memp_fput(dbp->mpf, tp, DB_MPOOL_DIRTY)) != 0 && ret == 0)
+ __memp_fput(mpf, tp, DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
(void)__TLPUT(dbc, tplock);
}
return (ret);
err: if (lp != NULL)
- __os_free(lp, dbp->pgsize);
+ __os_free(dbp->dbenv, lp);
if (rp != NULL)
- __os_free(rp, dbp->pgsize);
+ __os_free(dbp->dbenv, rp);
if (alloc_rp != NULL)
- (void)__db_free(dbc, alloc_rp);
-
+ (void)__memp_fput(mpf, alloc_rp, 0);
if (tp != NULL)
- (void)memp_fput(dbp->mpf, tp, 0);
- if (tplock.off != LOCK_INVALID)
- /* We never updated the next page, we can release it. */
- (void)__LPUT(dbc, tplock);
+ (void)__memp_fput(mpf, tp, 0);
+
+ /* We never updated the new or next pages, we can release them. */
+ (void)__LPUT(dbc, rplock);
+ (void)__LPUT(dbc, tplock);
- (void)memp_fput(dbp->mpf, pp->page, 0);
+ (void)__memp_fput(mpf, pp->page, 0);
if (ret == DB_NEEDSPLIT)
(void)__LPUT(dbc, pp->lock);
else
(void)__TLPUT(dbc, pp->lock);
- (void)memp_fput(dbp->mpf, cp->page, 0);
+ (void)__memp_fput(mpf, cp->page, 0);
if (ret == DB_NEEDSPLIT)
(void)__LPUT(dbc, cp->lock);
else
@@ -529,7 +547,7 @@ __bam_broot(dbc, rootp, lp, rp)
B_TSET(bi.type, B_KEYDATA, 0);
bi.pgno = lp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
- bi.nrecs = __bam_total(lp);
+ bi.nrecs = __bam_total(dbp, lp);
RE_NREC_SET(rootp, bi.nrecs);
}
hdr.data = &bi;
@@ -541,13 +559,13 @@ __bam_broot(dbc, rootp, lp, rp)
switch (TYPE(rp)) {
case P_IBTREE:
/* Copy the first key of the child page onto the root page. */
- child_bi = GET_BINTERNAL(rp, 0);
+ child_bi = GET_BINTERNAL(dbp, rp, 0);
bi.len = child_bi->len;
B_TSET(bi.type, child_bi->type, 0);
bi.pgno = rp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
- bi.nrecs = __bam_total(rp);
+ bi.nrecs = __bam_total(dbp, rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
hdr.data = &bi;
@@ -567,14 +585,14 @@ __bam_broot(dbc, rootp, lp, rp)
case P_LDUP:
case P_LBTREE:
/* Copy the first key of the child page onto the root page. */
- child_bk = GET_BKEYDATA(rp, 0);
+ child_bk = GET_BKEYDATA(dbp, rp, 0);
switch (B_TYPE(child_bk->type)) {
case B_KEYDATA:
bi.len = child_bk->len;
B_TSET(bi.type, child_bk->type, 0);
bi.pgno = rp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
- bi.nrecs = __bam_total(rp);
+ bi.nrecs = __bam_total(dbp, rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
hdr.data = &bi;
@@ -591,7 +609,7 @@ __bam_broot(dbc, rootp, lp, rp)
B_TSET(bi.type, child_bk->type, 0);
bi.pgno = rp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
- bi.nrecs = __bam_total(rp);
+ bi.nrecs = __bam_total(dbp, rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
hdr.data = &bi;
@@ -609,11 +627,11 @@ __bam_broot(dbc, rootp, lp, rp)
return (ret);
break;
default:
- return (__db_pgfmt(dbp, rp->pgno));
+ return (__db_pgfmt(dbp->dbenv, rp->pgno));
}
break;
default:
- return (__db_pgfmt(dbp, rp->pgno));
+ return (__db_pgfmt(dbp->dbenv, rp->pgno));
}
return (0);
}
@@ -647,12 +665,12 @@ __ram_root(dbc, rootp, lp, rp)
/* Insert the left and right keys, set the header information. */
ri.pgno = lp->pgno;
- ri.nrecs = __bam_total(lp);
+ ri.nrecs = __bam_total(dbp, lp);
if ((ret = __db_pitem(dbc, rootp, 0, RINTERNAL_SIZE, &hdr, NULL)) != 0)
return (ret);
RE_NREC_SET(rootp, ri.nrecs);
ri.pgno = rp->pgno;
- ri.nrecs = __bam_total(rp);
+ ri.nrecs = __bam_total(dbp, rp);
if ((ret = __db_pitem(dbc, rootp, 1, RINTERNAL_SIZE, &hdr, NULL)) != 0)
return (ret);
RE_NREC_ADJ(rootp, ri.nrecs);
@@ -690,7 +708,8 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check)
ppage = parent->page;
/* If handling record numbers, count records split to the right page. */
- nrecs = F_ISSET(cp, C_RECNUM) && !space_check ? __bam_total(rchild) : 0;
+ nrecs = F_ISSET(cp, C_RECNUM) &&
+ !space_check ? __bam_total(dbp, rchild) : 0;
/*
* Now we insert the new page's first key into the parent page, which
@@ -721,10 +740,10 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check)
*/
switch (TYPE(rchild)) {
case P_IBTREE:
- child_bi = GET_BINTERNAL(rchild, 0);
+ child_bi = GET_BINTERNAL(dbp, rchild, 0);
nbytes = BINTERNAL_PSIZE(child_bi->len);
- if (P_FREESPACE(ppage) < nbytes)
+ if (P_FREESPACE(dbp, ppage) < nbytes)
return (DB_NEEDSPLIT);
if (space_check)
return (0);
@@ -753,7 +772,7 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check)
break;
case P_LDUP:
case P_LBTREE:
- child_bk = GET_BKEYDATA(rchild, 0);
+ child_bk = GET_BKEYDATA(dbp, rchild, 0);
switch (B_TYPE(child_bk->type)) {
case B_KEYDATA:
/*
@@ -783,7 +802,7 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check)
goto noprefix;
if (ppage->prev_pgno == PGNO_INVALID && off <= 1)
goto noprefix;
- tmp_bk = GET_BKEYDATA(lchild, NUM_ENT(lchild) -
+ tmp_bk = GET_BKEYDATA(dbp, lchild, NUM_ENT(lchild) -
(TYPE(lchild) == P_LDUP ? O_INDX : P_INDX));
if (B_TYPE(tmp_bk->type) != B_KEYDATA)
goto noprefix;
@@ -793,13 +812,13 @@ __bam_pinsert(dbc, parent, lchild, rchild, space_check)
memset(&b, 0, sizeof(b));
b.size = child_bk->len;
b.data = child_bk->data;
- nksize = func(dbp, &a, &b);
+ nksize = (u_int32_t)func(dbp, &a, &b);
if ((n = BINTERNAL_PSIZE(nksize)) < nbytes)
nbytes = n;
else
noprefix: nksize = child_bk->len;
- if (P_FREESPACE(ppage) < nbytes)
+ if (P_FREESPACE(dbp, ppage) < nbytes)
return (DB_NEEDSPLIT);
if (space_check)
return (0);
@@ -823,7 +842,7 @@ noprefix: nksize = child_bk->len;
case B_OVERFLOW:
nbytes = BINTERNAL_PSIZE(BOVERFLOW_SIZE);
- if (P_FREESPACE(ppage) < nbytes)
+ if (P_FREESPACE(dbp, ppage) < nbytes)
return (DB_NEEDSPLIT);
if (space_check)
return (0);
@@ -850,14 +869,14 @@ noprefix: nksize = child_bk->len;
return (ret);
break;
default:
- return (__db_pgfmt(dbp, rchild->pgno));
+ return (__db_pgfmt(dbp->dbenv, rchild->pgno));
}
break;
case P_IRECNO:
case P_LRECNO:
nbytes = RINTERNAL_PSIZE;
- if (P_FREESPACE(ppage) < nbytes)
+ if (P_FREESPACE(dbp, ppage) < nbytes)
return (DB_NEEDSPLIT);
if (space_check)
return (0);
@@ -873,7 +892,7 @@ noprefix: nksize = child_bk->len;
return (ret);
break;
default:
- return (__db_pgfmt(dbp, rchild->pgno));
+ return (__db_pgfmt(dbp->dbenv, rchild->pgno));
}
/*
@@ -882,17 +901,19 @@ noprefix: nksize = child_bk->len;
*/
if (F_ISSET(cp, C_RECNUM)) {
/* Log the change. */
- if (DB_LOGGING(dbc) &&
- (ret = __bam_cadjust_log(dbp->dbenv, dbc->txn,
- &LSN(ppage), 0, dbp->log_fileid, PGNO(ppage),
+ if (DBC_LOGGING(dbc)) {
+ if ((ret = __bam_cadjust_log(dbp, dbc->txn,
+ &LSN(ppage), 0, PGNO(ppage),
&LSN(ppage), parent->indx, -(int32_t)nrecs, 0)) != 0)
return (ret);
+ } else
+ LSN_NOT_LOGGED(LSN(ppage));
/* Update the left page count. */
if (dbc->dbtype == DB_RECNO)
- GET_RINTERNAL(ppage, parent->indx)->nrecs -= nrecs;
+ GET_RINTERNAL(dbp, ppage, parent->indx)->nrecs -= nrecs;
else
- GET_BINTERNAL(ppage, parent->indx)->nrecs -= nrecs;
+ GET_BINTERNAL(dbp, ppage, parent->indx)->nrecs -= nrecs;
}
return (0);
@@ -911,28 +932,52 @@ __bam_psplit(dbc, cp, lp, rp, splitret)
{
DB *dbp;
PAGE *pp;
- db_indx_t half, nbytes, off, splitp, top;
+ db_indx_t half, *inp, nbytes, off, splitp, top;
int adjust, cnt, iflag, isbigkey, ret;
dbp = dbc->dbp;
pp = cp->page;
+ inp = P_INP(dbp, pp);
adjust = TYPE(pp) == P_LBTREE ? P_INDX : O_INDX;
/*
* If we're splitting the first (last) page on a level because we're
* inserting (appending) a key to it, it's likely that the data is
* sorted. Moving a single item to the new page is less work and can
- * push the fill factor higher than normal. If we're wrong it's not
- * a big deal, we'll just do the split the right way next time.
+ * push the fill factor higher than normal. This is trivial when we
+ * are splitting a new page before the beginning of the tree; all of
+ * the interesting tests are against values of 0.
+ *
+ * Catching appends to the tree is harder. In a simple append, we're
+ * inserting an item that sorts past the end of the tree; the cursor
+ * will point past the last element on the page. But, in trees with
+ * duplicates, the cursor may point to the last entry on the page --
+ * in this case, the entry will also be the last element of a duplicate
+ * set (the last because the search call specified the S_DUPLAST flag).
+ * The only way to differentiate between an insert immediately before
+ * the last item in a tree or an append after a duplicate set which is
+ * also the last item in the tree is to call the comparison function.
+ * When splitting internal pages during an append, the search code
+ * guarantees the cursor always points to the largest page item less
+ * than the new internal entry. To summarize, we want to catch three
+ * possible index values:
+ *
+ * NUM_ENT(page) Btree/Recno leaf insert past end-of-tree
+ * NUM_ENT(page) - O_INDX Btree or Recno internal insert past EOT
+ * NUM_ENT(page) - P_INDX Btree leaf insert past EOT after a set
+ * of duplicates
+ *
+ * two of which (NUM_ENT(page) - O_INDX or P_INDX) might be an insert
+ * near the end of the tree, and not after the end of the tree at all.
+ * Do a simple test which might be wrong because calling the comparison
+ * functions is expensive. Regardless, it's not a big deal if we're
+ * wrong, we'll do the split the right way next time.
*/
off = 0;
- if (NEXT_PGNO(pp) == PGNO_INVALID &&
- ((ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page) - 1) ||
- (!ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page))))
- off = NUM_ENT(cp->page) - adjust;
+ if (NEXT_PGNO(pp) == PGNO_INVALID && cp->indx >= NUM_ENT(pp) - adjust)
+ off = NUM_ENT(pp) - adjust;
else if (PREV_PGNO(pp) == PGNO_INVALID && cp->indx == 0)
off = adjust;
-
if (off != 0)
goto sort;
@@ -962,16 +1007,18 @@ __bam_psplit(dbc, cp, lp, rp, splitret)
for (nbytes = 0, off = 0; off < top && nbytes < half; ++off)
switch (TYPE(pp)) {
case P_IBTREE:
- if (B_TYPE(GET_BINTERNAL(pp, off)->type) == B_KEYDATA)
- nbytes +=
- BINTERNAL_SIZE(GET_BINTERNAL(pp, off)->len);
+ if (B_TYPE(
+ GET_BINTERNAL(dbp, pp, off)->type) == B_KEYDATA)
+ nbytes += BINTERNAL_SIZE(
+ GET_BINTERNAL(dbp, pp, off)->len);
else
nbytes += BINTERNAL_SIZE(BOVERFLOW_SIZE);
break;
case P_LBTREE:
- if (B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA)
- nbytes +=
- BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
+ if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
+ B_KEYDATA)
+ nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp,
+ pp, off)->len);
else
nbytes += BOVERFLOW_SIZE;
@@ -979,9 +1026,10 @@ __bam_psplit(dbc, cp, lp, rp, splitret)
/* FALLTHROUGH */
case P_LDUP:
case P_LRECNO:
- if (B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA)
- nbytes +=
- BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
+ if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
+ B_KEYDATA)
+ nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp,
+ pp, off)->len);
else
nbytes += BOVERFLOW_SIZE;
break;
@@ -989,7 +1037,7 @@ __bam_psplit(dbc, cp, lp, rp, splitret)
nbytes += RINTERNAL_SIZE;
break;
default:
- return (__db_pgfmt(dbp, pp->pgno));
+ return (__db_pgfmt(dbp->dbenv, pp->pgno));
}
sort: splitp = off;
@@ -1002,12 +1050,14 @@ sort: splitp = off;
switch (TYPE(pp)) {
case P_IBTREE:
iflag = 1;
- isbigkey = B_TYPE(GET_BINTERNAL(pp, off)->type) != B_KEYDATA;
+ isbigkey =
+ B_TYPE(GET_BINTERNAL(dbp, pp, off)->type) != B_KEYDATA;
break;
case P_LBTREE:
case P_LDUP:
iflag = 0;
- isbigkey = B_TYPE(GET_BKEYDATA(pp, off)->type) != B_KEYDATA;
+ isbigkey = B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) !=
+ B_KEYDATA;
break;
default:
iflag = isbigkey = 0;
@@ -1016,18 +1066,20 @@ sort: splitp = off;
for (cnt = 1; cnt <= 3; ++cnt) {
off = splitp + cnt * adjust;
if (off < (db_indx_t)NUM_ENT(pp) &&
- ((iflag &&
- B_TYPE(GET_BINTERNAL(pp,off)->type) == B_KEYDATA) ||
- B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA)) {
+ ((iflag && B_TYPE(
+ GET_BINTERNAL(dbp, pp,off)->type) == B_KEYDATA) ||
+ B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
+ B_KEYDATA)) {
splitp = off;
break;
}
if (splitp <= (db_indx_t)(cnt * adjust))
continue;
off = splitp - cnt * adjust;
- if (iflag ?
- B_TYPE(GET_BINTERNAL(pp, off)->type) == B_KEYDATA :
- B_TYPE(GET_BKEYDATA(pp, off)->type) == B_KEYDATA) {
+ if (iflag ? B_TYPE(
+ GET_BINTERNAL(dbp, pp, off)->type) == B_KEYDATA :
+ B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
+ B_KEYDATA) {
splitp = off;
break;
}
@@ -1040,18 +1092,18 @@ sort: splitp = off;
* page set. So, this loop can't be unbounded.
*/
if (TYPE(pp) == P_LBTREE &&
- pp->inp[splitp] == pp->inp[splitp - adjust])
+ inp[splitp] == inp[splitp - adjust])
for (cnt = 1;; ++cnt) {
off = splitp + cnt * adjust;
if (off < NUM_ENT(pp) &&
- pp->inp[splitp] != pp->inp[off]) {
+ inp[splitp] != inp[off]) {
splitp = off;
break;
}
if (splitp <= (db_indx_t)(cnt * adjust))
continue;
off = splitp - cnt * adjust;
- if (pp->inp[splitp] != pp->inp[off]) {
+ if (inp[splitp] != inp[off]) {
splitp = off + adjust;
break;
}
@@ -1079,18 +1131,20 @@ __bam_copy(dbp, pp, cp, nxt, stop)
PAGE *pp, *cp;
u_int32_t nxt, stop;
{
- db_indx_t nbytes, off;
+ db_indx_t *cinp, nbytes, off, *pinp;
+ cinp = P_INP(dbp, cp);
+ pinp = P_INP(dbp, pp);
/*
- * Copy the rest of the data to the right page. Nxt is the next
- * offset placed on the target page.
+ * Nxt is the offset of the next record to be placed on the target page.
*/
for (off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) {
switch (TYPE(pp)) {
case P_IBTREE:
- if (B_TYPE(GET_BINTERNAL(pp, nxt)->type) == B_KEYDATA)
- nbytes =
- BINTERNAL_SIZE(GET_BINTERNAL(pp, nxt)->len);
+ if (B_TYPE(
+ GET_BINTERNAL(dbp, pp, nxt)->type) == B_KEYDATA)
+ nbytes = BINTERNAL_SIZE(
+ GET_BINTERNAL(dbp, pp, nxt)->len);
else
nbytes = BINTERNAL_SIZE(BOVERFLOW_SIZE);
break;
@@ -1100,16 +1154,17 @@ __bam_copy(dbp, pp, cp, nxt, stop)
* the offset.
*/
if (off != 0 && (nxt % P_INDX) == 0 &&
- pp->inp[nxt] == pp->inp[nxt - P_INDX]) {
- cp->inp[off] = cp->inp[off - P_INDX];
+ pinp[nxt] == pinp[nxt - P_INDX]) {
+ cinp[off] = cinp[off - P_INDX];
continue;
}
/* FALLTHROUGH */
case P_LDUP:
case P_LRECNO:
- if (B_TYPE(GET_BKEYDATA(pp, nxt)->type) == B_KEYDATA)
- nbytes =
- BKEYDATA_SIZE(GET_BKEYDATA(pp, nxt)->len);
+ if (B_TYPE(GET_BKEYDATA(dbp, pp, nxt)->type) ==
+ B_KEYDATA)
+ nbytes = BKEYDATA_SIZE(GET_BKEYDATA(dbp,
+ pp, nxt)->len);
else
nbytes = BOVERFLOW_SIZE;
break;
@@ -1117,10 +1172,10 @@ __bam_copy(dbp, pp, cp, nxt, stop)
nbytes = RINTERNAL_SIZE;
break;
default:
- return (__db_pgfmt(dbp, pp->pgno));
+ return (__db_pgfmt(dbp->dbenv, pp->pgno));
}
- cp->inp[off] = HOFFSET(cp) -= nbytes;
- memcpy(P_ENTRY(cp, off), P_ENTRY(pp, nxt), nbytes);
+ cinp[off] = HOFFSET(cp) -= nbytes;
+ memcpy(P_ENTRY(dbp, cp, off), P_ENTRY(dbp, pp, nxt), nbytes);
}
return (0);
}
diff --git a/db/btree/bt_stat.c b/db/btree/bt_stat.c
index 349bb40cf..0e8cff37f 100644
--- a/db/btree/bt_stat.c
+++ b/db/btree/bt_stat.c
@@ -1,14 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_stat.c,v 11.29 2000/11/28 21:42:27 bostic Exp $";
+static const char revid[] = "$Id: bt_stat.c,v 11.61 2003/09/13 18:52:21 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -18,102 +18,76 @@ static const char revid[] = "$Id: bt_stat.c,v 11.29 2000/11/28 21:42:27 bostic E
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_shash.h"
-#include "lock.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/btree.h"
+#include "dbinc/lock.h"
+#include "dbinc/mp.h"
/*
* __bam_stat --
* Gather/print the btree statistics
*
- * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t));
+ * PUBLIC: int __bam_stat __P((DBC *, void *, u_int32_t));
*/
int
-__bam_stat(dbp, spp, db_malloc, flags)
- DB *dbp;
+__bam_stat(dbc, spp, flags)
+ DBC *dbc;
void *spp;
- void *(*db_malloc) __P((size_t));
u_int32_t flags;
{
BTMETA *meta;
BTREE *t;
BTREE_CURSOR *cp;
- DBC *dbc;
+ DB *dbp;
DB_BTREE_STAT *sp;
+ DB_ENV *dbenv;
DB_LOCK lock, metalock;
+ DB_MPOOLFILE *mpf;
PAGE *h;
db_pgno_t pgno;
- int ret, t_ret;
+ int ret, t_ret, write_meta;
- PANIC_CHECK(dbp->dbenv);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat");
+ dbp = dbc->dbp;
+ dbenv = dbp->dbenv;
meta = NULL;
t = dbp->bt_internal;
sp = NULL;
- metalock.off = lock.off = LOCK_INVALID;
+ LOCK_INIT(metalock);
+ LOCK_INIT(lock);
+ mpf = dbp->mpf;
h = NULL;
- ret = 0;
-
- /* Check for invalid flags. */
- if ((ret = __db_statchk(dbp, flags)) != 0)
- return (ret);
+ ret = write_meta = 0;
- /* Acquire a cursor. */
- if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
- return (ret);
cp = (BTREE_CURSOR *)dbc->internal;
- DEBUG_LWRITE(dbc, NULL, "bam_stat", NULL, NULL, flags);
-
/* Allocate and clear the structure. */
- if ((ret = __os_malloc(dbp->dbenv, sizeof(*sp), db_malloc, &sp)) != 0)
+ if ((ret = __os_umalloc(dbenv, sizeof(*sp), &sp)) != 0)
goto err;
memset(sp, 0, sizeof(*sp));
- /* If the app just wants the record count, make it fast. */
- if (flags == DB_RECORDCOUNT) {
- if ((ret = __db_lget(dbc, 0,
- cp->root, DB_LOCK_READ, 0, &lock)) != 0)
- goto err;
- if ((ret = memp_fget(dbp->mpf,
- &cp->root, 0, (PAGE **)&h)) != 0)
- goto err;
-
- sp->bt_nkeys = RE_NREC(h);
-
- goto done;
- }
- if (flags == DB_CACHED_COUNTS) {
- if ((ret = __db_lget(dbc,
- 0, t->bt_meta, DB_LOCK_READ, 0, &lock)) != 0)
- goto err;
- if ((ret =
- memp_fget(dbp->mpf, &t->bt_meta, 0, (PAGE **)&meta)) != 0)
- goto err;
- sp->bt_nkeys = meta->dbmeta.key_count;
- sp->bt_ndata = meta->dbmeta.record_count;
-
- goto done;
- }
-
/* Get the metadata page for the entire database. */
pgno = PGNO_BASE_MD;
if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &metalock)) != 0)
goto err;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, (PAGE **)&meta)) != 0)
+ if ((ret = __memp_fget(mpf, &pgno, 0, &meta)) != 0)
goto err;
+ if (flags == DB_RECORDCOUNT || flags == DB_CACHED_COUNTS)
+ flags = DB_FAST_STAT;
+ if (flags == DB_FAST_STAT)
+ goto meta_only;
+
/* Walk the metadata free list, counting pages. */
for (sp->bt_free = 0, pgno = meta->dbmeta.free; pgno != PGNO_INVALID;) {
++sp->bt_free;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if ((ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
goto err;
pgno = h->next_pgno;
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ if ((ret = __memp_fput(mpf, h, 0)) != 0)
goto err;
h = NULL;
}
@@ -122,14 +96,14 @@ __bam_stat(dbp, spp, db_malloc, flags)
pgno = cp->root;
if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lock)) != 0)
goto err;
- if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if ((ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
goto err;
/* Get the levels from the root page. */
sp->bt_levels = h->level;
/* Discard the root page. */
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ if ((ret = __memp_fput(mpf, h, 0)) != 0)
goto err;
h = NULL;
__LPUT(dbc, lock);
@@ -143,20 +117,36 @@ __bam_stat(dbp, spp, db_malloc, flags)
* Get the subdatabase metadata page if it's not the same as the
* one we already have.
*/
- if (t->bt_meta != PGNO_BASE_MD || !F_ISSET(dbp, DB_AM_RDONLY)) {
- if ((ret = memp_fput(dbp->mpf, meta, 0)) != 0)
+ write_meta = !F_ISSET(dbp, DB_AM_RDONLY);
+meta_only:
+ if (t->bt_meta != PGNO_BASE_MD || write_meta != 0) {
+ if ((ret = __memp_fput(mpf, meta, 0)) != 0)
goto err;
meta = NULL;
__LPUT(dbc, metalock);
if ((ret = __db_lget(dbc,
- 0, t->bt_meta, F_ISSET(dbp, DB_AM_RDONLY) ?
+ 0, t->bt_meta, write_meta == 0 ?
DB_LOCK_READ : DB_LOCK_WRITE, 0, &metalock)) != 0)
goto err;
- if ((ret =
- memp_fget(dbp->mpf, &t->bt_meta, 0, (PAGE **)&meta)) != 0)
+ if ((ret = __memp_fget(mpf, &t->bt_meta, 0, &meta)) != 0)
goto err;
}
+ if (flags == DB_FAST_STAT) {
+ if (dbp->type == DB_RECNO ||
+ (dbp->type == DB_BTREE && F_ISSET(dbp, DB_AM_RECNUM))) {
+ if ((ret = __db_lget(dbc, 0,
+ cp->root, DB_LOCK_READ, 0, &lock)) != 0)
+ goto err;
+ if ((ret =
+ __memp_fget(mpf, &cp->root, 0, (PAGE **)&h)) != 0)
+ goto err;
+
+ sp->bt_nkeys = RE_NREC(h);
+ } else
+ sp->bt_nkeys = meta->dbmeta.key_count;
+ sp->bt_ndata = meta->dbmeta.record_count;
+ }
/* Get metadata page statistics. */
sp->bt_metaflags = meta->dbmeta.flags;
@@ -167,38 +157,29 @@ __bam_stat(dbp, spp, db_malloc, flags)
sp->bt_pagesize = meta->dbmeta.pagesize;
sp->bt_magic = meta->dbmeta.magic;
sp->bt_version = meta->dbmeta.version;
- if (!F_ISSET(dbp, DB_AM_RDONLY)) {
+
+ if (write_meta != 0) {
meta->dbmeta.key_count = sp->bt_nkeys;
meta->dbmeta.record_count = sp->bt_ndata;
}
- /* Discard the metadata page. */
- if ((ret = memp_fput(dbp->mpf,
- meta, F_ISSET(dbp, DB_AM_RDONLY) ? 0 : DB_MPOOL_DIRTY)) != 0)
- goto err;
- meta = NULL;
- __LPUT(dbc, metalock);
-
-done: *(DB_BTREE_STAT **)spp = sp;
-
- if (0) {
-err: if (sp != NULL)
- __os_free(sp, sizeof(*sp));
- }
+ *(DB_BTREE_STAT **)spp = sp;
- if (h != NULL &&
- (t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0)
+err: /* Discard the second page. */
+ __LPUT(dbc, lock);
+ if (h != NULL && (t_ret = __memp_fput(mpf, h, 0)) != 0 && ret == 0)
ret = t_ret;
- if (meta != NULL &&
- (t_ret = memp_fput(dbp->mpf, meta, 0)) != 0 && ret == 0)
+ /* Discard the metadata page. */
+ __LPUT(dbc, metalock);
+ if (meta != NULL && (t_ret = __memp_fput(
+ mpf, meta, write_meta == 0 ? 0 : DB_MPOOL_DIRTY)) != 0 && ret == 0)
ret = t_ret;
- if (lock.off != LOCK_INVALID)
- __LPUT(dbc, lock);
-
- if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
+ if (ret != 0 && sp != NULL) {
+ __os_ufree(dbenv, sp);
+ *(DB_BTREE_STAT **)spp = NULL;
+ }
return (ret);
}
@@ -222,22 +203,27 @@ __bam_traverse(dbc, mode, root_pgno, callback, cookie)
BKEYDATA *bk;
DB *dbp;
DB_LOCK lock;
+ DB_MPOOLFILE *mpf;
PAGE *h;
RINTERNAL *ri;
db_indx_t indx;
int already_put, ret, t_ret;
dbp = dbc->dbp;
+ mpf = dbp->mpf;
+ already_put = 0;
if ((ret = __db_lget(dbc, 0, root_pgno, mode, 0, &lock)) != 0)
return (ret);
- if ((ret = memp_fget(dbp->mpf, &root_pgno, 0, &h)) != 0)
- goto err;
+ if ((ret = __memp_fget(mpf, &root_pgno, 0, &h)) != 0) {
+ __LPUT(dbc, lock);
+ return (ret);
+ }
switch (TYPE(h)) {
case P_IBTREE:
for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) {
- bi = GET_BINTERNAL(h, indx);
+ bi = GET_BINTERNAL(dbp, h, indx);
if (B_TYPE(bi->type) == B_OVERFLOW &&
(ret = __db_traverse_big(dbp,
((BOVERFLOW *)bi->data)->pgno,
@@ -245,34 +231,34 @@ __bam_traverse(dbc, mode, root_pgno, callback, cookie)
goto err;
if ((ret = __bam_traverse(
dbc, mode, bi->pgno, callback, cookie)) != 0)
- break;
+ goto err;
}
break;
case P_IRECNO:
for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) {
- ri = GET_RINTERNAL(h, indx);
+ ri = GET_RINTERNAL(dbp, h, indx);
if ((ret = __bam_traverse(
dbc, mode, ri->pgno, callback, cookie)) != 0)
- break;
+ goto err;
}
break;
case P_LBTREE:
for (indx = 0; indx < NUM_ENT(h); indx += P_INDX) {
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
if (B_TYPE(bk->type) == B_OVERFLOW &&
(ret = __db_traverse_big(dbp,
- GET_BOVERFLOW(h, indx)->pgno,
+ GET_BOVERFLOW(dbp, h, indx)->pgno,
callback, cookie)) != 0)
goto err;
- bk = GET_BKEYDATA(h, indx + O_INDX);
+ bk = GET_BKEYDATA(dbp, h, indx + O_INDX);
if (B_TYPE(bk->type) == B_DUPLICATE &&
(ret = __bam_traverse(dbc, mode,
- GET_BOVERFLOW(h, indx + O_INDX)->pgno,
+ GET_BOVERFLOW(dbp, h, indx + O_INDX)->pgno,
callback, cookie)) != 0)
goto err;
if (B_TYPE(bk->type) == B_OVERFLOW &&
(ret = __db_traverse_big(dbp,
- GET_BOVERFLOW(h, indx + O_INDX)->pgno,
+ GET_BOVERFLOW(dbp, h, indx + O_INDX)->pgno,
callback, cookie)) != 0)
goto err;
}
@@ -280,22 +266,21 @@ __bam_traverse(dbc, mode, root_pgno, callback, cookie)
case P_LDUP:
case P_LRECNO:
for (indx = 0; indx < NUM_ENT(h); indx += O_INDX) {
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
if (B_TYPE(bk->type) == B_OVERFLOW &&
(ret = __db_traverse_big(dbp,
- GET_BOVERFLOW(h, indx)->pgno,
+ GET_BOVERFLOW(dbp, h, indx)->pgno,
callback, cookie)) != 0)
goto err;
}
break;
+ default:
+ return (__db_pgfmt(dbp->dbenv, h->pgno));
}
- already_put = 0;
- if ((ret = callback(dbp, h, cookie, &already_put)) != 0)
- goto err;
+ ret = callback(dbp, h, cookie, &already_put);
-err: if (!already_put &&
- (t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret != 0)
+err: if (!already_put && (t_ret = __memp_fput(mpf, h, 0)) != 0 && ret != 0)
ret = t_ret;
__LPUT(dbc, lock);
@@ -316,33 +301,40 @@ __bam_stat_callback(dbp, h, cookie, putp)
int *putp;
{
DB_BTREE_STAT *sp;
- db_indx_t indx, top;
+ db_indx_t indx, *inp, top;
u_int8_t type;
sp = cookie;
*putp = 0;
top = NUM_ENT(h);
+ inp = P_INP(dbp, h);
switch (TYPE(h)) {
case P_IBTREE:
case P_IRECNO:
++sp->bt_int_pg;
- sp->bt_int_pgfree += P_FREESPACE(h);
+ sp->bt_int_pgfree += P_FREESPACE(dbp, h);
break;
case P_LBTREE:
/* Correct for on-page duplicates and deleted items. */
for (indx = 0; indx < top; indx += P_INDX) {
+ type = GET_BKEYDATA(dbp, h, indx + O_INDX)->type;
+ /* Ignore deleted items. */
+ if (B_DISSET(type))
+ continue;
+
+ /* Ignore duplicate keys. */
if (indx + P_INDX >= top ||
- h->inp[indx] != h->inp[indx + P_INDX])
+ inp[indx] != inp[indx + P_INDX])
++sp->bt_nkeys;
- type = GET_BKEYDATA(h, indx + O_INDX)->type;
- if (!B_DISSET(type) && B_TYPE(type) != B_DUPLICATE)
+ /* Ignore off-page duplicates. */
+ if (B_TYPE(type) != B_DUPLICATE)
++sp->bt_ndata;
}
++sp->bt_leaf_pg;
- sp->bt_leaf_pgfree += P_FREESPACE(h);
+ sp->bt_leaf_pgfree += P_FREESPACE(dbp, h);
break;
case P_LRECNO:
/*
@@ -356,39 +348,39 @@ __bam_stat_callback(dbp, h, cookie, putp)
* Correct for deleted items in non-renumbering
* Recno databases.
*/
- if (F_ISSET(dbp, DB_RE_RENUMBER))
+ if (F_ISSET(dbp, DB_AM_RENUMBER))
sp->bt_ndata += top;
else
for (indx = 0; indx < top; indx += O_INDX) {
- type = GET_BKEYDATA(h, indx)->type;
+ type = GET_BKEYDATA(dbp, h, indx)->type;
if (!B_DISSET(type))
++sp->bt_ndata;
}
++sp->bt_leaf_pg;
- sp->bt_leaf_pgfree += P_FREESPACE(h);
+ sp->bt_leaf_pgfree += P_FREESPACE(dbp, h);
} else {
sp->bt_ndata += top;
++sp->bt_dup_pg;
- sp->bt_dup_pgfree += P_FREESPACE(h);
+ sp->bt_dup_pgfree += P_FREESPACE(dbp, h);
}
break;
case P_LDUP:
/* Correct for deleted items. */
for (indx = 0; indx < top; indx += O_INDX)
- if (!B_DISSET(GET_BKEYDATA(h, indx)->type))
+ if (!B_DISSET(GET_BKEYDATA(dbp, h, indx)->type))
++sp->bt_ndata;
++sp->bt_dup_pg;
- sp->bt_dup_pgfree += P_FREESPACE(h);
+ sp->bt_dup_pgfree += P_FREESPACE(dbp, h);
break;
case P_OVERFLOW:
++sp->bt_over_pg;
- sp->bt_over_pgfree += P_OVFLSPACE(dbp->pgsize, h);
+ sp->bt_over_pgfree += P_OVFLSPACE(dbp, dbp->pgsize, h);
break;
default:
- return (__db_pgfmt(dbp, h->pgno));
+ return (__db_pgfmt(dbp->dbenv, h->pgno));
}
return (0);
}
@@ -398,38 +390,26 @@ __bam_stat_callback(dbp, h, cookie, putp)
* Return proportion of keys relative to given key. The numbers are
* slightly skewed due to on page duplicates.
*
- * PUBLIC: int __bam_key_range __P((DB *,
- * PUBLIC: DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t));
+ * PUBLIC: int __bam_key_range __P((DBC *, DBT *, DB_KEY_RANGE *, u_int32_t));
*/
int
-__bam_key_range(dbp, txn, dbt, kp, flags)
- DB *dbp;
- DB_TXN *txn;
+__bam_key_range(dbc, dbt, kp, flags)
+ DBC *dbc;
DBT *dbt;
DB_KEY_RANGE *kp;
u_int32_t flags;
{
BTREE_CURSOR *cp;
- DBC *dbc;
EPG *sp;
double factor;
- int exact, ret, t_ret;
+ int exact, ret;
- PANIC_CHECK(dbp->dbenv);
- DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->key_range");
+ COMPQUIET(flags, 0);
- if (flags != 0)
- return (__db_ferr(dbp->dbenv, "DB->key_range", 0));
-
- /* Acquire a cursor. */
- if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0)
+ if ((ret = __bam_search(dbc, PGNO_INVALID,
+ dbt, S_STK_ONLY, 1, NULL, &exact)) != 0)
return (ret);
- DEBUG_LWRITE(dbc, NULL, "bam_key_range", NULL, NULL, 0);
-
- if ((ret = __bam_search(dbc, dbt, S_STK_ONLY, 1, NULL, &exact)) != 0)
- goto err;
-
cp = (BTREE_CURSOR *)dbc->internal;
kp->less = kp->greater = 0.0;
@@ -453,7 +433,7 @@ __bam_key_range(dbp, txn, dbt, kp, flags)
else {
kp->less += factor * sp->indx / sp->entries;
kp->greater += factor *
- (sp->entries - sp->indx - 1) / sp->entries;
+ ((sp->entries - sp->indx) - 1) / sp->entries;
}
factor *= 1.0/sp->entries;
}
@@ -473,8 +453,5 @@ __bam_key_range(dbp, txn, dbt, kp, flags)
BT_STK_CLR(cp);
-err: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
- ret = t_ret;
-
- return (ret);
+ return (0);
}
diff --git a/db/btree/bt_upgrade.c b/db/btree/bt_upgrade.c
index 4032dba3b..71ee84222 100644
--- a/db/btree/bt_upgrade.c
+++ b/db/btree/bt_upgrade.c
@@ -1,28 +1,25 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_upgrade.c,v 11.19 2000/11/30 00:58:29 ubell Exp $";
+static const char revid[] = "$Id: bt_upgrade.c,v 11.29 2003/05/18 18:10:11 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>
-#include <limits.h>
#include <string.h>
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_swap.h"
-#include "btree.h"
-#include "db_am.h"
-#include "db_upgrade.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_upgrade.h"
+#include "dbinc/btree.h"
/*
* __bam_30_btreemeta --
@@ -107,7 +104,7 @@ __bam_31_btreemeta(dbp, real_name, flags, fhp, h, dirtyp)
newmeta->minkey = oldmeta->minkey;
newmeta->maxkey = oldmeta->maxkey;
memmove(newmeta->dbmeta.uid,
- oldmeta->dbmeta.uid, sizeof(oldmeta->dbmeta.uid));
+ oldmeta->dbmeta.uid, sizeof(oldmeta->dbmeta.uid));
newmeta->dbmeta.flags = oldmeta->dbmeta.flags;
newmeta->dbmeta.record_count = 0;
newmeta->dbmeta.key_count = 0;
@@ -126,7 +123,7 @@ __bam_31_btreemeta(dbp, real_name, flags, fhp, h, dirtyp)
/*
* __bam_31_lbtree --
- * Upgrade the database btree leaf pages.
+ * Upgrade the database btree leaf pages.
*
* PUBLIC: int __bam_31_lbtree
* PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *));
@@ -147,15 +144,15 @@ __bam_31_lbtree(dbp, real_name, flags, fhp, h, dirtyp)
ret = 0;
for (indx = O_INDX; indx < NUM_ENT(h); indx += P_INDX) {
- bk = GET_BKEYDATA(h, indx);
+ bk = GET_BKEYDATA(dbp, h, indx);
if (B_TYPE(bk->type) == B_DUPLICATE) {
- pgno = GET_BOVERFLOW(h, indx)->pgno;
+ pgno = GET_BOVERFLOW(dbp, h, indx)->pgno;
if ((ret = __db_31_offdup(dbp, real_name, fhp,
LF_ISSET(DB_DUPSORT) ? 1 : 0, &pgno)) != 0)
break;
- if (pgno != GET_BOVERFLOW(h, indx)->pgno) {
+ if (pgno != GET_BOVERFLOW(dbp, h, indx)->pgno) {
*dirtyp = 1;
- GET_BOVERFLOW(h, indx)->pgno = pgno;
+ GET_BOVERFLOW(dbp, h, indx)->pgno = pgno;
}
}
}
diff --git a/db/btree/bt_verify.c b/db/btree/bt_verify.c
index 9f8647e7e..cd8c57a4d 100644
--- a/db/btree/bt_verify.c
+++ b/db/btree/bt_verify.c
@@ -1,16 +1,16 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1999, 2000
+ * Copyright (c) 1999-2003
* Sleepycat Software. All rights reserved.
*
- * $Id: bt_verify.c,v 1.44 2000/12/06 19:55:44 ubell Exp $
+ * $Id: bt_verify.c,v 1.87 2003/10/06 14:09:23 bostic Exp $
*/
#include "db_config.h"
#ifndef lint
-static const char revid[] = "$Id: bt_verify.c,v 1.44 2000/12/06 19:55:44 ubell Exp $";
+static const char revid[] = "$Id: bt_verify.c,v 1.87 2003/10/06 14:09:23 bostic Exp $";
#endif /* not lint */
#ifndef NO_SYSTEM_INCLUDES
@@ -20,9 +20,11 @@ static const char revid[] = "$Id: bt_verify.c,v 1.44 2000/12/06 19:55:44 ubell E
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_verify.h"
-#include "btree.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_shash.h"
+#include "dbinc/db_verify.h"
+#include "dbinc/btree.h"
+#include "dbinc/mp.h"
static int __bam_safe_getdata __P((DB *, PAGE *, u_int32_t, int, DBT *, int *));
static int __bam_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
@@ -49,15 +51,17 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
db_pgno_t pgno;
u_int32_t flags;
{
+ DB_ENV *dbenv;
VRFY_PAGEINFO *pip;
int isbad, t_ret, ret;
db_indx_t ovflsize;
+ dbenv = dbp->dbenv;
+ isbad = 0;
+
if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
return (ret);
- isbad = 0;
-
/*
* If VRFY_INCOMPLETE is not set, then we didn't come through
* __db_vrfy_pagezero and didn't incompletely
@@ -79,19 +83,19 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
/* avoid division by zero */
ovflsize = meta->minkey > 0 ?
- B_MINKEY_TO_OVFLSIZE(meta->minkey, dbp->pgsize) : 0;
+ B_MINKEY_TO_OVFLSIZE(dbp, meta->minkey, dbp->pgsize) : 0;
if (meta->minkey < 2 ||
- ovflsize > B_MINKEY_TO_OVFLSIZE(DEFMINKEYPAGE, dbp->pgsize)) {
+ ovflsize > B_MINKEY_TO_OVFLSIZE(dbp, DEFMINKEYPAGE, dbp->pgsize)) {
pip->bt_minkey = 0;
isbad = 1;
- EPRINT((dbp->dbenv,
- "Nonsensical bt_minkey value %lu on metadata page %lu",
- (u_long)meta->minkey, (u_long)pgno));
+ EPRINT((dbenv,
+ "Page %lu: nonsensical bt_minkey value %lu on metadata page",
+ (u_long)pgno, (u_long)meta->minkey));
} else
pip->bt_minkey = meta->minkey;
- /* bt_maxkey: no constraints (XXX: right?) */
+ /* bt_maxkey: unsupported so no constraints. */
pip->bt_maxkey = meta->maxkey;
/* re_len: no constraints on this (may be zero or huge--we make rope) */
@@ -103,13 +107,13 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
* of the file, then the root page had better be page 1.
*/
pip->root = 0;
- if (meta->root == PGNO_INVALID
- || meta->root == pgno || !IS_VALID_PGNO(meta->root) ||
+ if (meta->root == PGNO_INVALID ||
+ meta->root == pgno || !IS_VALID_PGNO(meta->root) ||
(pgno == PGNO_BASE_MD && meta->root != 1)) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Nonsensical root page %lu on metadata page %lu",
- (u_long)meta->root, (u_long)vdp->last_pgno));
+ EPRINT((dbenv,
+ "Page %lu: nonsensical root page %lu on metadata page",
+ (u_long)pgno, (u_long)meta->root));
} else
pip->root = meta->root;
@@ -124,8 +128,8 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
*/
if (F_ISSET(&meta->dbmeta, BTM_DUP) && pgno == PGNO_BASE_MD) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Btree metadata page %lu has both duplicates and multiple databases",
+ EPRINT((dbenv,
+"Page %lu: Btree metadata page has both duplicates and multiple databases",
(u_long)pgno));
}
F_SET(pip, VRFY_HAS_SUBDBS);
@@ -138,8 +142,8 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
if (F_ISSET(&meta->dbmeta, BTM_RECNUM))
F_SET(pip, VRFY_HAS_RECNUMS);
if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_DUPS)) {
- EPRINT((dbp->dbenv,
- "Btree metadata page %lu illegally has both recnums and dups",
+ EPRINT((dbenv,
+ "Page %lu: Btree metadata page illegally has both recnums and dups",
(u_long)pgno));
isbad = 1;
}
@@ -149,14 +153,14 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
dbp->type = DB_RECNO;
} else if (F_ISSET(pip, VRFY_IS_RRECNO)) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Metadata page %lu has renumber flag set but is not recno",
+ EPRINT((dbenv,
+ "Page %lu: metadata page has renumber flag set but is not recno",
(u_long)pgno));
}
if (F_ISSET(pip, VRFY_IS_RECNO) && F_ISSET(pip, VRFY_HAS_DUPS)) {
- EPRINT((dbp->dbenv,
- "Recno metadata page %lu specifies duplicates",
+ EPRINT((dbenv,
+ "Page %lu: recno metadata page specifies duplicates",
(u_long)pgno));
isbad = 1;
}
@@ -169,9 +173,9 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
* database
*/
isbad = 1;
- EPRINT((dbp->dbenv,
- "re_len of %lu in non-fixed-length database",
- (u_long)pip->re_len));
+ EPRINT((dbenv,
+ "Page %lu: re_len of %lu in non-fixed-length database",
+ (u_long)pgno, (u_long)pip->re_len));
}
/*
@@ -179,7 +183,7 @@ __bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
* not be and may still be correct.
*/
-err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+err: if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}
@@ -200,22 +204,24 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
u_int32_t flags;
{
BKEYDATA *bk;
+ DB_ENV *dbenv;
VRFY_PAGEINFO *pip;
db_indx_t i;
int ret, t_ret, isbad;
u_int32_t re_len_guess, len;
+ dbenv = dbp->dbenv;
isbad = 0;
+
if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
return (ret);
- if ((ret = __db_fchk(dbp->dbenv,
- "__ram_vrfy_leaf", flags, OKFLAGS)) != 0)
+ if ((ret = __db_fchk(dbenv, "__ram_vrfy_leaf", flags, OKFLAGS)) != 0)
goto err;
if (TYPE(h) != P_LRECNO) {
/* We should not have been called. */
- TYPE_ERR_PRINT(dbp->dbenv, "__ram_vrfy_leaf", pgno, TYPE(h));
+ TYPE_ERR_PRINT(dbenv, "__ram_vrfy_leaf", pgno, TYPE(h));
DB_ASSERT(0);
ret = EINVAL;
goto err;
@@ -241,8 +247,8 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
goto err;
if (F_ISSET(pip, VRFY_HAS_DUPS)) {
- EPRINT((dbp->dbenv,
- "Recno database has dups on page %lu", (u_long)pgno));
+ EPRINT((dbenv,
+ "Page %lu: Recno database has dups", (u_long)pgno));
ret = DB_VERIFY_BAD;
goto err;
}
@@ -255,7 +261,7 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
*/
re_len_guess = 0;
for (i = 0; i < NUM_ENT(h); i++) {
- bk = GET_BKEYDATA(h, i);
+ bk = GET_BKEYDATA(dbp, h, i);
/* KEYEMPTY. Go on. */
if (B_DISSET(bk->type))
continue;
@@ -265,9 +271,9 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
len = bk->len;
else {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Nonsensical type for item %lu, page %lu",
- (u_long)i, (u_long)pgno));
+ EPRINT((dbenv,
+ "Page %lu: nonsensical type for item %lu",
+ (u_long)pgno, (u_long)i));
continue;
}
if (re_len_guess == 0)
@@ -288,9 +294,9 @@ __ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
/* Save off record count. */
pip->rec_cnt = NUM_ENT(h);
-err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+err: if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : 0);
+ return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}
/*
@@ -308,10 +314,13 @@ __bam_vrfy(dbp, vdp, h, pgno, flags)
db_pgno_t pgno;
u_int32_t flags;
{
+ DB_ENV *dbenv;
VRFY_PAGEINFO *pip;
int ret, t_ret, isbad;
+ dbenv = dbp->dbenv;
isbad = 0;
+
if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
return (ret);
@@ -322,7 +331,7 @@ __bam_vrfy(dbp, vdp, h, pgno, flags)
case P_LDUP:
break;
default:
- TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy", pgno, TYPE(h));
+ TYPE_ERR_PRINT(dbenv, "__bam_vrfy", pgno, TYPE(h));
DB_ASSERT(0);
ret = EINVAL;
goto err;
@@ -361,8 +370,8 @@ __bam_vrfy(dbp, vdp, h, pgno, flags)
isbad = 1;
else
goto err;
- EPRINT((dbp->dbenv,
- "item order check on page %lu unsafe: skipping",
+ EPRINT((dbenv,
+ "Page %lu: item order check unsafe: skipping",
(u_long)pgno));
} else if (!LF_ISSET(DB_NOORDERCHK) && (ret =
__bam_vrfy_itemorder(dbp, vdp, h, pgno, 0, 0, 0, flags)) != 0) {
@@ -377,9 +386,9 @@ __bam_vrfy(dbp, vdp, h, pgno, flags)
goto err;
}
-err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+err: if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
- return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : 0);
+ return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}
/*
@@ -398,13 +407,16 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
db_indx_t *nentriesp;
u_int32_t flags;
{
+ DB_ENV *dbenv;
RINTERNAL *ri;
VRFY_CHILDINFO child;
VRFY_PAGEINFO *pip;
int ret, t_ret, isbad;
u_int32_t himark, i, offset, nentries;
+ db_indx_t *inp;
u_int8_t *pagelayout, *p;
+ dbenv = dbp->dbenv;
isbad = 0;
memset(&child, 0, sizeof(VRFY_CHILDINFO));
nentries = 0;
@@ -414,38 +426,38 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
return (ret);
if (TYPE(h) != P_IRECNO) {
- TYPE_ERR_PRINT(dbp->dbenv, "__ram_vrfy_inp", pgno, TYPE(h));
+ TYPE_ERR_PRINT(dbenv, "__ram_vrfy_inp", pgno, TYPE(h));
DB_ASSERT(0);
ret = EINVAL;
goto err;
}
himark = dbp->pgsize;
- if ((ret =
- __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &pagelayout)) != 0)
+ if ((ret = __os_malloc(dbenv, dbp->pgsize, &pagelayout)) != 0)
goto err;
memset(pagelayout, 0, dbp->pgsize);
+ inp = P_INP(dbp, h);
for (i = 0; i < NUM_ENT(h); i++) {
- if ((u_int8_t *)h->inp + i >= (u_int8_t *)h + himark) {
- EPRINT((dbp->dbenv,
- "Page %lu entries listing %lu overlaps data",
+ if ((u_int8_t *)inp + i >= (u_int8_t *)h + himark) {
+ EPRINT((dbenv,
+ "Page %lu: entries listing %lu overlaps data",
(u_long)pgno, (u_long)i));
ret = DB_VERIFY_BAD;
goto err;
}
- offset = h->inp[i];
+ offset = inp[i];
/*
* Check that the item offset is reasonable: it points
* somewhere after the inp array and before the end of the
* page.
*/
- if (offset <= (u_int32_t)((u_int8_t *)h->inp + i -
+ if (offset <= (u_int32_t)((u_int8_t *)inp + i -
(u_int8_t *)h) ||
offset > (u_int32_t)(dbp->pgsize - RINTERNAL_SIZE)) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Bad offset %lu at page %lu index %lu",
- (u_long)offset, (u_long)pgno, (u_long)i));
+ EPRINT((dbenv,
+ "Page %lu: bad offset %lu at index %lu",
+ (u_long)pgno, (u_long)offset, (u_long)i));
continue;
}
@@ -456,7 +468,7 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
nentries++;
/* Make sure this RINTERNAL is not multiply referenced. */
- ri = GET_RINTERNAL(h, i);
+ ri = GET_RINTERNAL(dbp, h, i);
if (pagelayout[offset] == 0) {
pagelayout[offset] = 1;
child.pgno = ri->pgno;
@@ -465,9 +477,9 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
if ((ret = __db_vrfy_childput(vdp, pgno, &child)) != 0)
goto err;
} else {
- EPRINT((dbp->dbenv,
- "RINTERNAL structure at offset %lu, page %lu referenced twice",
- (u_long)offset, (u_long)pgno));
+ EPRINT((dbenv,
+ "Page %lu: RINTERNAL structure at offset %lu referenced twice",
+ (u_long)pgno, (u_long)offset));
isbad = 1;
}
}
@@ -476,24 +488,25 @@ __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
p < pagelayout + dbp->pgsize;
p += RINTERNAL_SIZE)
if (*p != 1) {
- EPRINT((dbp->dbenv,
- "Gap between items at offset %lu, page %lu",
- (u_long)(p - pagelayout), (u_long)pgno));
+ EPRINT((dbenv,
+ "Page %lu: gap between items at offset %lu",
+ (u_long)pgno, (u_long)(p - pagelayout)));
isbad = 1;
}
if ((db_indx_t)himark != HOFFSET(h)) {
- EPRINT((dbp->dbenv, "Bad HOFFSET %lu, appears to be %lu",
- (u_long)(HOFFSET(h)), (u_long)himark));
+ EPRINT((dbenv,
+ "Page %lu: bad HOFFSET %lu, appears to be %lu",
+ (u_long)pgno, (u_long)(HOFFSET(h)), (u_long)himark));
isbad = 1;
}
*nentriesp = nentries;
-err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+err: if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
if (pagelayout != NULL)
- __os_free(pagelayout, dbp->pgsize);
+ __os_free(dbenv, pagelayout);
return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}
@@ -513,6 +526,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
{
BKEYDATA *bk;
BOVERFLOW *bo;
+ DB_ENV *dbenv;
VRFY_CHILDINFO child;
VRFY_PAGEINFO *pip;
int isbad, initem, isdupitem, ret, t_ret;
@@ -520,6 +534,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
u_int32_t i, endoff, nentries;
u_int8_t *pagelayout;
+ dbenv = dbp->dbenv;
isbad = isdupitem = 0;
nentries = 0;
memset(&child, 0, sizeof(VRFY_CHILDINFO));
@@ -540,7 +555,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
*/
if (LF_ISSET(DB_SALVAGE))
break;
- TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy_inp", pgno, TYPE(h));
+ TYPE_ERR_PRINT(dbenv, "__bam_vrfy_inp", pgno, TYPE(h));
DB_ASSERT(0);
ret = EINVAL;
goto err;
@@ -558,22 +573,24 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
* it and the region immediately after it.
*/
himark = dbp->pgsize;
- if ((ret = __os_malloc(dbp->dbenv,
- dbp->pgsize, NULL, &pagelayout)) != 0)
+ if ((ret = __os_malloc(dbenv, dbp->pgsize, &pagelayout)) != 0)
goto err;
memset(pagelayout, 0, dbp->pgsize);
for (i = 0; i < NUM_ENT(h); i++) {
-
- ret = __db_vrfy_inpitem(dbp,
- h, pgno, i, 1, flags, &himark, &offset);
- if (ret == DB_VERIFY_BAD) {
+ switch (ret = __db_vrfy_inpitem(dbp,
+ h, pgno, i, 1, flags, &himark, &offset)) {
+ case 0:
+ break;
+ case DB_VERIFY_BAD:
isbad = 1;
continue;
- } else if (ret == DB_VERIFY_FATAL) {
+ case DB_VERIFY_FATAL:
isbad = 1;
goto err;
- } else if (ret != 0)
- DB_ASSERT(0);
+ default:
+ DB_ASSERT(ret != 0);
+ break;
+ }
/*
* We now have a plausible beginning for the item, and we know
@@ -582,7 +599,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
* Mark the beginning and end in pagelayout so we can make sure
* items have no overlaps or gaps.
*/
- bk = GET_BKEYDATA(h, i);
+ bk = GET_BKEYDATA(dbp, h, i);
#define ITEM_BEGIN 1
#define ITEM_END 2
if (pagelayout[offset] == 0)
@@ -608,9 +625,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
isdupitem = 1;
} else {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Duplicated item %lu on page %lu",
- (u_long)i, (u_long)pgno));
+ EPRINT((dbenv, "Page %lu: duplicated item %lu",
+ (u_long)pgno, (u_long)i));
}
}
@@ -621,7 +637,7 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
* If the end already has a sign other than 0, do nothing--
* it's an overlap that we'll catch later.
*/
- switch(B_TYPE(bk->type)) {
+ switch (B_TYPE(bk->type)) {
case B_KEYDATA:
if (TYPE(h) == P_IBTREE)
/* It's a BINTERNAL. */
@@ -661,9 +677,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
* the end had better coincide too.
*/
if (isdupitem && pagelayout[endoff] != ITEM_END) {
- EPRINT((dbp->dbenv,
- "Duplicated item %lu on page %lu",
- (u_long)i, (u_long)pgno));
+ EPRINT((dbenv, "Page %lu: duplicated item %lu",
+ (u_long)pgno, (u_long)i));
isbad = 1;
} else if (pagelayout[endoff] == 0)
pagelayout[endoff] = ITEM_END;
@@ -675,9 +690,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
*/
if (B_DISSET(bk->type) && TYPE(h) != P_LRECNO) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Item %lu on page %lu marked deleted",
- (u_long)i, (u_long)pgno));
+ EPRINT((dbenv, "Page %lu: item %lu marked deleted",
+ (u_long)pgno, (u_long)i));
}
/*
@@ -695,14 +709,14 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
case B_DUPLICATE:
if (TYPE(h) == P_IBTREE) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Duplicate page referenced by internal btree page %lu at item %lu",
+ EPRINT((dbenv,
+ "Page %lu: duplicate page referenced by internal btree page at item %lu",
(u_long)pgno, (u_long)i));
break;
} else if (TYPE(h) == P_LRECNO) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Duplicate page referenced by recno page %lu at item %lu",
+ EPRINT((dbenv,
+ "Page %lu: duplicate page referenced by recno page at item %lu",
(u_long)pgno, (u_long)i));
break;
}
@@ -716,10 +730,10 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
/* Make sure tlen is reasonable. */
if (bo->tlen > dbp->pgsize * vdp->last_pgno) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Impossible tlen %lu, item %lu, page %lu",
- (u_long)bo->tlen, (u_long)i,
- (u_long)pgno));
+ EPRINT((dbenv,
+ "Page %lu: impossible tlen %lu, item %lu",
+ (u_long)pgno,
+ (u_long)bo->tlen, (u_long)i));
/* Don't save as a child. */
break;
}
@@ -727,9 +741,9 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
if (!IS_VALID_PGNO(bo->pgno) || bo->pgno == pgno ||
bo->pgno == PGNO_INVALID) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Offpage item %lu, page %lu has bad pgno",
- (u_long)i, (u_long)pgno));
+ EPRINT((dbenv,
+ "Page %lu: offpage item %lu has bad pgno %lu",
+ (u_long)pgno, (u_long)i, (u_long)bo->pgno));
/* Don't save as a child. */
break;
}
@@ -743,9 +757,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
break;
default:
isbad = 1;
- EPRINT((dbp->dbenv,
- "Item %lu on page %lu of invalid type %lu",
- (u_long)i, (u_long)pgno));
+ EPRINT((dbenv, "Page %lu: item %lu of invalid type %lu",
+ (u_long)pgno, (u_long)i, (u_long)B_TYPE(bk->type)));
break;
}
}
@@ -764,8 +777,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
continue;
isbad = 1;
- EPRINT((dbp->dbenv,
- "Gap between items, page %lu offset %lu",
+ EPRINT((dbenv,
+ "Page %lu: gap between items at offset %lu",
(u_long)pgno, (u_long)i));
/* Find the end of the gap */
for ( ; pagelayout[i + 1] == 0 &&
@@ -776,9 +789,9 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
/* We've found an item. Check its alignment. */
if (i != ALIGN(i, sizeof(u_int32_t))) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Offset %lu page %lu unaligned",
- (u_long)i, (u_long)pgno));
+ EPRINT((dbenv,
+ "Page %lu: offset %lu unaligned",
+ (u_long)pgno, (u_long)i));
}
initem = 1;
nentries++;
@@ -790,8 +803,8 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
* be an overlap.
*/
isbad = 1;
- EPRINT((dbp->dbenv,
- "Overlapping items, page %lu offset %lu",
+ EPRINT((dbenv,
+ "Page %lu: overlapping items at offset %lu",
(u_long)pgno, (u_long)i));
break;
default:
@@ -815,25 +828,25 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
* end. Overlap.
*/
isbad = 1;
- EPRINT((dbp->dbenv,
- "Overlapping items, page %lu offset %lu",
+ EPRINT((dbenv,
+ "Page %lu: overlapping items at offset %lu",
(u_long)pgno, (u_long)i));
break;
}
- (void)__os_free(pagelayout, dbp->pgsize);
+ __os_free(dbenv, pagelayout);
/* Verify HOFFSET. */
if ((db_indx_t)himark != HOFFSET(h)) {
- EPRINT((dbp->dbenv, "Bad HOFFSET %lu, appears to be %lu",
- (u_long)HOFFSET(h), (u_long)himark));
+ EPRINT((dbenv, "Page %lu: bad HOFFSET %lu, appears to be %lu",
+ (u_long)pgno, (u_long)HOFFSET(h), (u_long)himark));
isbad = 1;
}
err: if (nentriesp != NULL)
*nentriesp = nentries;
- if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+ if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
@@ -865,14 +878,15 @@ __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags)
int ovflok, hasdups;
u_int32_t flags;
{
- DBT dbta, dbtb, dup1, dup2, *p1, *p2, *tmp;
- BTREE *bt;
BINTERNAL *bi;
BKEYDATA *bk;
BOVERFLOW *bo;
+ BTREE *bt;
+ DBT dbta, dbtb, dup_1, dup_2, *p1, *p2, *tmp;
+ DB_ENV *dbenv;
VRFY_PAGEINFO *pip;
db_indx_t i;
- int cmp, freedup1, freedup2, isbad, ret, t_ret;
+ int cmp, freedup_1, freedup_2, isbad, ret, t_ret;
int (*dupfunc) __P((DB *, const DBT *, const DBT *));
int (*func) __P((DB *, const DBT *, const DBT *));
void *buf1, *buf2, *tmpbuf;
@@ -889,6 +903,7 @@ __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags)
} else
pip = NULL;
+ dbenv = dbp->dbenv;
ret = isbad = 0;
bo = NULL; /* Shut up compiler. */
@@ -949,7 +964,7 @@ __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags)
*/
switch (TYPE(h)) {
case P_IBTREE:
- bi = GET_BINTERNAL(h, i);
+ bi = GET_BINTERNAL(dbp, h, i);
if (B_TYPE(bi->type) == B_OVERFLOW) {
bo = (BOVERFLOW *)(bi->data);
goto overflow;
@@ -971,15 +986,15 @@ __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags)
#if 0
if (i == 0 && bi->len != 0) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Lowest key on internal page %lu of nonzero length",
+ EPRINT((dbenv,
+ "Page %lu: lowest key on internal page of nonzero length",
(u_long)pgno));
}
#endif
break;
case P_LBTREE:
case P_LDUP:
- bk = GET_BKEYDATA(h, i);
+ bk = GET_BKEYDATA(dbp, h, i);
if (B_TYPE(bk->type) == B_OVERFLOW) {
bo = (BOVERFLOW *)bk;
goto overflow;
@@ -993,7 +1008,7 @@ __bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags)
* This means our caller screwed up and sent us
* an inappropriate page.
*/
- TYPE_ERR_PRINT(dbp->dbenv,
+ TYPE_ERR_PRINT(dbenv,
"__bam_vrfy_itemorder", pgno, TYPE(h))
DB_ASSERT(0);
ret = EINVAL;
@@ -1029,9 +1044,9 @@ overflow: if (!ovflok) {
if ((ret = __db_goff(dbp,
p2, bo->tlen, bo->pgno, NULL, NULL)) != 0) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Error %lu in fetching overflow item %lu, page %lu",
- (u_long)ret, (u_long)i, (u_long)pgno));
+ EPRINT((dbenv,
+ "Page %lu: error %lu in fetching overflow item %lu",
+ (u_long)pgno, (u_long)ret, (u_long)i));
}
/* In case it got realloc'ed and thus changed. */
buf2 = p2->data;
@@ -1044,8 +1059,8 @@ overflow: if (!ovflok) {
/* comparison succeeded */
if (cmp > 0) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Out-of-order key, page %lu item %lu",
+ EPRINT((dbenv,
+ "Page %lu: out-of-order key at entry %lu",
(u_long)pgno, (u_long)i));
/* proceed */
} else if (cmp == 0) {
@@ -1059,8 +1074,8 @@ overflow: if (!ovflok) {
F_SET(pip, VRFY_HAS_DUPS);
else if (hasdups == 0) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Database with no duplicates has duplicated keys on page %lu",
+ EPRINT((dbenv,
+ "Page %lu: database with no duplicates has duplicated keys",
(u_long)pgno));
}
@@ -1092,11 +1107,11 @@ overflow: if (!ovflok) {
* dups are probably (?) rare.
*/
if (((ret = __bam_safe_getdata(dbp,
- h, i - 1, ovflok, &dup1,
- &freedup1)) != 0) ||
+ h, i - 1, ovflok, &dup_1,
+ &freedup_1)) != 0) ||
((ret = __bam_safe_getdata(dbp,
- h, i + 1, ovflok, &dup2,
- &freedup2)) != 0))
+ h, i + 1, ovflok, &dup_2,
+ &freedup_2)) != 0))
goto err;
/*
@@ -1105,8 +1120,8 @@ overflow: if (!ovflok) {
* it's not safe to chase them now.
* Mark an incomplete and return.
*/
- if (dup1.data == NULL ||
- dup2.data == NULL) {
+ if (dup_1.data == NULL ||
+ dup_2.data == NULL) {
DB_ASSERT(!ovflok);
F_SET(pip, VRFY_INCOMPLETE);
goto err;
@@ -1118,26 +1133,26 @@ overflow: if (!ovflok) {
* until we do the structure check
* and see whether DUPSORT is set.
*/
- if (dupfunc(dbp, &dup1, &dup2) > 0)
+ if (dupfunc(dbp, &dup_1, &dup_2) > 0)
F_SET(pip, VRFY_DUPS_UNSORTED);
- if (freedup1)
- __os_free(dup1.data, 0);
- if (freedup2)
- __os_free(dup2.data, 0);
+ if (freedup_1)
+ __os_ufree(dbenv, dup_1.data);
+ if (freedup_2)
+ __os_ufree(dbenv, dup_2.data);
}
}
}
}
-err: if (pip != NULL &&
- ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0) && ret == 0)
+err: if (pip != NULL && ((t_ret =
+ __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0) && ret == 0)
ret = t_ret;
if (buf1 != NULL)
- __os_free(buf1, 0);
+ __os_ufree(dbenv, buf1);
if (buf2 != NULL)
- __os_free(buf2, 0);
+ __os_ufree(dbenv, buf2);
return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
}
@@ -1158,11 +1173,13 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags)
u_int32_t flags;
{
DB *pgset;
+ DB_ENV *dbenv;
VRFY_PAGEINFO *mip, *rip;
db_pgno_t root, p;
int t_ret, ret;
u_int32_t nrecs, level, relen, stflags;
+ dbenv = dbp->dbenv;
mip = rip = 0;
pgset = vdp->pgset;
@@ -1172,8 +1189,8 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags)
if ((ret = __db_vrfy_pgset_get(pgset, meta_pgno, (int *)&p)) != 0)
goto err;
if (p != 0) {
- EPRINT((dbp->dbenv,
- "Btree metadata page number %lu observed twice",
+ EPRINT((dbenv,
+ "Page %lu: btree metadata page observed twice",
(u_long)meta_pgno));
ret = DB_VERIFY_BAD;
goto err;
@@ -1184,8 +1201,9 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags)
root = mip->root;
if (root == 0) {
- EPRINT((dbp->dbenv,
- "Btree metadata page %lu has no root", (u_long)meta_pgno));
+ EPRINT((dbenv,
+ "Page %lu: btree metadata page has no root",
+ (u_long)meta_pgno));
ret = DB_VERIFY_BAD;
goto err;
}
@@ -1221,8 +1239,8 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags)
* that should never happen.
*/
if (mip->re_len > 0 && relen > 0 && mip->re_len != relen) {
- EPRINT((dbp->dbenv,
- "Recno database with meta page %lu has bad re_len %lu",
+ EPRINT((dbenv,
+ "Page %lu: recno database has bad re_len %lu",
(u_long)meta_pgno, (u_long)relen));
ret = DB_VERIFY_BAD;
goto err;
@@ -1230,25 +1248,25 @@ __bam_vrfy_structure(dbp, vdp, meta_pgno, flags)
ret = 0;
break;
case P_LDUP:
- EPRINT((dbp->dbenv,
- "Duplicate tree referenced from metadata page %lu",
+ EPRINT((dbenv,
+ "Page %lu: duplicate tree referenced from metadata page",
(u_long)meta_pgno));
ret = DB_VERIFY_BAD;
break;
default:
- EPRINT((dbp->dbenv,
- "Btree root of incorrect type %lu on meta page %lu",
- (u_long)rip->type, (u_long)meta_pgno));
+ EPRINT((dbenv,
+ "Page %lu: btree root of incorrect type %lu on metadata page",
+ (u_long)meta_pgno, (u_long)rip->type));
ret = DB_VERIFY_BAD;
break;
}
-err: if (mip != NULL &&
- ((t_ret = __db_vrfy_putpageinfo(vdp, mip)) != 0) && ret == 0)
- t_ret = ret;
- if (rip != NULL &&
- ((t_ret = __db_vrfy_putpageinfo(vdp, rip)) != 0) && ret == 0)
- t_ret = ret;
+err: if (mip != NULL && ((t_ret =
+ __db_vrfy_putpageinfo(dbenv, vdp, mip)) != 0) && ret == 0)
+ ret = t_ret;
+ if (rip != NULL && ((t_ret =
+ __db_vrfy_putpageinfo(dbenv, vdp, rip)) != 0) && ret == 0)
+ ret = t_ret;
return (ret);
}
@@ -1263,8 +1281,7 @@ err: if (mip != NULL &&
* PUBLIC: void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *));
*/
int
-__bam_vrfy_subtree(dbp,
- vdp, pgno, l, r, flags, levelp, nrecsp, relenp)
+__bam_vrfy_subtree(dbp, vdp, pgno, l, r, flags, levelp, nrecsp, relenp)
DB *dbp;
VRFY_DBINFO *vdp;
db_pgno_t pgno;
@@ -1274,19 +1291,27 @@ __bam_vrfy_subtree(dbp,
BINTERNAL *li, *ri, *lp, *rp;
DB *pgset;
DBC *cc;
+ DB_ENV *dbenv;
+ DB_MPOOLFILE *mpf;
PAGE *h;
VRFY_CHILDINFO *child;
VRFY_PAGEINFO *pip;
- db_recno_t nrecs, child_nrecs;
db_indx_t i;
- int ret, t_ret, isbad, toplevel, p;
+ db_pgno_t next_pgno, prev_pgno;
+ db_recno_t child_nrecs, nrecs;
+ u_int32_t child_level, child_relen, j, level, relen, stflags;
+ u_int8_t leaf_type;
int (*func) __P((DB *, const DBT *, const DBT *));
- u_int32_t level, child_level, stflags, child_relen, relen;
+ int isbad, p, ret, t_ret, toplevel;
+ dbenv = dbp->dbenv;
+ mpf = dbp->mpf;
ret = isbad = 0;
nrecs = 0;
h = NULL;
relen = 0;
+ leaf_type = P_INVALID;
+ next_pgno = prev_pgno = PGNO_INVALID;
rp = (BINTERNAL *)r;
lp = (BINTERNAL *)l;
@@ -1300,10 +1325,33 @@ __bam_vrfy_subtree(dbp,
cc = NULL;
level = pip->bt_level;
- toplevel = LF_ISSET(ST_TOPLEVEL);
+ toplevel = LF_ISSET(ST_TOPLEVEL) ? 1 : 0;
LF_CLR(ST_TOPLEVEL);
/*
+ * If this is the root, initialize the vdp's prev- and next-pgno
+ * accounting.
+ *
+ * For each leaf page we hit, we'll want to make sure that
+ * vdp->prev_pgno is the same as pip->prev_pgno and vdp->next_pgno is
+ * our page number. Then, we'll set vdp->next_pgno to pip->next_pgno
+ * and vdp->prev_pgno to our page number, and the next leaf page in
+ * line should be able to do the same verification.
+ */
+ if (toplevel) {
+ /*
+ * Cache the values stored in the vdp so that if we're an
+ * auxiliary tree such as an off-page duplicate set, our
+ * caller's leaf page chain doesn't get lost.
+ */
+ prev_pgno = vdp->prev_pgno;
+ next_pgno = vdp->next_pgno;
+ leaf_type = vdp->leaf_type;
+ vdp->next_pgno = vdp->prev_pgno = PGNO_INVALID;
+ vdp->leaf_type = P_INVALID;
+ }
+
+ /*
* We are recursively descending a btree, starting from the root
* and working our way out to the leaves.
*
@@ -1333,8 +1381,63 @@ __bam_vrfy_subtree(dbp,
case P_LDUP:
case P_LBTREE:
/*
- * Cases 1, 2 and 3 (overflow pages are common to all three);
- * traverse child list, looking for overflows.
+ * Cases 1, 2 and 3.
+ *
+ * We're some sort of leaf page; verify
+ * that our linked list of leaves is consistent.
+ */
+ if (vdp->leaf_type == P_INVALID) {
+ /*
+ * First leaf page. Set the type that all its
+ * successors should be, and verify that our prev_pgno
+ * is PGNO_INVALID.
+ */
+ vdp->leaf_type = pip->type;
+ if (pip->prev_pgno != PGNO_INVALID)
+ goto bad_prev;
+ } else {
+ /*
+ * Successor leaf page. Check our type, the previous
+ * page's next_pgno, and our prev_pgno.
+ */
+ if (pip->type != vdp->leaf_type) {
+ EPRINT((dbenv,
+ "Page %lu: unexpected page type %lu found in leaf chain (expected %lu)",
+ (u_long)pip->pgno, (u_long)pip->type,
+ (u_long)vdp->leaf_type));
+ isbad = 1;
+ }
+
+ /*
+ * Don't do the prev/next_pgno checks if we've lost
+ * leaf pages due to another corruption.
+ */
+ if (!F_ISSET(vdp, VRFY_LEAFCHAIN_BROKEN)) {
+ if (pip->pgno != vdp->next_pgno) {
+ EPRINT((dbenv,
+ "Page %lu: incorrect next_pgno %lu found in leaf chain (should be %lu)",
+ (u_long)vdp->prev_pgno,
+ (u_long)vdp->next_pgno,
+ (u_long)pip->pgno));
+ isbad = 1;
+ }
+ if (pip->prev_pgno != vdp->prev_pgno) {
+bad_prev: EPRINT((dbenv,
+ "Page %lu: incorrect prev_pgno %lu found in leaf chain (should be %lu)",
+ (u_long)pip->pgno,
+ (u_long)pip->prev_pgno,
+ (u_long)vdp->prev_pgno));
+ isbad = 1;
+ }
+ }
+ }
+ vdp->prev_pgno = pip->pgno;
+ vdp->next_pgno = pip->next_pgno;
+ F_CLR(vdp, VRFY_LEAFCHAIN_BROKEN);
+
+ /*
+ * Overflow pages are common to all three leaf types;
+ * traverse the child list, looking for overflows.
*/
if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0)
goto err;
@@ -1359,8 +1462,8 @@ __bam_vrfy_subtree(dbp,
if (!LF_ISSET(ST_IS_RECNO) &&
!(LF_ISSET(ST_DUPOK) && !LF_ISSET(ST_DUPSORT))) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Recno leaf page %lu in non-recno tree",
+ EPRINT((dbenv,
+ "Page %lu: recno leaf page non-recno tree",
(u_long)pgno));
goto done;
}
@@ -1371,8 +1474,8 @@ __bam_vrfy_subtree(dbp,
* subtree.
*/
isbad = 1;
- EPRINT((dbp->dbenv,
- "Non-recno leaf page %lu in recno tree",
+ EPRINT((dbenv,
+ "Page %lu: non-recno leaf page in recno tree",
(u_long)pgno));
goto done;
}
@@ -1388,8 +1491,8 @@ __bam_vrfy_subtree(dbp,
/* If dups aren't allowed in this btree, trouble. */
if (!LF_ISSET(ST_DUPOK)) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Duplicates on page %lu in non-dup btree",
+ EPRINT((dbenv,
+ "Page %lu: duplicates in non-dup btree",
(u_long)pgno));
} else {
/*
@@ -1414,8 +1517,8 @@ __bam_vrfy_subtree(dbp,
}
if ((ret = __bam_vrfy_subtree(
dbp, vdp, child->pgno, NULL,
- NULL, stflags, NULL, NULL,
- NULL)) != 0) {
+ NULL, stflags | ST_TOPLEVEL,
+ NULL, NULL, NULL)) != 0) {
if (ret !=
DB_VERIFY_BAD)
goto err;
@@ -1435,15 +1538,14 @@ __bam_vrfy_subtree(dbp,
*/
if (F_ISSET(pip, VRFY_DUPS_UNSORTED) &&
LF_ISSET(ST_DUPSORT)) {
- EPRINT((dbp->dbenv,
- "Unsorted duplicate set at page %lu in sorted-dup database",
+ EPRINT((dbenv,
+ "Page %lu: unsorted duplicate set in sorted-dup database",
(u_long)pgno));
isbad = 1;
}
}
}
goto leaf;
- break;
case P_IBTREE:
case P_IRECNO:
/* We handle these below. */
@@ -1455,10 +1557,27 @@ __bam_vrfy_subtree(dbp,
* Note that the code at the "done" label assumes that the
* current page is a btree/recno one of some sort; this
* is not the case here, so we goto err.
+ *
+ * If the page is entirely zeroed, its pip->type will be a lie
+ * (we assumed it was a hash page, as they're allowed to be
+ * zeroed); handle this case specially.
*/
- EPRINT((dbp->dbenv,
- "Page %lu is of inappropriate type %lu",
- (u_long)pgno, (u_long)pip->type));
+ if (F_ISSET(pip, VRFY_IS_ALLZEROES))
+ ZEROPG_ERR_PRINT(dbenv, pgno, "btree or recno page");
+ else
+ EPRINT((dbenv,
+ "Page %lu: btree or recno page is of inappropriate type %lu",
+ (u_long)pgno, (u_long)pip->type));
+
+ /*
+ * We probably lost a leaf page (or more if this was an
+ * internal page) from our prev/next_pgno chain. Flag
+ * that this is expected; we don't want or need to
+ * spew error messages about erroneous prev/next_pgnos,
+ * since that's probably not the real problem.
+ */
+ F_SET(vdp, VRFY_LEAFCHAIN_BROKEN);
+
ret = DB_VERIFY_BAD;
goto err;
}
@@ -1474,7 +1593,7 @@ __bam_vrfy_subtree(dbp,
ret = __db_vrfy_ccnext(cc, &child))
if (child->type == V_RECNO) {
if (pip->type != P_IRECNO) {
- TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy_subtree",
+ TYPE_ERR_PRINT(dbenv, "__bam_vrfy_subtree",
pgno, pip->type);
DB_ASSERT(0);
ret = EINVAL;
@@ -1499,30 +1618,64 @@ __bam_vrfy_subtree(dbp,
else if (child_relen > 0 &&
relen != child_relen) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Recno page %lu returned bad re_len",
- (u_long)child->pgno));
+ EPRINT((dbenv,
+ "Page %lu: recno page returned bad re_len %lu",
+ (u_long)child->pgno,
+ (u_long)child_relen));
}
if (relenp)
*relenp = relen;
}
if (LF_ISSET(ST_RECNUM))
nrecs += child_nrecs;
- if (level != child_level + 1) {
+ if (isbad == 0 && level != child_level + 1) {
isbad = 1;
- EPRINT((dbp->dbenv, "%s%lu%s%lu%s%lu",
- "Recno level incorrect on page ",
- (u_long)child->pgno, ": got ",
- (u_long)child_level, ", expected ",
+ EPRINT((dbenv,
+ "Page %lu: recno level incorrect: got %lu, expected %lu",
+ (u_long)child->pgno, (u_long)child_level,
(u_long)(level - 1)));
}
- } else if (child->type == V_OVERFLOW &&
- (ret = __db_vrfy_ovfl_structure(dbp, vdp,
- child->pgno, child->tlen, flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
+ } else if (child->type == V_OVERFLOW) {
+ /*
+ * It is possible for one internal page to reference
+ * a single overflow page twice, if all the items
+ * in the subtree referenced by slot 0 are deleted,
+ * then a similar number of items are put back
+ * before the key that formerly had been in slot 1.
+ *
+ * (Btree doesn't look at the key in slot 0, so the
+ * fact that the key formerly at slot 1 is the "wrong"
+ * parent of the stuff in the slot 0 subtree isn't
+ * really incorrect.)
+ *
+ * __db_vrfy_ovfl_structure is designed to be
+ * efficiently called multiple times for multiple
+ * references; call it here as many times as is
+ * appropriate.
+ */
+
+ /* Otherwise, __db_vrfy_childput would be broken. */
+ DB_ASSERT(child->refcnt >= 1);
+
+ /*
+ * An overflow referenced more than twice here
+ * shouldn't happen.
+ */
+ if (child->refcnt > 2) {
+ EPRINT((dbenv,
+ "Page %lu: overflow page %lu referenced more than twice from internal page",
+ (u_long)pgno, (u_long)child->pgno));
isbad = 1;
- else
- goto done;
+ } else
+ for (j = 0; j < child->refcnt; j++)
+ if ((ret = __db_vrfy_ovfl_structure(dbp,
+ vdp, child->pgno, child->tlen,
+ flags)) != 0) {
+ if (ret == DB_VERIFY_BAD)
+ isbad = 1;
+ else
+ goto done;
+ }
}
if ((ret = __db_vrfy_ccclose(cc)) != 0)
@@ -1543,12 +1696,12 @@ __bam_vrfy_subtree(dbp,
* itself, which must sort lower than all entries on its child;
* ri will be the key to its right, which must sort greater.
*/
- if (h == NULL && (ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if (h == NULL && (ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
goto err;
for (i = 0; i < pip->entries; i += O_INDX) {
- li = GET_BINTERNAL(h, i);
+ li = GET_BINTERNAL(dbp, h, i);
ri = (i + O_INDX < pip->entries) ?
- GET_BINTERNAL(h, i + O_INDX) : NULL;
+ GET_BINTERNAL(dbp, h, i + O_INDX) : NULL;
/*
* The leftmost key is forcibly sorted less than all entries,
@@ -1577,19 +1730,19 @@ __bam_vrfy_subtree(dbp,
*/
if (li->nrecs != child_nrecs) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Item %lu page %lu has incorrect record count of %lu, should be %lu",
- (u_long)i, (u_long)pgno, (u_long)li->nrecs,
+ EPRINT((dbenv,
+ "Page %lu: item %lu has incorrect record count of %lu, should be %lu",
+ (u_long)pgno, (u_long)i, (u_long)li->nrecs,
(u_long)child_nrecs));
}
}
if (level != child_level + 1) {
isbad = 1;
- EPRINT((dbp->dbenv, "%s%lu%s%lu%s%lu",
- "Btree level incorrect on page ", (u_long)li->pgno,
- ": got ", (u_long)child_level, ", expected ",
- (u_long)(level - 1)));
+ EPRINT((dbenv,
+ "Page %lu: Btree level incorrect: got %lu, expected %lu",
+ (u_long)li->pgno,
+ (u_long)child_level, (u_long)(level - 1)));
}
}
@@ -1616,7 +1769,7 @@ done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
* isbad == 0, though, it's now safe to do so, as we've
* traversed any child overflow pages. Do it.
*/
- if (h == NULL && (ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if (h == NULL && (ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
goto err;
if ((ret = __bam_vrfy_itemorder(dbp,
vdp, h, pgno, 0, 1, 0, flags)) != 0)
@@ -1625,12 +1778,35 @@ done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
}
/*
+ * It's possible to get to this point with a page that has no
+ * items, but without having detected any sort of failure yet.
+ * Having zero items is legal if it's a leaf--it may be the
+ * root page in an empty tree, or the tree may have been
+ * modified with the DB_REVSPLITOFF flag set (there's no way
+ * to tell from what's on disk). For an internal page,
+ * though, having no items is a problem (all internal pages
+ * must have children).
+ */
+ if (isbad == 0 && ret == 0) {
+ if (h == NULL && (ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
+ goto err;
+
+ if (NUM_ENT(h) == 0 && ISINTERNAL(h)) {
+ EPRINT((dbenv,
+ "Page %lu: internal page is empty and should not be",
+ (u_long)pgno));
+ isbad = 1;
+ goto err;
+ }
+ }
+
+ /*
* Our parent has sent us BINTERNAL pointers to parent records
* so that we can verify our place with respect to them. If it's
* appropriate--we have a default sort function--verify this.
*/
if (isbad == 0 && ret == 0 && !LF_ISSET(DB_NOORDERCHK) && lp != NULL) {
- if (h == NULL && (ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
+ if (h == NULL && (ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
goto err;
/*
@@ -1661,8 +1837,8 @@ done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
*/
if (LF_ISSET(ST_RECNUM) && nrecs != pip->rec_cnt && toplevel) {
isbad = 1;
- EPRINT((dbp->dbenv,
- "Bad record count on page %lu: got %lu, expected %lu",
+ EPRINT((dbenv,
+ "Page %lu: bad record count: has %lu records, claims %lu",
(u_long)pgno, (u_long)nrecs, (u_long)pip->rec_cnt));
}
@@ -1676,13 +1852,31 @@ done: if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
goto err;
if (p != 0) {
isbad = 1;
- EPRINT((dbp->dbenv, "Page %lu linked twice", (u_long)pgno));
+ EPRINT((dbenv, "Page %lu: linked twice", (u_long)pgno));
} else if ((ret = __db_vrfy_pgset_inc(pgset, pgno)) != 0)
goto err;
-err: if (h != NULL && (t_ret = memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0)
+ if (toplevel)
+ /*
+ * The last page's next_pgno in the leaf chain should have been
+ * PGNO_INVALID.
+ */
+ if (vdp->next_pgno != PGNO_INVALID) {
+ EPRINT((dbenv, "Page %lu: unterminated leaf chain",
+ (u_long)vdp->prev_pgno));
+ isbad = 1;
+ }
+
+err: if (toplevel) {
+ /* Restore our caller's settings. */
+ vdp->next_pgno = next_pgno;
+ vdp->prev_pgno = prev_pgno;
+ vdp->leaf_type = leaf_type;
+ }
+
+ if (h != NULL && (t_ret = __memp_fput(mpf, h, 0)) != 0 && ret == 0)
ret = t_ret;
- if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
+ if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0)
ret = t_ret;
if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0)
ret = t_ret;
@@ -1712,14 +1906,24 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags)
u_int32_t flags;
{
BOVERFLOW *bo;
+ DB_ENV *dbenv;
DBT dbt;
db_indx_t last;
int ret, cmp;
+ dbenv = dbp->dbenv;
memset(&dbt, 0, sizeof(DBT));
F_SET(&dbt, DB_DBT_MALLOC);
ret = 0;
+ /*
+ * Empty pages are sorted correctly by definition. We check
+ * to see whether they ought to be empty elsewhere; leaf
+ * pages legally may be.
+ */
+ if (NUM_ENT(h) == 0)
+ return (0);
+
switch (TYPE(h)) {
case P_IBTREE:
case P_LDUP:
@@ -1729,8 +1933,7 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags)
last = NUM_ENT(h) - P_INDX;
break;
default:
- TYPE_ERR_PRINT(dbp->dbenv,
- "__bam_vrfy_treeorder", pgno, TYPE(h));
+ TYPE_ERR_PRINT(dbenv, "__bam_vrfy_treeorder", pgno, TYPE(h));
DB_ASSERT(0);
return (EINVAL);
}
@@ -1759,26 +1962,27 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags)
return (ret);
} else {
DB_ASSERT(0);
- EPRINT((dbp->dbenv,
- "Unknown type for internal record"));
+ EPRINT((dbenv,
+ "Page %lu: unknown type for internal record",
+ (u_long)PGNO(h)));
return (EINVAL);
}
/* On error, fall through, free if needed, and return. */
if ((ret = __bam_cmp(dbp, &dbt, h, 0, func, &cmp)) == 0) {
if (cmp > 0) {
- EPRINT((dbp->dbenv,
- "First item on page %lu sorted greater than parent entry",
+ EPRINT((dbenv,
+ "Page %lu: first item on page sorted greater than parent entry",
(u_long)PGNO(h)));
ret = DB_VERIFY_BAD;
}
} else
- EPRINT((dbp->dbenv,
- "First item on page %lu had comparison error",
+ EPRINT((dbenv,
+ "Page %lu: first item on page had comparison error",
(u_long)PGNO(h)));
if (dbt.data != lp->data)
- __os_free(dbt.data, 0);
+ __os_ufree(dbenv, dbt.data);
if (ret != 0)
return (ret);
}
@@ -1794,26 +1998,27 @@ __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, func, flags)
return (ret);
} else {
DB_ASSERT(0);
- EPRINT((dbp->dbenv,
- "Unknown type for internal record"));
+ EPRINT((dbenv,
+ "Page %lu: unknown type for internal record",
+ (u_long)PGNO(h)));
return (EINVAL);
}
/* On error, fall through, free if needed, and return. */
if ((ret = __bam_cmp(dbp, &dbt, h, last, func, &cmp)) == 0) {
if (cmp < 0) {
- EPRINT((dbp->dbenv,
- "Last item on page %lu sorted greater than parent entry",
+ EPRINT((dbenv,
+ "Page %lu: last item on page sorted greater than parent entry",
(u_long)PGNO(h)));
ret = DB_VERIFY_BAD;
}
} else
- EPRINT((dbp->dbenv,
- "Last item on page %lu had comparison error",
+ EPRINT((dbenv,
+ "Page %lu: last item on page had comparison error",
(u_long)PGNO(h)));
if (dbt.data != rp->data)
- __os_free(dbt.data, 0);
+ __os_ufree(dbenv, dbt.data);
}
return (ret);
@@ -1841,37 +2046,41 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
u_int32_t flags;
{
DBT dbt, unkdbt;
+ DB_ENV *dbenv;
BKEYDATA *bk;
BOVERFLOW *bo;
- db_indx_t i, beg, end;
+ db_indx_t i, beg, end, *inp;
u_int32_t himark;
u_int8_t *pgmap;
void *ovflbuf;
int t_ret, ret, err_ret;
+ dbenv = dbp->dbenv;
+
/* Shut up lint. */
COMPQUIET(end, 0);
ovflbuf = pgmap = NULL;
err_ret = ret = 0;
+ inp = P_INP(dbp, h);
memset(&dbt, 0, sizeof(DBT));
dbt.flags = DB_DBT_REALLOC;
memset(&unkdbt, 0, sizeof(DBT));
- unkdbt.size = strlen("UNKNOWN") + 1;
+ unkdbt.size = (u_int32_t)(strlen("UNKNOWN") + 1);
unkdbt.data = "UNKNOWN";
/*
* Allocate a buffer for overflow items. Start at one page;
* __db_safe_goff will realloc as needed.
*/
- if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &ovflbuf)) != 0)
+ if ((ret = __os_malloc(dbenv, dbp->pgsize, &ovflbuf)) != 0)
return (ret);
if (LF_ISSET(DB_AGGRESSIVE)) {
if ((ret =
- __os_malloc(dbp->dbenv, dbp->pgsize, NULL, &pgmap)) != 0)
+ __os_malloc(dbenv, dbp->pgsize, &pgmap)) != 0)
goto err;
memset(pgmap, 0, dbp->pgsize);
}
@@ -1914,7 +2123,7 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
* We only want to print deleted items if
* DB_AGGRESSIVE is set.
*/
- bk = GET_BKEYDATA(h, i);
+ bk = GET_BKEYDATA(dbp, h, i);
if (!LF_ISSET(DB_AGGRESSIVE) && B_DISSET(bk->type))
continue;
@@ -1927,10 +2136,10 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
if (key != NULL &&
(i != 0 || !LF_ISSET(SA_SKIPFIRSTKEY)))
if ((ret = __db_prdbt(key,
- 0, " ", handle, callback, 0, NULL)) != 0)
+ 0, " ", handle, callback, 0, vdp)) != 0)
err_ret = ret;
- beg = h->inp[i];
+ beg = inp[i];
switch (B_TYPE(bk->type)) {
case B_DUPLICATE:
end = beg + BOVERFLOW_SIZE - 1;
@@ -1958,23 +2167,24 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
(i % P_INDX == 0)) {
/* Not much to do on failure. */
if ((ret = __db_prdbt(&unkdbt, 0, " ",
- handle, callback, 0, NULL)) != 0)
+ handle, callback, 0, vdp)) != 0)
err_ret = ret;
break;
}
if ((ret = __db_salvage_duptree(dbp,
vdp, bo->pgno, &dbt, handle, callback,
- flags | SA_SKIPFIRSTKEY)) != 0)
+ flags | SA_SKIPFIRSTKEY)) != 0)
err_ret = ret;
break;
case B_KEYDATA:
- end = ALIGN(beg + bk->len, sizeof(u_int32_t)) - 1;
+ end =
+ ALIGN(beg + bk->len, sizeof(u_int32_t)) - 1;
dbt.data = bk->data;
dbt.size = bk->len;
if ((ret = __db_prdbt(&dbt,
- 0, " ", handle, callback, 0, NULL)) != 0)
+ 0, " ", handle, callback, 0, vdp)) != 0)
err_ret = ret;
break;
case B_OVERFLOW:
@@ -1985,11 +2195,11 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
err_ret = ret;
/* We care about err_ret more. */
(void)__db_prdbt(&unkdbt, 0, " ",
- handle, callback, 0, NULL);
+ handle, callback, 0, vdp);
break;
}
if ((ret = __db_prdbt(&dbt,
- 0, " ", handle, callback, 0, NULL)) != 0)
+ 0, " ", handle, callback, 0, vdp)) != 0)
err_ret = ret;
break;
default:
@@ -2020,12 +2230,12 @@ __bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
* a datum; fix this imbalance by printing an "UNKNOWN".
*/
if (pgtype == P_LBTREE && (i % P_INDX == 1) && ((ret =
- __db_prdbt(&unkdbt, 0, " ", handle, callback, 0, NULL)) != 0))
+ __db_prdbt(&unkdbt, 0, " ", handle, callback, 0, vdp)) != 0))
err_ret = ret;
err: if (pgmap != NULL)
- __os_free(pgmap, 0);
- __os_free(ovflbuf, 0);
+ __os_free(dbenv, pgmap);
+ __os_free(dbenv, ovflbuf);
/* Mark this page as done. */
if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0)
@@ -2061,12 +2271,13 @@ __bam_salvage_walkdupint(dbp, vdp, h, key, handle, callback, flags)
for (i = 0; i < NUM_ENT(h); i++) {
switch (TYPE(h)) {
case P_IBTREE:
- bi = GET_BINTERNAL(h, i);
+ bi = GET_BINTERNAL(dbp, h, i);
if ((t_ret = __db_salvage_duptree(dbp,
vdp, bi->pgno, key, handle, callback, flags)) != 0)
ret = t_ret;
+ break;
case P_IRECNO:
- ri = GET_RINTERNAL(h, i);
+ ri = GET_RINTERNAL(dbp, h, i);
if ((t_ret = __db_salvage_duptree(dbp,
vdp, ri->pgno, key, handle, callback, flags)) != 0)
ret = t_ret;
@@ -2110,11 +2321,13 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
DB *pgset;
{
BINTERNAL *bi;
+ DB_MPOOLFILE *mpf;
PAGE *h;
RINTERNAL *ri;
db_pgno_t current, p;
int err_ret, ret;
+ mpf = dbp->mpf;
h = NULL;
ret = err_ret = 0;
DB_ASSERT(pgset != NULL);
@@ -2123,7 +2336,7 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
err_ret = DB_VERIFY_BAD;
goto err;
}
- if ((ret = memp_fget(dbp->mpf, &current, 0, &h)) != 0) {
+ if ((ret = __memp_fget(mpf, &current, 0, &h)) != 0) {
err_ret = ret;
goto err;
}
@@ -2137,10 +2350,10 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
goto err;
}
if (TYPE(h) == P_IBTREE) {
- bi = GET_BINTERNAL(h, 0);
+ bi = GET_BINTERNAL(dbp, h, 0);
current = bi->pgno;
} else { /* P_IRECNO */
- ri = GET_RINTERNAL(h, 0);
+ ri = GET_RINTERNAL(dbp, h, 0);
current = ri->pgno;
}
break;
@@ -2152,7 +2365,7 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
goto err;
}
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ if ((ret = __memp_fput(mpf, h, 0)) != 0)
err_ret = ret;
h = NULL;
}
@@ -2164,7 +2377,7 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
traverse:
while (IS_VALID_PGNO(current) && current != PGNO_INVALID) {
if (h == NULL &&
- (ret = memp_fget(dbp->mpf, &current, 0, &h) != 0)) {
+ (ret = __memp_fget(mpf, &current, 0, &h)) != 0) {
err_ret = ret;
break;
}
@@ -2184,13 +2397,13 @@ traverse:
goto err;
current = NEXT_PGNO(h);
- if ((ret = memp_fput(dbp->mpf, h, 0)) != 0)
+ if ((ret = __memp_fput(mpf, h, 0)) != 0)
err_ret = ret;
h = NULL;
}
err: if (h != NULL)
- (void)memp_fput(dbp->mpf, h, 0);
+ (void)__memp_fput(mpf, h, 0);
return (ret == 0 ? err_ret : ret);
}
@@ -2218,7 +2431,7 @@ __bam_safe_getdata(dbp, h, i, ovflok, dbt, freedbtp)
memset(dbt, 0, sizeof(DBT));
*freedbtp = 0;
- bk = GET_BKEYDATA(h, i);
+ bk = GET_BKEYDATA(dbp, h, i);
if (B_TYPE(bk->type) == B_OVERFLOW) {
if (!ovflok)
return (0);
diff --git a/db/btree/btree.src b/db/btree/btree.src
index a1eba7d7f..85faff67f 100644
--- a/db/btree/btree.src
+++ b/db/btree/btree.src
@@ -1,13 +1,14 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Copyright (c) 1996-2003
* Sleepycat Software. All rights reserved.
*
- * $Id: btree.src,v 10.26 2000/12/12 17:40:23 bostic Exp $
+ * $Id: btree.src,v 10.39 2003/11/14 05:32:34 ubell Exp $
*/
-PREFIX bam
+PREFIX __bam
+DBPRIVATE
INCLUDE #include "db_config.h"
INCLUDE
@@ -15,93 +16,22 @@ INCLUDE #ifndef NO_SYSTEM_INCLUDES
INCLUDE #include <sys/types.h>
INCLUDE
INCLUDE #include <ctype.h>
-INCLUDE #include <errno.h>
INCLUDE #include <string.h>
INCLUDE #endif
INCLUDE
INCLUDE #include "db_int.h"
-INCLUDE #include "db_page.h"
-INCLUDE #include "db_dispatch.h"
-INCLUDE #include "db_am.h"
-INCLUDE #include "btree.h"
-INCLUDE #include "txn.h"
+INCLUDE #include "dbinc/crypto.h"
+INCLUDE #include "dbinc/db_page.h"
+INCLUDE #include "dbinc/db_dispatch.h"
+INCLUDE #include "dbinc/db_am.h"
+INCLUDE #include "dbinc/btree.h"
+INCLUDE #include "dbinc/log.h"
+INCLUDE #include "dbinc/txn.h"
INCLUDE
/*
- * BTREE-pg_alloc: used to record allocating a new page.
- *
- * meta_lsn: the meta-data page's original lsn.
- * page_lsn: the allocated page's original lsn.
- * pgno: the page allocated.
- * next: the next page on the free list.
- */
-BEGIN pg_alloc 51
-ARG fileid int32_t ld
-POINTER meta_lsn DB_LSN * lu
-POINTER page_lsn DB_LSN * lu
-ARG pgno db_pgno_t lu
-ARG ptype u_int32_t lu
-ARG next db_pgno_t lu
-END
-
-DEPRECATED pg_alloc1 60
-ARG fileid int32_t ld
-POINTER meta_lsn DB_LSN * lu
-POINTER alloc_lsn DB_LSN * lu
-POINTER page_lsn DB_LSN * lu
-ARG pgno db_pgno_t lu
-ARG ptype u_int32_t lu
-ARG next db_pgno_t lu
-END
-
-/*
- * BTREE-pg_free: used to record freeing a page.
- *
- * pgno: the page being freed.
- * meta_lsn: the meta-data page's original lsn.
- * header: the header from the free'd page.
- * next: the previous next pointer on the metadata page.
- */
-BEGIN pg_free 52
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
-POINTER meta_lsn DB_LSN * lu
-DBT header DBT s
-ARG next db_pgno_t lu
-END
-
-DEPRECATED pg_free1 61
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
-POINTER meta_lsn DB_LSN * lu
-POINTER alloc_lsn DB_LSN * lu
-DBT header DBT s
-ARG next db_pgno_t lu
-END
-
-/*
- * BTREE-split: used to log a page split.
- *
- * left: the page number for the low-order contents.
- * llsn: the left page's original LSN.
- * right: the page number for the high-order contents.
- * rlsn: the right page's original LSN.
- * indx: the number of entries that went to the left page.
- * npgno: the next page number
- * nlsn: the next page's original LSN (or 0 if no next page).
- * pg: the split page's contents before the split.
+ * NOTE: pg_alloc and pg_free have been moved to db.src, where they belong.
*/
-DEPRECATED split1 53
-ARG fileid int32_t ld
-ARG left db_pgno_t lu
-POINTER llsn DB_LSN * lu
-ARG right db_pgno_t lu
-POINTER rlsn DB_LSN * lu
-ARG indx u_int32_t lu
-ARG npgno db_pgno_t lu
-POINTER nlsn DB_LSN * lu
-DBT pg DBT s
-END
/*
* BTREE-split: used to log a page split.
@@ -112,14 +42,13 @@ END
* rlsn: the right page's original LSN.
* indx: the number of entries that went to the left page.
* npgno: the next page number
- * npgno: the next page number
* nlsn: the next page's original LSN (or 0 if no next page).
* root_pgno: the root page number
* pg: the split page's contents before the split.
* opflags: SPL_NRECS: if splitting a tree that maintains a record count.
*/
BEGIN split 62
-ARG fileid int32_t ld
+DB fileid int32_t ld
ARG left db_pgno_t lu
POINTER llsn DB_LSN * lu
ARG right db_pgno_t lu
@@ -128,7 +57,7 @@ ARG indx u_int32_t lu
ARG npgno db_pgno_t lu
POINTER nlsn DB_LSN * lu
ARG root_pgno db_pgno_t lu
-DBT pg DBT s
+PGDBT pg DBT s
ARG opflags u_int32_t lu
END
@@ -137,33 +66,15 @@ END
*
* pgno: the page number of the page copied over the root.
* pgdbt: the page being copied on the root page.
- * nrec: the tree's record count.
- * rootent: last entry on the root page.
- * rootlsn: the root page's original lsn.
- */
-DEPRECATED rsplit1 54
-ARG fileid int32_t ld
-ARG pgno db_pgno_t lu
-DBT pgdbt DBT s
-ARG nrec db_pgno_t lu
-DBT rootent DBT s
-POINTER rootlsn DB_LSN * lu
-END
-
-/*
- * BTREE-rsplit: used to log a reverse-split
- *
- * pgno: the page number of the page copied over the root.
- * pgdbt: the page being copied on the root page.
* root_pgno: the root page number.
* nrec: the tree's record count.
* rootent: last entry on the root page.
* rootlsn: the root page's original lsn.
*/
BEGIN rsplit 63
-ARG fileid int32_t ld
+DB fileid int32_t ld
ARG pgno db_pgno_t lu
-DBT pgdbt DBT s
+PGDBT pgdbt DBT s
ARG root_pgno db_pgno_t lu
ARG nrec db_pgno_t lu
DBT rootent DBT s
@@ -180,7 +91,7 @@ END
* is_insert: 0 if a delete, 1 if an insert.
*/
BEGIN adj 55
-ARG fileid int32_t ld
+DB fileid int32_t ld
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG indx u_int32_t lu
@@ -198,7 +109,7 @@ END
* opflags: CAD_UPDATEROOT: if root page count was adjusted.
*/
BEGIN cadjust 56
-ARG fileid int32_t ld
+DB fileid int32_t ld
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG indx u_int32_t lu
@@ -214,7 +125,7 @@ END
* indx: the index to be deleted.
*/
BEGIN cdel 57
-ARG fileid int32_t ld
+DB fileid int32_t ld
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG indx u_int32_t lu
@@ -225,12 +136,15 @@ END
*
* pgno: the page modified.
* lsn: the page's original lsn.
+ * indx: the index to be replaced.
+ * isdeleted: set if the record was previously deleted.
* orig: the original data.
- * new: the replacement data.
- * duplicate: the prefix of the replacement that matches the original.
+ * repl: the replacement data.
+ * prefix: the prefix of the replacement that matches the original.
+ * suffix: the suffix of the replacement that matches the original.
*/
BEGIN repl 58
-ARG fileid int32_t ld
+DB fileid int32_t ld
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
ARG indx u_int32_t lu
@@ -245,7 +159,7 @@ END
* BTREE-root: log the assignment of a root btree page.
*/
BEGIN root 59
-ARG fileid int32_t ld
+DB fileid int32_t ld
ARG meta_pgno db_pgno_t lu
ARG root_pgno db_pgno_t lu
POINTER meta_lsn DB_LSN * lu
@@ -260,7 +174,7 @@ END
*/
BEGIN curadj 64
/* Fileid of db affected. */
-ARG fileid int32_t ld
+DB fileid int32_t ld
/* Which adjustment. */
ARG mode db_ca_mode ld
/* Page entry is from. */
@@ -284,7 +198,7 @@ END
*/
BEGIN rcuradj 65
/* Fileid of db affected. */
-ARG fileid int32_t ld
+DB fileid int32_t ld
/* Which adjustment. */
ARG mode ca_recno_arg ld
/* Root page number. */
diff --git a/db/btree/btree_auto.c b/db/btree/btree_auto.c
index fdb27b7d2..16ebbcad9 100644
--- a/db/btree/btree_auto.c
+++ b/db/btree/btree_auto.c
@@ -5,609 +5,30 @@
#include <sys/types.h>
#include <ctype.h>
-#include <errno.h>
#include <string.h>
#endif
#include "db_int.h"
-#include "db_page.h"
-#include "db_dispatch.h"
-#include "db_am.h"
-#include "btree.h"
-#include "txn.h"
-
+#include "dbinc/crypto.h"
+#include "dbinc/db_page.h"
+#include "dbinc/db_dispatch.h"
+#include "dbinc/db_am.h"
+#include "dbinc/btree.h"
+#include "dbinc/log.h"
+#include "dbinc/txn.h"
+
+/*
+ * PUBLIC: int __bam_split_log __P((DB *, DB_TXN *, DB_LSN *,
+ * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, u_int32_t,
+ * PUBLIC: db_pgno_t, DB_LSN *, db_pgno_t, const DBT *, u_int32_t));
+ */
int
-__bam_pg_alloc_log(dbenv, txnid, ret_lsnp, flags,
- fileid, meta_lsn, page_lsn, pgno, ptype, next)
- DB_ENV *dbenv;
+__bam_split_log(dbp, txnid, ret_lsnp, flags, left, llsn, right, rlsn, indx,
+ npgno, nlsn, root_pgno, pg, opflags)
+ DB *dbp;
DB_TXN *txnid;
DB_LSN *ret_lsnp;
u_int32_t flags;
- int32_t fileid;
- DB_LSN * meta_lsn;
- DB_LSN * page_lsn;
- db_pgno_t pgno;
- u_int32_t ptype;
- db_pgno_t next;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn;
- u_int32_t rectype, txn_num;
- int ret;
- u_int8_t *bp;
-
- rectype = DB_bam_pg_alloc;
- if (txnid != NULL &&
- TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
- return (ret);
- txn_num = txnid == NULL ? 0 : txnid->txnid;
- if (txnid == NULL) {
- ZERO_LSN(null_lsn);
- lsnp = &null_lsn;
- } else
- lsnp = &txnid->last_lsn;
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(*meta_lsn)
- + sizeof(*page_lsn)
- + sizeof(pgno)
- + sizeof(ptype)
- + sizeof(next);
- if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
-
- bp = logrec.data;
- memcpy(bp, &rectype, sizeof(rectype));
- bp += sizeof(rectype);
- memcpy(bp, &txn_num, sizeof(txn_num));
- bp += sizeof(txn_num);
- memcpy(bp, lsnp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- if (meta_lsn != NULL)
- memcpy(bp, meta_lsn, sizeof(*meta_lsn));
- else
- memset(bp, 0, sizeof(*meta_lsn));
- bp += sizeof(*meta_lsn);
- if (page_lsn != NULL)
- memcpy(bp, page_lsn, sizeof(*page_lsn));
- else
- memset(bp, 0, sizeof(*page_lsn));
- bp += sizeof(*page_lsn);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
- memcpy(bp, &ptype, sizeof(ptype));
- bp += sizeof(ptype);
- memcpy(bp, &next, sizeof(next));
- bp += sizeof(next);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size);
- ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, logrec.size);
- return (ret);
-}
-
-int
-__bam_pg_alloc_print(dbenv, dbtp, lsnp, notused2, notused3)
- DB_ENV *dbenv;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __bam_pg_alloc_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused2 = DB_TXN_ABORT;
- notused3 = NULL;
-
- if ((ret = __bam_pg_alloc_read(dbenv, dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_pg_alloc: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- printf("\tpage_lsn: [%lu][%lu]\n",
- (u_long)argp->page_lsn.file, (u_long)argp->page_lsn.offset);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tptype: %lu\n", (u_long)argp->ptype);
- printf("\tnext: %lu\n", (u_long)argp->next);
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-int
-__bam_pg_alloc_read(dbenv, recbuf, argpp)
- DB_ENV *dbenv;
- void *recbuf;
- __bam_pg_alloc_args **argpp;
-{
- __bam_pg_alloc_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(dbenv, sizeof(__bam_pg_alloc_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn));
- bp += sizeof(argp->meta_lsn);
- memcpy(&argp->page_lsn, bp, sizeof(argp->page_lsn));
- bp += sizeof(argp->page_lsn);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memcpy(&argp->ptype, bp, sizeof(argp->ptype));
- bp += sizeof(argp->ptype);
- memcpy(&argp->next, bp, sizeof(argp->next));
- bp += sizeof(argp->next);
- *argpp = argp;
- return (0);
-}
-
-int
-__bam_pg_alloc1_print(dbenv, dbtp, lsnp, notused2, notused3)
- DB_ENV *dbenv;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __bam_pg_alloc1_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused2 = DB_TXN_ABORT;
- notused3 = NULL;
-
- if ((ret = __bam_pg_alloc1_read(dbenv, dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_pg_alloc1: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- printf("\talloc_lsn: [%lu][%lu]\n",
- (u_long)argp->alloc_lsn.file, (u_long)argp->alloc_lsn.offset);
- printf("\tpage_lsn: [%lu][%lu]\n",
- (u_long)argp->page_lsn.file, (u_long)argp->page_lsn.offset);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tptype: %lu\n", (u_long)argp->ptype);
- printf("\tnext: %lu\n", (u_long)argp->next);
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-int
-__bam_pg_alloc1_read(dbenv, recbuf, argpp)
- DB_ENV *dbenv;
- void *recbuf;
- __bam_pg_alloc1_args **argpp;
-{
- __bam_pg_alloc1_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(dbenv, sizeof(__bam_pg_alloc1_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn));
- bp += sizeof(argp->meta_lsn);
- memcpy(&argp->alloc_lsn, bp, sizeof(argp->alloc_lsn));
- bp += sizeof(argp->alloc_lsn);
- memcpy(&argp->page_lsn, bp, sizeof(argp->page_lsn));
- bp += sizeof(argp->page_lsn);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memcpy(&argp->ptype, bp, sizeof(argp->ptype));
- bp += sizeof(argp->ptype);
- memcpy(&argp->next, bp, sizeof(argp->next));
- bp += sizeof(argp->next);
- *argpp = argp;
- return (0);
-}
-
-int
-__bam_pg_free_log(dbenv, txnid, ret_lsnp, flags,
- fileid, pgno, meta_lsn, header, next)
- DB_ENV *dbenv;
- DB_TXN *txnid;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- int32_t fileid;
- db_pgno_t pgno;
- DB_LSN * meta_lsn;
- const DBT *header;
- db_pgno_t next;
-{
- DBT logrec;
- DB_LSN *lsnp, null_lsn;
- u_int32_t zero;
- u_int32_t rectype, txn_num;
- int ret;
- u_int8_t *bp;
-
- rectype = DB_bam_pg_free;
- if (txnid != NULL &&
- TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
- return (ret);
- txn_num = txnid == NULL ? 0 : txnid->txnid;
- if (txnid == NULL) {
- ZERO_LSN(null_lsn);
- lsnp = &null_lsn;
- } else
- lsnp = &txnid->last_lsn;
- logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
- + sizeof(*meta_lsn)
- + sizeof(u_int32_t) + (header == NULL ? 0 : header->size)
- + sizeof(next);
- if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
-
- bp = logrec.data;
- memcpy(bp, &rectype, sizeof(rectype));
- bp += sizeof(rectype);
- memcpy(bp, &txn_num, sizeof(txn_num));
- bp += sizeof(txn_num);
- memcpy(bp, lsnp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
- if (meta_lsn != NULL)
- memcpy(bp, meta_lsn, sizeof(*meta_lsn));
- else
- memset(bp, 0, sizeof(*meta_lsn));
- bp += sizeof(*meta_lsn);
- if (header == NULL) {
- zero = 0;
- memcpy(bp, &zero, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- } else {
- memcpy(bp, &header->size, sizeof(header->size));
- bp += sizeof(header->size);
- memcpy(bp, header->data, header->size);
- bp += header->size;
- }
- memcpy(bp, &next, sizeof(next));
- bp += sizeof(next);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size);
- ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, logrec.size);
- return (ret);
-}
-
-int
-__bam_pg_free_print(dbenv, dbtp, lsnp, notused2, notused3)
- DB_ENV *dbenv;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __bam_pg_free_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused2 = DB_TXN_ABORT;
- notused3 = NULL;
-
- if ((ret = __bam_pg_free_read(dbenv, dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_pg_free: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- printf("\theader: ");
- for (i = 0; i < argp->header.size; i++) {
- ch = ((u_int8_t *)argp->header.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\tnext: %lu\n", (u_long)argp->next);
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-int
-__bam_pg_free_read(dbenv, recbuf, argpp)
- DB_ENV *dbenv;
- void *recbuf;
- __bam_pg_free_args **argpp;
-{
- __bam_pg_free_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(dbenv, sizeof(__bam_pg_free_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn));
- bp += sizeof(argp->meta_lsn);
- memset(&argp->header, 0, sizeof(argp->header));
- memcpy(&argp->header.size, bp, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- argp->header.data = bp;
- bp += argp->header.size;
- memcpy(&argp->next, bp, sizeof(argp->next));
- bp += sizeof(argp->next);
- *argpp = argp;
- return (0);
-}
-
-int
-__bam_pg_free1_print(dbenv, dbtp, lsnp, notused2, notused3)
- DB_ENV *dbenv;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __bam_pg_free1_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused2 = DB_TXN_ABORT;
- notused3 = NULL;
-
- if ((ret = __bam_pg_free1_read(dbenv, dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_pg_free1: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tmeta_lsn: [%lu][%lu]\n",
- (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- printf("\talloc_lsn: [%lu][%lu]\n",
- (u_long)argp->alloc_lsn.file, (u_long)argp->alloc_lsn.offset);
- printf("\theader: ");
- for (i = 0; i < argp->header.size; i++) {
- ch = ((u_int8_t *)argp->header.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\tnext: %lu\n", (u_long)argp->next);
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-int
-__bam_pg_free1_read(dbenv, recbuf, argpp)
- DB_ENV *dbenv;
- void *recbuf;
- __bam_pg_free1_args **argpp;
-{
- __bam_pg_free1_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(dbenv, sizeof(__bam_pg_free1_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn));
- bp += sizeof(argp->meta_lsn);
- memcpy(&argp->alloc_lsn, bp, sizeof(argp->alloc_lsn));
- bp += sizeof(argp->alloc_lsn);
- memset(&argp->header, 0, sizeof(argp->header));
- memcpy(&argp->header.size, bp, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- argp->header.data = bp;
- bp += argp->header.size;
- memcpy(&argp->next, bp, sizeof(argp->next));
- bp += sizeof(argp->next);
- *argpp = argp;
- return (0);
-}
-
-int
-__bam_split1_print(dbenv, dbtp, lsnp, notused2, notused3)
- DB_ENV *dbenv;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __bam_split1_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused2 = DB_TXN_ABORT;
- notused3 = NULL;
-
- if ((ret = __bam_split1_read(dbenv, dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_split1: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tleft: %lu\n", (u_long)argp->left);
- printf("\tllsn: [%lu][%lu]\n",
- (u_long)argp->llsn.file, (u_long)argp->llsn.offset);
- printf("\tright: %lu\n", (u_long)argp->right);
- printf("\trlsn: [%lu][%lu]\n",
- (u_long)argp->rlsn.file, (u_long)argp->rlsn.offset);
- printf("\tindx: %lu\n", (u_long)argp->indx);
- printf("\tnpgno: %lu\n", (u_long)argp->npgno);
- printf("\tnlsn: [%lu][%lu]\n",
- (u_long)argp->nlsn.file, (u_long)argp->nlsn.offset);
- printf("\tpg: ");
- for (i = 0; i < argp->pg.size; i++) {
- ch = ((u_int8_t *)argp->pg.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-int
-__bam_split1_read(dbenv, recbuf, argpp)
- DB_ENV *dbenv;
- void *recbuf;
- __bam_split1_args **argpp;
-{
- __bam_split1_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(dbenv, sizeof(__bam_split1_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->left, bp, sizeof(argp->left));
- bp += sizeof(argp->left);
- memcpy(&argp->llsn, bp, sizeof(argp->llsn));
- bp += sizeof(argp->llsn);
- memcpy(&argp->right, bp, sizeof(argp->right));
- bp += sizeof(argp->right);
- memcpy(&argp->rlsn, bp, sizeof(argp->rlsn));
- bp += sizeof(argp->rlsn);
- memcpy(&argp->indx, bp, sizeof(argp->indx));
- bp += sizeof(argp->indx);
- memcpy(&argp->npgno, bp, sizeof(argp->npgno));
- bp += sizeof(argp->npgno);
- memcpy(&argp->nlsn, bp, sizeof(argp->nlsn));
- bp += sizeof(argp->nlsn);
- memset(&argp->pg, 0, sizeof(argp->pg));
- memcpy(&argp->pg.size, bp, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- argp->pg.data = bp;
- bp += argp->pg.size;
- *argpp = argp;
- return (0);
-}
-
-int
-__bam_split_log(dbenv, txnid, ret_lsnp, flags,
- fileid, left, llsn, right, rlsn, indx,
- npgno, nlsn, root_pgno, pg, opflags)
- DB_ENV *dbenv;
- DB_TXN *txnid;
- DB_LSN *ret_lsnp;
- u_int32_t flags;
- int32_t fileid;
db_pgno_t left;
DB_LSN * llsn;
db_pgno_t right;
@@ -620,72 +41,140 @@ __bam_split_log(dbenv, txnid, ret_lsnp, flags,
u_int32_t opflags;
{
DBT logrec;
+ DB_ENV *dbenv;
+ DB_TXNLOGREC *lr;
DB_LSN *lsnp, null_lsn;
- u_int32_t zero;
- u_int32_t rectype, txn_num;
- int ret;
+ u_int32_t zero, uinttmp, rectype, txn_num;
+ u_int npad;
u_int8_t *bp;
-
- rectype = DB_bam_split;
- if (txnid != NULL &&
- TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
- return (ret);
- txn_num = txnid == NULL ? 0 : txnid->txnid;
+ int is_durable, ret;
+
+ dbenv = dbp->dbenv;
+ rectype = DB___bam_split;
+ npad = 0;
+
+ is_durable = 1;
+ if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
+ F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) ||
+ F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
+ if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL)
+ return (0);
+ is_durable = 0;
+ }
if (txnid == NULL) {
- ZERO_LSN(null_lsn);
+ txn_num = 0;
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
lsnp = &null_lsn;
- } else
+ } else {
+ if (TAILQ_FIRST(&txnid->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ return (ret);
+ txn_num = txnid->txnid;
lsnp = &txnid->last_lsn;
+ }
+
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(left)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ sizeof(*llsn)
- + sizeof(right)
+ + sizeof(u_int32_t)
+ sizeof(*rlsn)
- + sizeof(indx)
- + sizeof(npgno)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ sizeof(*nlsn)
- + sizeof(root_pgno)
+ + sizeof(u_int32_t)
+ sizeof(u_int32_t) + (pg == NULL ? 0 : pg->size)
- + sizeof(opflags);
- if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
+ + sizeof(u_int32_t);
+ if (CRYPTO_ON(dbenv)) {
+ npad =
+ ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size);
+ logrec.size += npad;
+ }
+
+ if (!is_durable && txnid != NULL) {
+ if ((ret = __os_malloc(dbenv,
+ logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
+ return (ret);
+#ifdef DIAGNOSTIC
+ goto do_malloc;
+#else
+ logrec.data = &lr->data;
+#endif
+ } else {
+#ifdef DIAGNOSTIC
+do_malloc:
+#endif
+ if ((ret =
+ __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) {
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL)
+ (void)__os_free(dbenv, lr);
+#endif
+ return (ret);
+ }
+ }
+ if (npad > 0)
+ memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
bp = logrec.data;
+
memcpy(bp, &rectype, sizeof(rectype));
bp += sizeof(rectype);
+
memcpy(bp, &txn_num, sizeof(txn_num));
bp += sizeof(txn_num);
+
memcpy(bp, lsnp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &left, sizeof(left));
- bp += sizeof(left);
+
+ DB_ASSERT(dbp->log_filename != NULL);
+ if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
+ (ret = __dbreg_lazy_id(dbp)) != 0)
+ return (ret);
+
+ uinttmp = (u_int32_t)dbp->log_filename->id;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)left;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
if (llsn != NULL)
memcpy(bp, llsn, sizeof(*llsn));
else
memset(bp, 0, sizeof(*llsn));
bp += sizeof(*llsn);
- memcpy(bp, &right, sizeof(right));
- bp += sizeof(right);
+
+ uinttmp = (u_int32_t)right;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
if (rlsn != NULL)
memcpy(bp, rlsn, sizeof(*rlsn));
else
memset(bp, 0, sizeof(*rlsn));
bp += sizeof(*rlsn);
- memcpy(bp, &indx, sizeof(indx));
- bp += sizeof(indx);
- memcpy(bp, &npgno, sizeof(npgno));
- bp += sizeof(npgno);
+
+ uinttmp = (u_int32_t)indx;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)npgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
if (nlsn != NULL)
memcpy(bp, nlsn, sizeof(*nlsn));
else
memset(bp, 0, sizeof(*nlsn));
bp += sizeof(*nlsn);
- memcpy(bp, &root_pgno, sizeof(root_pgno));
- bp += sizeof(root_pgno);
+
+ uinttmp = (u_int32_t)root_pgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
if (pg == NULL) {
zero = 0;
memcpy(bp, &zero, sizeof(u_int32_t));
@@ -696,16 +185,96 @@ __bam_split_log(dbenv, txnid, ret_lsnp, flags,
memcpy(bp, pg->data, pg->size);
bp += pg->size;
}
- memcpy(bp, &opflags, sizeof(opflags));
- bp += sizeof(opflags);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size);
- ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, logrec.size);
+
+ uinttmp = (u_int32_t)opflags;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL) {
+ /*
+ * We set the debug bit if we are going
+ * to log non-durable transactions so
+ * they will be ignored by recovery.
+ */
+ memcpy(lr->data, logrec.data, logrec.size);
+ rectype |= DB_debug_FLAG;
+ memcpy(logrec.data, &rectype, sizeof(rectype));
+ }
+#endif
+
+ if (!is_durable && txnid != NULL) {
+ ret = 0;
+ STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+#ifdef DIAGNOSTIC
+ goto do_put;
+#endif
+ } else{
+#ifdef DIAGNOSTIC
+do_put:
+#endif
+ ret = __log_put(dbenv,
+ ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
+ if (ret == 0 && txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ }
+
+ if (!is_durable)
+ LSN_NOT_LOGGED(*ret_lsnp);
+#ifdef LOG_DIAGNOSTIC
+ if (ret != 0)
+ (void)__bam_split_print(dbenv,
+ (DBT *)&logrec, ret_lsnp, NULL, NULL);
+#endif
+#ifndef DIAGNOSTIC
+ if (is_durable || txnid == NULL)
+#endif
+ __os_free(dbenv, logrec.data);
+
return (ret);
}
+#ifdef HAVE_REPLICATION
+/*
+ * PUBLIC: int __bam_split_getpgnos __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_split_getpgnos(dbenv, rec, lsnp, notused1, summary)
+ DB_ENV *dbenv;
+ DBT *rec;
+ DB_LSN *lsnp;
+ db_recops notused1;
+ void *summary;
+{
+ TXN_RECS *t;
+ int ret;
+ COMPQUIET(rec, NULL);
+ COMPQUIET(notused1, DB_TXN_ABORT);
+
+ t = (TXN_RECS *)summary;
+
+ if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0)
+ return (ret);
+
+ t->array[t->npages].flags = LSN_PAGE_NOLOCK;
+ t->array[t->npages].lsn = *lsnp;
+ t->array[t->npages].fid = DB_LOGFILEID_INVALID;
+ memset(&t->array[t->npages].pgdesc, 0,
+ sizeof(t->array[t->npages].pgdesc));
+
+ t->npages++;
+
+ return (0);
+}
+#endif /* HAVE_REPLICATION */
+
+/*
+ * PUBLIC: int __bam_split_print __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
int
__bam_split_print(dbenv, dbtp, lsnp, notused2, notused3)
DB_ENV *dbenv;
@@ -716,50 +285,51 @@ __bam_split_print(dbenv, dbtp, lsnp, notused2, notused3)
{
__bam_split_args *argp;
u_int32_t i;
- u_int ch;
+ int ch;
int ret;
- i = 0;
- ch = 0;
notused2 = DB_TXN_ABORT;
notused3 = NULL;
if ((ret = __bam_split_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
- printf("[%lu][%lu]bam_split: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (void)printf(
+ "[%lu][%lu]__bam_split%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
(u_long)lsnp->file,
(u_long)lsnp->offset,
+ (argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
(u_long)argp->txnid->txnid,
(u_long)argp->prev_lsn.file,
(u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tleft: %lu\n", (u_long)argp->left);
- printf("\tllsn: [%lu][%lu]\n",
+ (void)printf("\tfileid: %ld\n", (long)argp->fileid);
+ (void)printf("\tleft: %lu\n", (u_long)argp->left);
+ (void)printf("\tllsn: [%lu][%lu]\n",
(u_long)argp->llsn.file, (u_long)argp->llsn.offset);
- printf("\tright: %lu\n", (u_long)argp->right);
- printf("\trlsn: [%lu][%lu]\n",
+ (void)printf("\tright: %lu\n", (u_long)argp->right);
+ (void)printf("\trlsn: [%lu][%lu]\n",
(u_long)argp->rlsn.file, (u_long)argp->rlsn.offset);
- printf("\tindx: %lu\n", (u_long)argp->indx);
- printf("\tnpgno: %lu\n", (u_long)argp->npgno);
- printf("\tnlsn: [%lu][%lu]\n",
+ (void)printf("\tindx: %lu\n", (u_long)argp->indx);
+ (void)printf("\tnpgno: %lu\n", (u_long)argp->npgno);
+ (void)printf("\tnlsn: [%lu][%lu]\n",
(u_long)argp->nlsn.file, (u_long)argp->nlsn.offset);
- printf("\troot_pgno: %lu\n", (u_long)argp->root_pgno);
- printf("\tpg: ");
+ (void)printf("\troot_pgno: %lu\n", (u_long)argp->root_pgno);
+ (void)printf("\tpg: ");
for (i = 0; i < argp->pg.size; i++) {
ch = ((u_int8_t *)argp->pg.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\topflags: %lu\n", (u_long)argp->opflags);
- printf("\n");
- __os_free(argp, 0);
+ printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
+ }
+ (void)printf("\n");
+ (void)printf("\topflags: %lu\n", (u_long)argp->opflags);
+ (void)printf("\n");
+ __os_free(dbenv, argp);
+
return (0);
}
+/*
+ * PUBLIC: int __bam_split_read __P((DB_ENV *, void *, __bam_split_args **));
+ */
int
__bam_split_read(dbenv, recbuf, argpp)
DB_ENV *dbenv;
@@ -767,158 +337,84 @@ __bam_split_read(dbenv, recbuf, argpp)
__bam_split_args **argpp;
{
__bam_split_args *argp;
+ u_int32_t uinttmp;
u_int8_t *bp;
int ret;
- ret = __os_malloc(dbenv, sizeof(__bam_split_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
+ if ((ret = __os_malloc(dbenv,
+ sizeof(__bam_split_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
argp->txnid = (DB_TXN *)&argp[1];
+
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
+
memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
bp += sizeof(argp->txnid->txnid);
+
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->left, bp, sizeof(argp->left));
- bp += sizeof(argp->left);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->fileid = (int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->left = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
memcpy(&argp->llsn, bp, sizeof(argp->llsn));
bp += sizeof(argp->llsn);
- memcpy(&argp->right, bp, sizeof(argp->right));
- bp += sizeof(argp->right);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->right = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
memcpy(&argp->rlsn, bp, sizeof(argp->rlsn));
bp += sizeof(argp->rlsn);
- memcpy(&argp->indx, bp, sizeof(argp->indx));
- bp += sizeof(argp->indx);
- memcpy(&argp->npgno, bp, sizeof(argp->npgno));
- bp += sizeof(argp->npgno);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->indx = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->npgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
memcpy(&argp->nlsn, bp, sizeof(argp->nlsn));
bp += sizeof(argp->nlsn);
- memcpy(&argp->root_pgno, bp, sizeof(argp->root_pgno));
- bp += sizeof(argp->root_pgno);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->root_pgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
memset(&argp->pg, 0, sizeof(argp->pg));
memcpy(&argp->pg.size, bp, sizeof(u_int32_t));
bp += sizeof(u_int32_t);
argp->pg.data = bp;
bp += argp->pg.size;
- memcpy(&argp->opflags, bp, sizeof(argp->opflags));
- bp += sizeof(argp->opflags);
- *argpp = argp;
- return (0);
-}
-int
-__bam_rsplit1_print(dbenv, dbtp, lsnp, notused2, notused3)
- DB_ENV *dbenv;
- DBT *dbtp;
- DB_LSN *lsnp;
- db_recops notused2;
- void *notused3;
-{
- __bam_rsplit1_args *argp;
- u_int32_t i;
- u_int ch;
- int ret;
-
- i = 0;
- ch = 0;
- notused2 = DB_TXN_ABORT;
- notused3 = NULL;
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->opflags = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
- if ((ret = __bam_rsplit1_read(dbenv, dbtp->data, &argp)) != 0)
- return (ret);
- printf("[%lu][%lu]bam_rsplit1: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
- (u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tpgdbt: ");
- for (i = 0; i < argp->pgdbt.size; i++) {
- ch = ((u_int8_t *)argp->pgdbt.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\tnrec: %lu\n", (u_long)argp->nrec);
- printf("\trootent: ");
- for (i = 0; i < argp->rootent.size; i++) {
- ch = ((u_int8_t *)argp->rootent.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\trootlsn: [%lu][%lu]\n",
- (u_long)argp->rootlsn.file, (u_long)argp->rootlsn.offset);
- printf("\n");
- __os_free(argp, 0);
- return (0);
-}
-
-int
-__bam_rsplit1_read(dbenv, recbuf, argpp)
- DB_ENV *dbenv;
- void *recbuf;
- __bam_rsplit1_args **argpp;
-{
- __bam_rsplit1_args *argp;
- u_int8_t *bp;
- int ret;
-
- ret = __os_malloc(dbenv, sizeof(__bam_rsplit1_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
- return (ret);
- argp->txnid = (DB_TXN *)&argp[1];
- bp = recbuf;
- memcpy(&argp->type, bp, sizeof(argp->type));
- bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
- memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
- bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
- memset(&argp->pgdbt, 0, sizeof(argp->pgdbt));
- memcpy(&argp->pgdbt.size, bp, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- argp->pgdbt.data = bp;
- bp += argp->pgdbt.size;
- memcpy(&argp->nrec, bp, sizeof(argp->nrec));
- bp += sizeof(argp->nrec);
- memset(&argp->rootent, 0, sizeof(argp->rootent));
- memcpy(&argp->rootent.size, bp, sizeof(u_int32_t));
- bp += sizeof(u_int32_t);
- argp->rootent.data = bp;
- bp += argp->rootent.size;
- memcpy(&argp->rootlsn, bp, sizeof(argp->rootlsn));
- bp += sizeof(argp->rootlsn);
*argpp = argp;
return (0);
}
+/*
+ * PUBLIC: int __bam_rsplit_log __P((DB *, DB_TXN *, DB_LSN *,
+ * PUBLIC: u_int32_t, db_pgno_t, const DBT *, db_pgno_t, db_pgno_t,
+ * PUBLIC: const DBT *, DB_LSN *));
+ */
int
-__bam_rsplit_log(dbenv, txnid, ret_lsnp, flags,
- fileid, pgno, pgdbt, root_pgno, nrec, rootent,
- rootlsn)
- DB_ENV *dbenv;
+__bam_rsplit_log(dbp, txnid, ret_lsnp, flags, pgno, pgdbt, root_pgno, nrec, rootent,
+ rootlsn)
+ DB *dbp;
DB_TXN *txnid;
DB_LSN *ret_lsnp;
u_int32_t flags;
- int32_t fileid;
db_pgno_t pgno;
const DBT *pgdbt;
db_pgno_t root_pgno;
@@ -927,45 +423,102 @@ __bam_rsplit_log(dbenv, txnid, ret_lsnp, flags,
DB_LSN * rootlsn;
{
DBT logrec;
+ DB_ENV *dbenv;
+ DB_TXNLOGREC *lr;
DB_LSN *lsnp, null_lsn;
- u_int32_t zero;
- u_int32_t rectype, txn_num;
- int ret;
+ u_int32_t zero, uinttmp, rectype, txn_num;
+ u_int npad;
u_int8_t *bp;
-
- rectype = DB_bam_rsplit;
- if (txnid != NULL &&
- TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
- return (ret);
- txn_num = txnid == NULL ? 0 : txnid->txnid;
+ int is_durable, ret;
+
+ dbenv = dbp->dbenv;
+ rectype = DB___bam_rsplit;
+ npad = 0;
+
+ is_durable = 1;
+ if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
+ F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) ||
+ F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
+ if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL)
+ return (0);
+ is_durable = 0;
+ }
if (txnid == NULL) {
- ZERO_LSN(null_lsn);
+ txn_num = 0;
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
lsnp = &null_lsn;
- } else
+ } else {
+ if (TAILQ_FIRST(&txnid->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ return (ret);
+ txn_num = txnid->txnid;
lsnp = &txnid->last_lsn;
+ }
+
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ sizeof(u_int32_t) + (pgdbt == NULL ? 0 : pgdbt->size)
- + sizeof(root_pgno)
- + sizeof(nrec)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ sizeof(u_int32_t) + (rootent == NULL ? 0 : rootent->size)
+ sizeof(*rootlsn);
- if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
+ if (CRYPTO_ON(dbenv)) {
+ npad =
+ ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size);
+ logrec.size += npad;
+ }
+
+ if (!is_durable && txnid != NULL) {
+ if ((ret = __os_malloc(dbenv,
+ logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
+ return (ret);
+#ifdef DIAGNOSTIC
+ goto do_malloc;
+#else
+ logrec.data = &lr->data;
+#endif
+ } else {
+#ifdef DIAGNOSTIC
+do_malloc:
+#endif
+ if ((ret =
+ __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) {
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL)
+ (void)__os_free(dbenv, lr);
+#endif
+ return (ret);
+ }
+ }
+ if (npad > 0)
+ memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
bp = logrec.data;
+
memcpy(bp, &rectype, sizeof(rectype));
bp += sizeof(rectype);
+
memcpy(bp, &txn_num, sizeof(txn_num));
bp += sizeof(txn_num);
+
memcpy(bp, lsnp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
+
+ DB_ASSERT(dbp->log_filename != NULL);
+ if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
+ (ret = __dbreg_lazy_id(dbp)) != 0)
+ return (ret);
+
+ uinttmp = (u_int32_t)dbp->log_filename->id;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)pgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
if (pgdbt == NULL) {
zero = 0;
memcpy(bp, &zero, sizeof(u_int32_t));
@@ -976,10 +529,15 @@ __bam_rsplit_log(dbenv, txnid, ret_lsnp, flags,
memcpy(bp, pgdbt->data, pgdbt->size);
bp += pgdbt->size;
}
- memcpy(bp, &root_pgno, sizeof(root_pgno));
- bp += sizeof(root_pgno);
- memcpy(bp, &nrec, sizeof(nrec));
- bp += sizeof(nrec);
+
+ uinttmp = (u_int32_t)root_pgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)nrec;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
if (rootent == NULL) {
zero = 0;
memcpy(bp, &zero, sizeof(u_int32_t));
@@ -990,19 +548,98 @@ __bam_rsplit_log(dbenv, txnid, ret_lsnp, flags,
memcpy(bp, rootent->data, rootent->size);
bp += rootent->size;
}
+
if (rootlsn != NULL)
memcpy(bp, rootlsn, sizeof(*rootlsn));
else
memset(bp, 0, sizeof(*rootlsn));
bp += sizeof(*rootlsn);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size);
- ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, logrec.size);
+
+ DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL) {
+ /*
+ * We set the debug bit if we are going
+ * to log non-durable transactions so
+ * they will be ignored by recovery.
+ */
+ memcpy(lr->data, logrec.data, logrec.size);
+ rectype |= DB_debug_FLAG;
+ memcpy(logrec.data, &rectype, sizeof(rectype));
+ }
+#endif
+
+ if (!is_durable && txnid != NULL) {
+ ret = 0;
+ STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+#ifdef DIAGNOSTIC
+ goto do_put;
+#endif
+ } else{
+#ifdef DIAGNOSTIC
+do_put:
+#endif
+ ret = __log_put(dbenv,
+ ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
+ if (ret == 0 && txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ }
+
+ if (!is_durable)
+ LSN_NOT_LOGGED(*ret_lsnp);
+#ifdef LOG_DIAGNOSTIC
+ if (ret != 0)
+ (void)__bam_rsplit_print(dbenv,
+ (DBT *)&logrec, ret_lsnp, NULL, NULL);
+#endif
+#ifndef DIAGNOSTIC
+ if (is_durable || txnid == NULL)
+#endif
+ __os_free(dbenv, logrec.data);
+
return (ret);
}
+#ifdef HAVE_REPLICATION
+/*
+ * PUBLIC: int __bam_rsplit_getpgnos __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_rsplit_getpgnos(dbenv, rec, lsnp, notused1, summary)
+ DB_ENV *dbenv;
+ DBT *rec;
+ DB_LSN *lsnp;
+ db_recops notused1;
+ void *summary;
+{
+ TXN_RECS *t;
+ int ret;
+ COMPQUIET(rec, NULL);
+ COMPQUIET(notused1, DB_TXN_ABORT);
+
+ t = (TXN_RECS *)summary;
+
+ if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0)
+ return (ret);
+
+ t->array[t->npages].flags = LSN_PAGE_NOLOCK;
+ t->array[t->npages].lsn = *lsnp;
+ t->array[t->npages].fid = DB_LOGFILEID_INVALID;
+ memset(&t->array[t->npages].pgdesc, 0,
+ sizeof(t->array[t->npages].pgdesc));
+
+ t->npages++;
+
+ return (0);
+}
+#endif /* HAVE_REPLICATION */
+
+/*
+ * PUBLIC: int __bam_rsplit_print __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
int
__bam_rsplit_print(dbenv, dbtp, lsnp, notused2, notused3)
DB_ENV *dbenv;
@@ -1013,52 +650,50 @@ __bam_rsplit_print(dbenv, dbtp, lsnp, notused2, notused3)
{
__bam_rsplit_args *argp;
u_int32_t i;
- u_int ch;
+ int ch;
int ret;
- i = 0;
- ch = 0;
notused2 = DB_TXN_ABORT;
notused3 = NULL;
if ((ret = __bam_rsplit_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
- printf("[%lu][%lu]bam_rsplit: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (void)printf(
+ "[%lu][%lu]__bam_rsplit%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
(u_long)lsnp->file,
(u_long)lsnp->offset,
+ (argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
(u_long)argp->txnid->txnid,
(u_long)argp->prev_lsn.file,
(u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tpgdbt: ");
+ (void)printf("\tfileid: %ld\n", (long)argp->fileid);
+ (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
+ (void)printf("\tpgdbt: ");
for (i = 0; i < argp->pgdbt.size; i++) {
ch = ((u_int8_t *)argp->pgdbt.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\troot_pgno: %lu\n", (u_long)argp->root_pgno);
- printf("\tnrec: %lu\n", (u_long)argp->nrec);
- printf("\trootent: ");
+ printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
+ }
+ (void)printf("\n");
+ (void)printf("\troot_pgno: %lu\n", (u_long)argp->root_pgno);
+ (void)printf("\tnrec: %lu\n", (u_long)argp->nrec);
+ (void)printf("\trootent: ");
for (i = 0; i < argp->rootent.size; i++) {
ch = ((u_int8_t *)argp->rootent.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
+ printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
}
- printf("\n");
- printf("\trootlsn: [%lu][%lu]\n",
+ (void)printf("\n");
+ (void)printf("\trootlsn: [%lu][%lu]\n",
(u_long)argp->rootlsn.file, (u_long)argp->rootlsn.offset);
- printf("\n");
- __os_free(argp, 0);
+ (void)printf("\n");
+ __os_free(dbenv, argp);
+
return (0);
}
+/*
+ * PUBLIC: int __bam_rsplit_read __P((DB_ENV *, void *, __bam_rsplit_args **));
+ */
int
__bam_rsplit_read(dbenv, recbuf, argpp)
DB_ENV *dbenv;
@@ -1066,53 +701,71 @@ __bam_rsplit_read(dbenv, recbuf, argpp)
__bam_rsplit_args **argpp;
{
__bam_rsplit_args *argp;
+ u_int32_t uinttmp;
u_int8_t *bp;
int ret;
- ret = __os_malloc(dbenv, sizeof(__bam_rsplit_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
+ if ((ret = __os_malloc(dbenv,
+ sizeof(__bam_rsplit_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
argp->txnid = (DB_TXN *)&argp[1];
+
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
+
memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
bp += sizeof(argp->txnid->txnid);
+
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->fileid = (int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->pgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
memset(&argp->pgdbt, 0, sizeof(argp->pgdbt));
memcpy(&argp->pgdbt.size, bp, sizeof(u_int32_t));
bp += sizeof(u_int32_t);
argp->pgdbt.data = bp;
bp += argp->pgdbt.size;
- memcpy(&argp->root_pgno, bp, sizeof(argp->root_pgno));
- bp += sizeof(argp->root_pgno);
- memcpy(&argp->nrec, bp, sizeof(argp->nrec));
- bp += sizeof(argp->nrec);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->root_pgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->nrec = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
memset(&argp->rootent, 0, sizeof(argp->rootent));
memcpy(&argp->rootent.size, bp, sizeof(u_int32_t));
bp += sizeof(u_int32_t);
argp->rootent.data = bp;
bp += argp->rootent.size;
+
memcpy(&argp->rootlsn, bp, sizeof(argp->rootlsn));
bp += sizeof(argp->rootlsn);
+
*argpp = argp;
return (0);
}
+/*
+ * PUBLIC: int __bam_adj_log __P((DB *, DB_TXN *, DB_LSN *,
+ * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t, u_int32_t,
+ * PUBLIC: u_int32_t));
+ */
int
-__bam_adj_log(dbenv, txnid, ret_lsnp, flags,
- fileid, pgno, lsn, indx, indx_copy, is_insert)
- DB_ENV *dbenv;
+__bam_adj_log(dbp, txnid, ret_lsnp, flags, pgno, lsn, indx, indx_copy, is_insert)
+ DB *dbp;
DB_TXN *txnid;
DB_LSN *ret_lsnp;
u_int32_t flags;
- int32_t fileid;
db_pgno_t pgno;
DB_LSN * lsn;
u_int32_t indx;
@@ -1120,62 +773,204 @@ __bam_adj_log(dbenv, txnid, ret_lsnp, flags,
u_int32_t is_insert;
{
DBT logrec;
+ DB_ENV *dbenv;
+ DB_TXNLOGREC *lr;
DB_LSN *lsnp, null_lsn;
- u_int32_t rectype, txn_num;
- int ret;
+ u_int32_t uinttmp, rectype, txn_num;
+ u_int npad;
u_int8_t *bp;
-
- rectype = DB_bam_adj;
- if (txnid != NULL &&
- TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
- return (ret);
- txn_num = txnid == NULL ? 0 : txnid->txnid;
+ int is_durable, ret;
+
+ dbenv = dbp->dbenv;
+ rectype = DB___bam_adj;
+ npad = 0;
+
+ is_durable = 1;
+ if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
+ F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) ||
+ F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
+ if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL)
+ return (0);
+ is_durable = 0;
+ }
if (txnid == NULL) {
- ZERO_LSN(null_lsn);
+ txn_num = 0;
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
lsnp = &null_lsn;
- } else
+ } else {
+ if (TAILQ_FIRST(&txnid->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ return (ret);
+ txn_num = txnid->txnid;
lsnp = &txnid->last_lsn;
+ }
+
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ sizeof(*lsn)
- + sizeof(indx)
- + sizeof(indx_copy)
- + sizeof(is_insert);
- if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t);
+ if (CRYPTO_ON(dbenv)) {
+ npad =
+ ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size);
+ logrec.size += npad;
+ }
+
+ if (!is_durable && txnid != NULL) {
+ if ((ret = __os_malloc(dbenv,
+ logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
+ return (ret);
+#ifdef DIAGNOSTIC
+ goto do_malloc;
+#else
+ logrec.data = &lr->data;
+#endif
+ } else {
+#ifdef DIAGNOSTIC
+do_malloc:
+#endif
+ if ((ret =
+ __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) {
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL)
+ (void)__os_free(dbenv, lr);
+#endif
+ return (ret);
+ }
+ }
+ if (npad > 0)
+ memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
bp = logrec.data;
+
memcpy(bp, &rectype, sizeof(rectype));
bp += sizeof(rectype);
+
memcpy(bp, &txn_num, sizeof(txn_num));
bp += sizeof(txn_num);
+
memcpy(bp, lsnp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
+
+ DB_ASSERT(dbp->log_filename != NULL);
+ if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
+ (ret = __dbreg_lazy_id(dbp)) != 0)
+ return (ret);
+
+ uinttmp = (u_int32_t)dbp->log_filename->id;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)pgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
if (lsn != NULL)
memcpy(bp, lsn, sizeof(*lsn));
else
memset(bp, 0, sizeof(*lsn));
bp += sizeof(*lsn);
- memcpy(bp, &indx, sizeof(indx));
- bp += sizeof(indx);
- memcpy(bp, &indx_copy, sizeof(indx_copy));
- bp += sizeof(indx_copy);
- memcpy(bp, &is_insert, sizeof(is_insert));
- bp += sizeof(is_insert);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size);
- ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, logrec.size);
+
+ uinttmp = (u_int32_t)indx;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)indx_copy;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)is_insert;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL) {
+ /*
+ * We set the debug bit if we are going
+ * to log non-durable transactions so
+ * they will be ignored by recovery.
+ */
+ memcpy(lr->data, logrec.data, logrec.size);
+ rectype |= DB_debug_FLAG;
+ memcpy(logrec.data, &rectype, sizeof(rectype));
+ }
+#endif
+
+ if (!is_durable && txnid != NULL) {
+ ret = 0;
+ STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+#ifdef DIAGNOSTIC
+ goto do_put;
+#endif
+ } else{
+#ifdef DIAGNOSTIC
+do_put:
+#endif
+ ret = __log_put(dbenv,
+ ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
+ if (ret == 0 && txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ }
+
+ if (!is_durable)
+ LSN_NOT_LOGGED(*ret_lsnp);
+#ifdef LOG_DIAGNOSTIC
+ if (ret != 0)
+ (void)__bam_adj_print(dbenv,
+ (DBT *)&logrec, ret_lsnp, NULL, NULL);
+#endif
+#ifndef DIAGNOSTIC
+ if (is_durable || txnid == NULL)
+#endif
+ __os_free(dbenv, logrec.data);
+
return (ret);
}
+#ifdef HAVE_REPLICATION
+/*
+ * PUBLIC: int __bam_adj_getpgnos __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_adj_getpgnos(dbenv, rec, lsnp, notused1, summary)
+ DB_ENV *dbenv;
+ DBT *rec;
+ DB_LSN *lsnp;
+ db_recops notused1;
+ void *summary;
+{
+ TXN_RECS *t;
+ int ret;
+ COMPQUIET(rec, NULL);
+ COMPQUIET(notused1, DB_TXN_ABORT);
+
+ t = (TXN_RECS *)summary;
+
+ if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0)
+ return (ret);
+
+ t->array[t->npages].flags = LSN_PAGE_NOLOCK;
+ t->array[t->npages].lsn = *lsnp;
+ t->array[t->npages].fid = DB_LOGFILEID_INVALID;
+ memset(&t->array[t->npages].pgdesc, 0,
+ sizeof(t->array[t->npages].pgdesc));
+
+ t->npages++;
+
+ return (0);
+}
+#endif /* HAVE_REPLICATION */
+
+/*
+ * PUBLIC: int __bam_adj_print __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
int
__bam_adj_print(dbenv, dbtp, lsnp, notused2, notused3)
DB_ENV *dbenv;
@@ -1185,36 +980,38 @@ __bam_adj_print(dbenv, dbtp, lsnp, notused2, notused3)
void *notused3;
{
__bam_adj_args *argp;
- u_int32_t i;
- u_int ch;
int ret;
- i = 0;
- ch = 0;
notused2 = DB_TXN_ABORT;
notused3 = NULL;
if ((ret = __bam_adj_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
- printf("[%lu][%lu]bam_adj: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (void)printf(
+ "[%lu][%lu]__bam_adj%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
(u_long)lsnp->file,
(u_long)lsnp->offset,
+ (argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
(u_long)argp->txnid->txnid,
(u_long)argp->prev_lsn.file,
(u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tlsn: [%lu][%lu]\n",
+ (void)printf("\tfileid: %ld\n", (long)argp->fileid);
+ (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
+ (void)printf("\tlsn: [%lu][%lu]\n",
(u_long)argp->lsn.file, (u_long)argp->lsn.offset);
- printf("\tindx: %lu\n", (u_long)argp->indx);
- printf("\tindx_copy: %lu\n", (u_long)argp->indx_copy);
- printf("\tis_insert: %lu\n", (u_long)argp->is_insert);
- printf("\n");
- __os_free(argp, 0);
+ (void)printf("\tindx: %lu\n", (u_long)argp->indx);
+ (void)printf("\tindx_copy: %lu\n", (u_long)argp->indx_copy);
+ (void)printf("\tis_insert: %lu\n", (u_long)argp->is_insert);
+ (void)printf("\n");
+ __os_free(dbenv, argp);
+
return (0);
}
+/*
+ * PUBLIC: int __bam_adj_read __P((DB_ENV *, void *, __bam_adj_args **));
+ */
int
__bam_adj_read(dbenv, recbuf, argpp)
DB_ENV *dbenv;
@@ -1222,45 +1019,62 @@ __bam_adj_read(dbenv, recbuf, argpp)
__bam_adj_args **argpp;
{
__bam_adj_args *argp;
+ u_int32_t uinttmp;
u_int8_t *bp;
int ret;
- ret = __os_malloc(dbenv, sizeof(__bam_adj_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
+ if ((ret = __os_malloc(dbenv,
+ sizeof(__bam_adj_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
argp->txnid = (DB_TXN *)&argp[1];
+
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
+
memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
bp += sizeof(argp->txnid->txnid);
+
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->fileid = (int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->pgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
memcpy(&argp->lsn, bp, sizeof(argp->lsn));
bp += sizeof(argp->lsn);
- memcpy(&argp->indx, bp, sizeof(argp->indx));
- bp += sizeof(argp->indx);
- memcpy(&argp->indx_copy, bp, sizeof(argp->indx_copy));
- bp += sizeof(argp->indx_copy);
- memcpy(&argp->is_insert, bp, sizeof(argp->is_insert));
- bp += sizeof(argp->is_insert);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->indx = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->indx_copy = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->is_insert = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
*argpp = argp;
return (0);
}
+/*
+ * PUBLIC: int __bam_cadjust_log __P((DB *, DB_TXN *, DB_LSN *,
+ * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t, int32_t, u_int32_t));
+ */
int
-__bam_cadjust_log(dbenv, txnid, ret_lsnp, flags,
- fileid, pgno, lsn, indx, adjust, opflags)
- DB_ENV *dbenv;
+__bam_cadjust_log(dbp, txnid, ret_lsnp, flags, pgno, lsn, indx, adjust, opflags)
+ DB *dbp;
DB_TXN *txnid;
DB_LSN *ret_lsnp;
u_int32_t flags;
- int32_t fileid;
db_pgno_t pgno;
DB_LSN * lsn;
u_int32_t indx;
@@ -1268,62 +1082,204 @@ __bam_cadjust_log(dbenv, txnid, ret_lsnp, flags,
u_int32_t opflags;
{
DBT logrec;
+ DB_ENV *dbenv;
+ DB_TXNLOGREC *lr;
DB_LSN *lsnp, null_lsn;
- u_int32_t rectype, txn_num;
- int ret;
+ u_int32_t uinttmp, rectype, txn_num;
+ u_int npad;
u_int8_t *bp;
-
- rectype = DB_bam_cadjust;
- if (txnid != NULL &&
- TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
- return (ret);
- txn_num = txnid == NULL ? 0 : txnid->txnid;
+ int is_durable, ret;
+
+ dbenv = dbp->dbenv;
+ rectype = DB___bam_cadjust;
+ npad = 0;
+
+ is_durable = 1;
+ if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
+ F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) ||
+ F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
+ if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL)
+ return (0);
+ is_durable = 0;
+ }
if (txnid == NULL) {
- ZERO_LSN(null_lsn);
+ txn_num = 0;
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
lsnp = &null_lsn;
- } else
+ } else {
+ if (TAILQ_FIRST(&txnid->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ return (ret);
+ txn_num = txnid->txnid;
lsnp = &txnid->last_lsn;
+ }
+
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ sizeof(*lsn)
- + sizeof(indx)
- + sizeof(adjust)
- + sizeof(opflags);
- if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t);
+ if (CRYPTO_ON(dbenv)) {
+ npad =
+ ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size);
+ logrec.size += npad;
+ }
+
+ if (!is_durable && txnid != NULL) {
+ if ((ret = __os_malloc(dbenv,
+ logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
+ return (ret);
+#ifdef DIAGNOSTIC
+ goto do_malloc;
+#else
+ logrec.data = &lr->data;
+#endif
+ } else {
+#ifdef DIAGNOSTIC
+do_malloc:
+#endif
+ if ((ret =
+ __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) {
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL)
+ (void)__os_free(dbenv, lr);
+#endif
+ return (ret);
+ }
+ }
+ if (npad > 0)
+ memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
bp = logrec.data;
+
memcpy(bp, &rectype, sizeof(rectype));
bp += sizeof(rectype);
+
memcpy(bp, &txn_num, sizeof(txn_num));
bp += sizeof(txn_num);
+
memcpy(bp, lsnp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
+
+ DB_ASSERT(dbp->log_filename != NULL);
+ if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
+ (ret = __dbreg_lazy_id(dbp)) != 0)
+ return (ret);
+
+ uinttmp = (u_int32_t)dbp->log_filename->id;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)pgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
if (lsn != NULL)
memcpy(bp, lsn, sizeof(*lsn));
else
memset(bp, 0, sizeof(*lsn));
bp += sizeof(*lsn);
- memcpy(bp, &indx, sizeof(indx));
- bp += sizeof(indx);
- memcpy(bp, &adjust, sizeof(adjust));
- bp += sizeof(adjust);
- memcpy(bp, &opflags, sizeof(opflags));
- bp += sizeof(opflags);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size);
- ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, logrec.size);
+
+ uinttmp = (u_int32_t)indx;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)adjust;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)opflags;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL) {
+ /*
+ * We set the debug bit if we are going
+ * to log non-durable transactions so
+ * they will be ignored by recovery.
+ */
+ memcpy(lr->data, logrec.data, logrec.size);
+ rectype |= DB_debug_FLAG;
+ memcpy(logrec.data, &rectype, sizeof(rectype));
+ }
+#endif
+
+ if (!is_durable && txnid != NULL) {
+ ret = 0;
+ STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+#ifdef DIAGNOSTIC
+ goto do_put;
+#endif
+ } else{
+#ifdef DIAGNOSTIC
+do_put:
+#endif
+ ret = __log_put(dbenv,
+ ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
+ if (ret == 0 && txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ }
+
+ if (!is_durable)
+ LSN_NOT_LOGGED(*ret_lsnp);
+#ifdef LOG_DIAGNOSTIC
+ if (ret != 0)
+ (void)__bam_cadjust_print(dbenv,
+ (DBT *)&logrec, ret_lsnp, NULL, NULL);
+#endif
+#ifndef DIAGNOSTIC
+ if (is_durable || txnid == NULL)
+#endif
+ __os_free(dbenv, logrec.data);
+
return (ret);
}
+#ifdef HAVE_REPLICATION
+/*
+ * PUBLIC: int __bam_cadjust_getpgnos __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_cadjust_getpgnos(dbenv, rec, lsnp, notused1, summary)
+ DB_ENV *dbenv;
+ DBT *rec;
+ DB_LSN *lsnp;
+ db_recops notused1;
+ void *summary;
+{
+ TXN_RECS *t;
+ int ret;
+ COMPQUIET(rec, NULL);
+ COMPQUIET(notused1, DB_TXN_ABORT);
+
+ t = (TXN_RECS *)summary;
+
+ if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0)
+ return (ret);
+
+ t->array[t->npages].flags = LSN_PAGE_NOLOCK;
+ t->array[t->npages].lsn = *lsnp;
+ t->array[t->npages].fid = DB_LOGFILEID_INVALID;
+ memset(&t->array[t->npages].pgdesc, 0,
+ sizeof(t->array[t->npages].pgdesc));
+
+ t->npages++;
+
+ return (0);
+}
+#endif /* HAVE_REPLICATION */
+
+/*
+ * PUBLIC: int __bam_cadjust_print __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
int
__bam_cadjust_print(dbenv, dbtp, lsnp, notused2, notused3)
DB_ENV *dbenv;
@@ -1333,36 +1289,39 @@ __bam_cadjust_print(dbenv, dbtp, lsnp, notused2, notused3)
void *notused3;
{
__bam_cadjust_args *argp;
- u_int32_t i;
- u_int ch;
int ret;
- i = 0;
- ch = 0;
notused2 = DB_TXN_ABORT;
notused3 = NULL;
if ((ret = __bam_cadjust_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
- printf("[%lu][%lu]bam_cadjust: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (void)printf(
+ "[%lu][%lu]__bam_cadjust%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
(u_long)lsnp->file,
(u_long)lsnp->offset,
+ (argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
(u_long)argp->txnid->txnid,
(u_long)argp->prev_lsn.file,
(u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tlsn: [%lu][%lu]\n",
+ (void)printf("\tfileid: %ld\n", (long)argp->fileid);
+ (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
+ (void)printf("\tlsn: [%lu][%lu]\n",
(u_long)argp->lsn.file, (u_long)argp->lsn.offset);
- printf("\tindx: %lu\n", (u_long)argp->indx);
- printf("\tadjust: %ld\n", (long)argp->adjust);
- printf("\topflags: %lu\n", (u_long)argp->opflags);
- printf("\n");
- __os_free(argp, 0);
+ (void)printf("\tindx: %lu\n", (u_long)argp->indx);
+ (void)printf("\tadjust: %ld\n", (long)argp->adjust);
+ (void)printf("\topflags: %lu\n", (u_long)argp->opflags);
+ (void)printf("\n");
+ __os_free(dbenv, argp);
+
return (0);
}
+/*
+ * PUBLIC: int __bam_cadjust_read __P((DB_ENV *, void *,
+ * PUBLIC: __bam_cadjust_args **));
+ */
int
__bam_cadjust_read(dbenv, recbuf, argpp)
DB_ENV *dbenv;
@@ -1370,100 +1329,255 @@ __bam_cadjust_read(dbenv, recbuf, argpp)
__bam_cadjust_args **argpp;
{
__bam_cadjust_args *argp;
+ u_int32_t uinttmp;
u_int8_t *bp;
int ret;
- ret = __os_malloc(dbenv, sizeof(__bam_cadjust_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
+ if ((ret = __os_malloc(dbenv,
+ sizeof(__bam_cadjust_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
argp->txnid = (DB_TXN *)&argp[1];
+
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
+
memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
bp += sizeof(argp->txnid->txnid);
+
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->fileid = (int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->pgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
memcpy(&argp->lsn, bp, sizeof(argp->lsn));
bp += sizeof(argp->lsn);
- memcpy(&argp->indx, bp, sizeof(argp->indx));
- bp += sizeof(argp->indx);
- memcpy(&argp->adjust, bp, sizeof(argp->adjust));
- bp += sizeof(argp->adjust);
- memcpy(&argp->opflags, bp, sizeof(argp->opflags));
- bp += sizeof(argp->opflags);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->indx = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->adjust = (int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->opflags = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
*argpp = argp;
return (0);
}
+/*
+ * PUBLIC: int __bam_cdel_log __P((DB *, DB_TXN *, DB_LSN *,
+ * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t));
+ */
int
-__bam_cdel_log(dbenv, txnid, ret_lsnp, flags,
- fileid, pgno, lsn, indx)
- DB_ENV *dbenv;
+__bam_cdel_log(dbp, txnid, ret_lsnp, flags, pgno, lsn, indx)
+ DB *dbp;
DB_TXN *txnid;
DB_LSN *ret_lsnp;
u_int32_t flags;
- int32_t fileid;
db_pgno_t pgno;
DB_LSN * lsn;
u_int32_t indx;
{
DBT logrec;
+ DB_ENV *dbenv;
+ DB_TXNLOGREC *lr;
DB_LSN *lsnp, null_lsn;
- u_int32_t rectype, txn_num;
- int ret;
+ u_int32_t uinttmp, rectype, txn_num;
+ u_int npad;
u_int8_t *bp;
-
- rectype = DB_bam_cdel;
- if (txnid != NULL &&
- TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
- return (ret);
- txn_num = txnid == NULL ? 0 : txnid->txnid;
+ int is_durable, ret;
+
+ dbenv = dbp->dbenv;
+ rectype = DB___bam_cdel;
+ npad = 0;
+
+ is_durable = 1;
+ if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
+ F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) ||
+ F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
+ if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL)
+ return (0);
+ is_durable = 0;
+ }
if (txnid == NULL) {
- ZERO_LSN(null_lsn);
+ txn_num = 0;
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
lsnp = &null_lsn;
- } else
+ } else {
+ if (TAILQ_FIRST(&txnid->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ return (ret);
+ txn_num = txnid->txnid;
lsnp = &txnid->last_lsn;
+ }
+
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ sizeof(*lsn)
- + sizeof(indx);
- if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
+ + sizeof(u_int32_t);
+ if (CRYPTO_ON(dbenv)) {
+ npad =
+ ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size);
+ logrec.size += npad;
+ }
+
+ if (!is_durable && txnid != NULL) {
+ if ((ret = __os_malloc(dbenv,
+ logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
+ return (ret);
+#ifdef DIAGNOSTIC
+ goto do_malloc;
+#else
+ logrec.data = &lr->data;
+#endif
+ } else {
+#ifdef DIAGNOSTIC
+do_malloc:
+#endif
+ if ((ret =
+ __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) {
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL)
+ (void)__os_free(dbenv, lr);
+#endif
+ return (ret);
+ }
+ }
+ if (npad > 0)
+ memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
bp = logrec.data;
+
memcpy(bp, &rectype, sizeof(rectype));
bp += sizeof(rectype);
+
memcpy(bp, &txn_num, sizeof(txn_num));
bp += sizeof(txn_num);
+
memcpy(bp, lsnp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
+
+ DB_ASSERT(dbp->log_filename != NULL);
+ if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
+ (ret = __dbreg_lazy_id(dbp)) != 0)
+ return (ret);
+
+ uinttmp = (u_int32_t)dbp->log_filename->id;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)pgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
if (lsn != NULL)
memcpy(bp, lsn, sizeof(*lsn));
else
memset(bp, 0, sizeof(*lsn));
bp += sizeof(*lsn);
- memcpy(bp, &indx, sizeof(indx));
- bp += sizeof(indx);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size);
- ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, logrec.size);
+
+ uinttmp = (u_int32_t)indx;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL) {
+ /*
+ * We set the debug bit if we are going
+ * to log non-durable transactions so
+ * they will be ignored by recovery.
+ */
+ memcpy(lr->data, logrec.data, logrec.size);
+ rectype |= DB_debug_FLAG;
+ memcpy(logrec.data, &rectype, sizeof(rectype));
+ }
+#endif
+
+ if (!is_durable && txnid != NULL) {
+ ret = 0;
+ STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+#ifdef DIAGNOSTIC
+ goto do_put;
+#endif
+ } else{
+#ifdef DIAGNOSTIC
+do_put:
+#endif
+ ret = __log_put(dbenv,
+ ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
+ if (ret == 0 && txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ }
+
+ if (!is_durable)
+ LSN_NOT_LOGGED(*ret_lsnp);
+#ifdef LOG_DIAGNOSTIC
+ if (ret != 0)
+ (void)__bam_cdel_print(dbenv,
+ (DBT *)&logrec, ret_lsnp, NULL, NULL);
+#endif
+#ifndef DIAGNOSTIC
+ if (is_durable || txnid == NULL)
+#endif
+ __os_free(dbenv, logrec.data);
+
return (ret);
}
+#ifdef HAVE_REPLICATION
+/*
+ * PUBLIC: int __bam_cdel_getpgnos __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_cdel_getpgnos(dbenv, rec, lsnp, notused1, summary)
+ DB_ENV *dbenv;
+ DBT *rec;
+ DB_LSN *lsnp;
+ db_recops notused1;
+ void *summary;
+{
+ TXN_RECS *t;
+ int ret;
+ COMPQUIET(rec, NULL);
+ COMPQUIET(notused1, DB_TXN_ABORT);
+
+ t = (TXN_RECS *)summary;
+
+ if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0)
+ return (ret);
+
+ t->array[t->npages].flags = LSN_PAGE_NOLOCK;
+ t->array[t->npages].lsn = *lsnp;
+ t->array[t->npages].fid = DB_LOGFILEID_INVALID;
+ memset(&t->array[t->npages].pgdesc, 0,
+ sizeof(t->array[t->npages].pgdesc));
+
+ t->npages++;
+
+ return (0);
+}
+#endif /* HAVE_REPLICATION */
+
+/*
+ * PUBLIC: int __bam_cdel_print __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
int
__bam_cdel_print(dbenv, dbtp, lsnp, notused2, notused3)
DB_ENV *dbenv;
@@ -1473,34 +1587,36 @@ __bam_cdel_print(dbenv, dbtp, lsnp, notused2, notused3)
void *notused3;
{
__bam_cdel_args *argp;
- u_int32_t i;
- u_int ch;
int ret;
- i = 0;
- ch = 0;
notused2 = DB_TXN_ABORT;
notused3 = NULL;
if ((ret = __bam_cdel_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
- printf("[%lu][%lu]bam_cdel: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (void)printf(
+ "[%lu][%lu]__bam_cdel%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
(u_long)lsnp->file,
(u_long)lsnp->offset,
+ (argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
(u_long)argp->txnid->txnid,
(u_long)argp->prev_lsn.file,
(u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tlsn: [%lu][%lu]\n",
+ (void)printf("\tfileid: %ld\n", (long)argp->fileid);
+ (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
+ (void)printf("\tlsn: [%lu][%lu]\n",
(u_long)argp->lsn.file, (u_long)argp->lsn.offset);
- printf("\tindx: %lu\n", (u_long)argp->indx);
- printf("\n");
- __os_free(argp, 0);
+ (void)printf("\tindx: %lu\n", (u_long)argp->indx);
+ (void)printf("\n");
+ __os_free(dbenv, argp);
+
return (0);
}
+/*
+ * PUBLIC: int __bam_cdel_read __P((DB_ENV *, void *, __bam_cdel_args **));
+ */
int
__bam_cdel_read(dbenv, recbuf, argpp)
DB_ENV *dbenv;
@@ -1508,42 +1624,56 @@ __bam_cdel_read(dbenv, recbuf, argpp)
__bam_cdel_args **argpp;
{
__bam_cdel_args *argp;
+ u_int32_t uinttmp;
u_int8_t *bp;
int ret;
- ret = __os_malloc(dbenv, sizeof(__bam_cdel_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
+ if ((ret = __os_malloc(dbenv,
+ sizeof(__bam_cdel_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
argp->txnid = (DB_TXN *)&argp[1];
+
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
+
memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
bp += sizeof(argp->txnid->txnid);
+
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->fileid = (int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->pgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
memcpy(&argp->lsn, bp, sizeof(argp->lsn));
bp += sizeof(argp->lsn);
- memcpy(&argp->indx, bp, sizeof(argp->indx));
- bp += sizeof(argp->indx);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->indx = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
*argpp = argp;
return (0);
}
+/*
+ * PUBLIC: int __bam_repl_log __P((DB *, DB_TXN *, DB_LSN *,
+ * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, u_int32_t, u_int32_t,
+ * PUBLIC: const DBT *, const DBT *, u_int32_t, u_int32_t));
+ */
int
-__bam_repl_log(dbenv, txnid, ret_lsnp, flags,
- fileid, pgno, lsn, indx, isdeleted, orig,
- repl, prefix, suffix)
- DB_ENV *dbenv;
+__bam_repl_log(dbp, txnid, ret_lsnp, flags, pgno, lsn, indx, isdeleted, orig,
+ repl, prefix, suffix)
+ DB *dbp;
DB_TXN *txnid;
DB_LSN *ret_lsnp;
u_int32_t flags;
- int32_t fileid;
db_pgno_t pgno;
DB_LSN * lsn;
u_int32_t indx;
@@ -1554,56 +1684,118 @@ __bam_repl_log(dbenv, txnid, ret_lsnp, flags,
u_int32_t suffix;
{
DBT logrec;
+ DB_ENV *dbenv;
+ DB_TXNLOGREC *lr;
DB_LSN *lsnp, null_lsn;
- u_int32_t zero;
- u_int32_t rectype, txn_num;
- int ret;
+ u_int32_t zero, uinttmp, rectype, txn_num;
+ u_int npad;
u_int8_t *bp;
-
- rectype = DB_bam_repl;
- if (txnid != NULL &&
- TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
- return (ret);
- txn_num = txnid == NULL ? 0 : txnid->txnid;
+ int is_durable, ret;
+
+ dbenv = dbp->dbenv;
+ rectype = DB___bam_repl;
+ npad = 0;
+
+ is_durable = 1;
+ if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
+ F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) ||
+ F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
+ if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL)
+ return (0);
+ is_durable = 0;
+ }
if (txnid == NULL) {
- ZERO_LSN(null_lsn);
+ txn_num = 0;
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
lsnp = &null_lsn;
- } else
+ } else {
+ if (TAILQ_FIRST(&txnid->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ return (ret);
+ txn_num = txnid->txnid;
lsnp = &txnid->last_lsn;
+ }
+
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(pgno)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ sizeof(*lsn)
- + sizeof(indx)
- + sizeof(isdeleted)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ sizeof(u_int32_t) + (orig == NULL ? 0 : orig->size)
+ sizeof(u_int32_t) + (repl == NULL ? 0 : repl->size)
- + sizeof(prefix)
- + sizeof(suffix);
- if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t);
+ if (CRYPTO_ON(dbenv)) {
+ npad =
+ ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size);
+ logrec.size += npad;
+ }
+
+ if (!is_durable && txnid != NULL) {
+ if ((ret = __os_malloc(dbenv,
+ logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
+ return (ret);
+#ifdef DIAGNOSTIC
+ goto do_malloc;
+#else
+ logrec.data = &lr->data;
+#endif
+ } else {
+#ifdef DIAGNOSTIC
+do_malloc:
+#endif
+ if ((ret =
+ __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) {
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL)
+ (void)__os_free(dbenv, lr);
+#endif
+ return (ret);
+ }
+ }
+ if (npad > 0)
+ memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
bp = logrec.data;
+
memcpy(bp, &rectype, sizeof(rectype));
bp += sizeof(rectype);
+
memcpy(bp, &txn_num, sizeof(txn_num));
bp += sizeof(txn_num);
+
memcpy(bp, lsnp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &pgno, sizeof(pgno));
- bp += sizeof(pgno);
+
+ DB_ASSERT(dbp->log_filename != NULL);
+ if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
+ (ret = __dbreg_lazy_id(dbp)) != 0)
+ return (ret);
+
+ uinttmp = (u_int32_t)dbp->log_filename->id;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)pgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
if (lsn != NULL)
memcpy(bp, lsn, sizeof(*lsn));
else
memset(bp, 0, sizeof(*lsn));
bp += sizeof(*lsn);
- memcpy(bp, &indx, sizeof(indx));
- bp += sizeof(indx);
- memcpy(bp, &isdeleted, sizeof(isdeleted));
- bp += sizeof(isdeleted);
+
+ uinttmp = (u_int32_t)indx;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)isdeleted;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
if (orig == NULL) {
zero = 0;
memcpy(bp, &zero, sizeof(u_int32_t));
@@ -1614,6 +1806,7 @@ __bam_repl_log(dbenv, txnid, ret_lsnp, flags,
memcpy(bp, orig->data, orig->size);
bp += orig->size;
}
+
if (repl == NULL) {
zero = 0;
memcpy(bp, &zero, sizeof(u_int32_t));
@@ -1624,18 +1817,100 @@ __bam_repl_log(dbenv, txnid, ret_lsnp, flags,
memcpy(bp, repl->data, repl->size);
bp += repl->size;
}
- memcpy(bp, &prefix, sizeof(prefix));
- bp += sizeof(prefix);
- memcpy(bp, &suffix, sizeof(suffix));
- bp += sizeof(suffix);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size);
- ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, logrec.size);
+
+ uinttmp = (u_int32_t)prefix;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)suffix;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL) {
+ /*
+ * We set the debug bit if we are going
+ * to log non-durable transactions so
+ * they will be ignored by recovery.
+ */
+ memcpy(lr->data, logrec.data, logrec.size);
+ rectype |= DB_debug_FLAG;
+ memcpy(logrec.data, &rectype, sizeof(rectype));
+ }
+#endif
+
+ if (!is_durable && txnid != NULL) {
+ ret = 0;
+ STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+#ifdef DIAGNOSTIC
+ goto do_put;
+#endif
+ } else{
+#ifdef DIAGNOSTIC
+do_put:
+#endif
+ ret = __log_put(dbenv,
+ ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
+ if (ret == 0 && txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ }
+
+ if (!is_durable)
+ LSN_NOT_LOGGED(*ret_lsnp);
+#ifdef LOG_DIAGNOSTIC
+ if (ret != 0)
+ (void)__bam_repl_print(dbenv,
+ (DBT *)&logrec, ret_lsnp, NULL, NULL);
+#endif
+#ifndef DIAGNOSTIC
+ if (is_durable || txnid == NULL)
+#endif
+ __os_free(dbenv, logrec.data);
+
return (ret);
}
+#ifdef HAVE_REPLICATION
+/*
+ * PUBLIC: int __bam_repl_getpgnos __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_repl_getpgnos(dbenv, rec, lsnp, notused1, summary)
+ DB_ENV *dbenv;
+ DBT *rec;
+ DB_LSN *lsnp;
+ db_recops notused1;
+ void *summary;
+{
+ TXN_RECS *t;
+ int ret;
+ COMPQUIET(rec, NULL);
+ COMPQUIET(notused1, DB_TXN_ABORT);
+
+ t = (TXN_RECS *)summary;
+
+ if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0)
+ return (ret);
+
+ t->array[t->npages].flags = LSN_PAGE_NOLOCK;
+ t->array[t->npages].lsn = *lsnp;
+ t->array[t->npages].fid = DB_LOGFILEID_INVALID;
+ memset(&t->array[t->npages].pgdesc, 0,
+ sizeof(t->array[t->npages].pgdesc));
+
+ t->npages++;
+
+ return (0);
+}
+#endif /* HAVE_REPLICATION */
+
+/*
+ * PUBLIC: int __bam_repl_print __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
int
__bam_repl_print(dbenv, dbtp, lsnp, notused2, notused3)
DB_ENV *dbenv;
@@ -1646,54 +1921,52 @@ __bam_repl_print(dbenv, dbtp, lsnp, notused2, notused3)
{
__bam_repl_args *argp;
u_int32_t i;
- u_int ch;
+ int ch;
int ret;
- i = 0;
- ch = 0;
notused2 = DB_TXN_ABORT;
notused3 = NULL;
if ((ret = __bam_repl_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
- printf("[%lu][%lu]bam_repl: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (void)printf(
+ "[%lu][%lu]__bam_repl%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
(u_long)lsnp->file,
(u_long)lsnp->offset,
+ (argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
(u_long)argp->txnid->txnid,
(u_long)argp->prev_lsn.file,
(u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tpgno: %lu\n", (u_long)argp->pgno);
- printf("\tlsn: [%lu][%lu]\n",
+ (void)printf("\tfileid: %ld\n", (long)argp->fileid);
+ (void)printf("\tpgno: %lu\n", (u_long)argp->pgno);
+ (void)printf("\tlsn: [%lu][%lu]\n",
(u_long)argp->lsn.file, (u_long)argp->lsn.offset);
- printf("\tindx: %lu\n", (u_long)argp->indx);
- printf("\tisdeleted: %lu\n", (u_long)argp->isdeleted);
- printf("\torig: ");
+ (void)printf("\tindx: %lu\n", (u_long)argp->indx);
+ (void)printf("\tisdeleted: %lu\n", (u_long)argp->isdeleted);
+ (void)printf("\torig: ");
for (i = 0; i < argp->orig.size; i++) {
ch = ((u_int8_t *)argp->orig.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
+ printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
}
- printf("\n");
- printf("\trepl: ");
+ (void)printf("\n");
+ (void)printf("\trepl: ");
for (i = 0; i < argp->repl.size; i++) {
ch = ((u_int8_t *)argp->repl.data)[i];
- if (isprint(ch) || ch == 0xa)
- putchar(ch);
- else
- printf("%#x ", ch);
- }
- printf("\n");
- printf("\tprefix: %lu\n", (u_long)argp->prefix);
- printf("\tsuffix: %lu\n", (u_long)argp->suffix);
- printf("\n");
- __os_free(argp, 0);
+ printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch);
+ }
+ (void)printf("\n");
+ (void)printf("\tprefix: %lu\n", (u_long)argp->prefix);
+ (void)printf("\tsuffix: %lu\n", (u_long)argp->suffix);
+ (void)printf("\n");
+ __os_free(dbenv, argp);
+
return (0);
}
+/*
+ * PUBLIC: int __bam_repl_read __P((DB_ENV *, void *, __bam_repl_args **));
+ */
int
__bam_repl_read(dbenv, recbuf, argpp)
DB_ENV *dbenv;
@@ -1701,112 +1974,271 @@ __bam_repl_read(dbenv, recbuf, argpp)
__bam_repl_args **argpp;
{
__bam_repl_args *argp;
+ u_int32_t uinttmp;
u_int8_t *bp;
int ret;
- ret = __os_malloc(dbenv, sizeof(__bam_repl_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
+ if ((ret = __os_malloc(dbenv,
+ sizeof(__bam_repl_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
argp->txnid = (DB_TXN *)&argp[1];
+
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
+
memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
bp += sizeof(argp->txnid->txnid);
+
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->pgno, bp, sizeof(argp->pgno));
- bp += sizeof(argp->pgno);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->fileid = (int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->pgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
memcpy(&argp->lsn, bp, sizeof(argp->lsn));
bp += sizeof(argp->lsn);
- memcpy(&argp->indx, bp, sizeof(argp->indx));
- bp += sizeof(argp->indx);
- memcpy(&argp->isdeleted, bp, sizeof(argp->isdeleted));
- bp += sizeof(argp->isdeleted);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->indx = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->isdeleted = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
memset(&argp->orig, 0, sizeof(argp->orig));
memcpy(&argp->orig.size, bp, sizeof(u_int32_t));
bp += sizeof(u_int32_t);
argp->orig.data = bp;
bp += argp->orig.size;
+
memset(&argp->repl, 0, sizeof(argp->repl));
memcpy(&argp->repl.size, bp, sizeof(u_int32_t));
bp += sizeof(u_int32_t);
argp->repl.data = bp;
bp += argp->repl.size;
- memcpy(&argp->prefix, bp, sizeof(argp->prefix));
- bp += sizeof(argp->prefix);
- memcpy(&argp->suffix, bp, sizeof(argp->suffix));
- bp += sizeof(argp->suffix);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->prefix = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->suffix = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
*argpp = argp;
return (0);
}
+/*
+ * PUBLIC: int __bam_root_log __P((DB *, DB_TXN *, DB_LSN *,
+ * PUBLIC: u_int32_t, db_pgno_t, db_pgno_t, DB_LSN *));
+ */
int
-__bam_root_log(dbenv, txnid, ret_lsnp, flags,
- fileid, meta_pgno, root_pgno, meta_lsn)
- DB_ENV *dbenv;
+__bam_root_log(dbp, txnid, ret_lsnp, flags, meta_pgno, root_pgno, meta_lsn)
+ DB *dbp;
DB_TXN *txnid;
DB_LSN *ret_lsnp;
u_int32_t flags;
- int32_t fileid;
db_pgno_t meta_pgno;
db_pgno_t root_pgno;
DB_LSN * meta_lsn;
{
DBT logrec;
+ DB_ENV *dbenv;
+ DB_TXNLOGREC *lr;
DB_LSN *lsnp, null_lsn;
- u_int32_t rectype, txn_num;
- int ret;
+ u_int32_t uinttmp, rectype, txn_num;
+ u_int npad;
u_int8_t *bp;
-
- rectype = DB_bam_root;
- if (txnid != NULL &&
- TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
- return (ret);
- txn_num = txnid == NULL ? 0 : txnid->txnid;
+ int is_durable, ret;
+
+ dbenv = dbp->dbenv;
+ rectype = DB___bam_root;
+ npad = 0;
+
+ is_durable = 1;
+ if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
+ F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) ||
+ F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
+ if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL)
+ return (0);
+ is_durable = 0;
+ }
if (txnid == NULL) {
- ZERO_LSN(null_lsn);
+ txn_num = 0;
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
lsnp = &null_lsn;
- } else
+ } else {
+ if (TAILQ_FIRST(&txnid->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ return (ret);
+ txn_num = txnid->txnid;
lsnp = &txnid->last_lsn;
+ }
+
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(meta_pgno)
- + sizeof(root_pgno)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ sizeof(*meta_lsn);
- if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
+ if (CRYPTO_ON(dbenv)) {
+ npad =
+ ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size);
+ logrec.size += npad;
+ }
+
+ if (!is_durable && txnid != NULL) {
+ if ((ret = __os_malloc(dbenv,
+ logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
+ return (ret);
+#ifdef DIAGNOSTIC
+ goto do_malloc;
+#else
+ logrec.data = &lr->data;
+#endif
+ } else {
+#ifdef DIAGNOSTIC
+do_malloc:
+#endif
+ if ((ret =
+ __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) {
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL)
+ (void)__os_free(dbenv, lr);
+#endif
+ return (ret);
+ }
+ }
+ if (npad > 0)
+ memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
bp = logrec.data;
+
memcpy(bp, &rectype, sizeof(rectype));
bp += sizeof(rectype);
+
memcpy(bp, &txn_num, sizeof(txn_num));
bp += sizeof(txn_num);
+
memcpy(bp, lsnp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &meta_pgno, sizeof(meta_pgno));
- bp += sizeof(meta_pgno);
- memcpy(bp, &root_pgno, sizeof(root_pgno));
- bp += sizeof(root_pgno);
+
+ DB_ASSERT(dbp->log_filename != NULL);
+ if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
+ (ret = __dbreg_lazy_id(dbp)) != 0)
+ return (ret);
+
+ uinttmp = (u_int32_t)dbp->log_filename->id;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)meta_pgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)root_pgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
if (meta_lsn != NULL)
memcpy(bp, meta_lsn, sizeof(*meta_lsn));
else
memset(bp, 0, sizeof(*meta_lsn));
bp += sizeof(*meta_lsn);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size);
- ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, logrec.size);
+
+ DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL) {
+ /*
+ * We set the debug bit if we are going
+ * to log non-durable transactions so
+ * they will be ignored by recovery.
+ */
+ memcpy(lr->data, logrec.data, logrec.size);
+ rectype |= DB_debug_FLAG;
+ memcpy(logrec.data, &rectype, sizeof(rectype));
+ }
+#endif
+
+ if (!is_durable && txnid != NULL) {
+ ret = 0;
+ STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+#ifdef DIAGNOSTIC
+ goto do_put;
+#endif
+ } else{
+#ifdef DIAGNOSTIC
+do_put:
+#endif
+ ret = __log_put(dbenv,
+ ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
+ if (ret == 0 && txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ }
+
+ if (!is_durable)
+ LSN_NOT_LOGGED(*ret_lsnp);
+#ifdef LOG_DIAGNOSTIC
+ if (ret != 0)
+ (void)__bam_root_print(dbenv,
+ (DBT *)&logrec, ret_lsnp, NULL, NULL);
+#endif
+#ifndef DIAGNOSTIC
+ if (is_durable || txnid == NULL)
+#endif
+ __os_free(dbenv, logrec.data);
+
return (ret);
}
+#ifdef HAVE_REPLICATION
+/*
+ * PUBLIC: int __bam_root_getpgnos __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_root_getpgnos(dbenv, rec, lsnp, notused1, summary)
+ DB_ENV *dbenv;
+ DBT *rec;
+ DB_LSN *lsnp;
+ db_recops notused1;
+ void *summary;
+{
+ TXN_RECS *t;
+ int ret;
+ COMPQUIET(rec, NULL);
+ COMPQUIET(notused1, DB_TXN_ABORT);
+
+ t = (TXN_RECS *)summary;
+
+ if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0)
+ return (ret);
+
+ t->array[t->npages].flags = LSN_PAGE_NOLOCK;
+ t->array[t->npages].lsn = *lsnp;
+ t->array[t->npages].fid = DB_LOGFILEID_INVALID;
+ memset(&t->array[t->npages].pgdesc, 0,
+ sizeof(t->array[t->npages].pgdesc));
+
+ t->npages++;
+
+ return (0);
+}
+#endif /* HAVE_REPLICATION */
+
+/*
+ * PUBLIC: int __bam_root_print __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
int
__bam_root_print(dbenv, dbtp, lsnp, notused2, notused3)
DB_ENV *dbenv;
@@ -1816,34 +2248,36 @@ __bam_root_print(dbenv, dbtp, lsnp, notused2, notused3)
void *notused3;
{
__bam_root_args *argp;
- u_int32_t i;
- u_int ch;
int ret;
- i = 0;
- ch = 0;
notused2 = DB_TXN_ABORT;
notused3 = NULL;
if ((ret = __bam_root_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
- printf("[%lu][%lu]bam_root: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (void)printf(
+ "[%lu][%lu]__bam_root%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
(u_long)lsnp->file,
(u_long)lsnp->offset,
+ (argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
(u_long)argp->txnid->txnid,
(u_long)argp->prev_lsn.file,
(u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno);
- printf("\troot_pgno: %lu\n", (u_long)argp->root_pgno);
- printf("\tmeta_lsn: [%lu][%lu]\n",
+ (void)printf("\tfileid: %ld\n", (long)argp->fileid);
+ (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno);
+ (void)printf("\troot_pgno: %lu\n", (u_long)argp->root_pgno);
+ (void)printf("\tmeta_lsn: [%lu][%lu]\n",
(u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
- printf("\n");
- __os_free(argp, 0);
+ (void)printf("\n");
+ __os_free(dbenv, argp);
+
return (0);
}
+/*
+ * PUBLIC: int __bam_root_read __P((DB_ENV *, void *, __bam_root_args **));
+ */
int
__bam_root_read(dbenv, recbuf, argpp)
DB_ENV *dbenv;
@@ -1851,42 +2285,56 @@ __bam_root_read(dbenv, recbuf, argpp)
__bam_root_args **argpp;
{
__bam_root_args *argp;
+ u_int32_t uinttmp;
u_int8_t *bp;
int ret;
- ret = __os_malloc(dbenv, sizeof(__bam_root_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
+ if ((ret = __os_malloc(dbenv,
+ sizeof(__bam_root_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
argp->txnid = (DB_TXN *)&argp[1];
+
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
+
memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
bp += sizeof(argp->txnid->txnid);
+
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->meta_pgno, bp, sizeof(argp->meta_pgno));
- bp += sizeof(argp->meta_pgno);
- memcpy(&argp->root_pgno, bp, sizeof(argp->root_pgno));
- bp += sizeof(argp->root_pgno);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->fileid = (int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->meta_pgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->root_pgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn));
bp += sizeof(argp->meta_lsn);
+
*argpp = argp;
return (0);
}
+/*
+ * PUBLIC: int __bam_curadj_log __P((DB *, DB_TXN *, DB_LSN *,
+ * PUBLIC: u_int32_t, db_ca_mode, db_pgno_t, db_pgno_t, db_pgno_t,
+ * PUBLIC: u_int32_t, u_int32_t, u_int32_t));
+ */
int
-__bam_curadj_log(dbenv, txnid, ret_lsnp, flags,
- fileid, mode, from_pgno, to_pgno, left_pgno, first_indx,
- from_indx, to_indx)
- DB_ENV *dbenv;
+__bam_curadj_log(dbp, txnid, ret_lsnp, flags, mode, from_pgno, to_pgno, left_pgno, first_indx,
+ from_indx, to_indx)
+ DB *dbp;
DB_TXN *txnid;
DB_LSN *ret_lsnp;
u_int32_t flags;
- int32_t fileid;
db_ca_mode mode;
db_pgno_t from_pgno;
db_pgno_t to_pgno;
@@ -1896,65 +2344,212 @@ __bam_curadj_log(dbenv, txnid, ret_lsnp, flags,
u_int32_t to_indx;
{
DBT logrec;
+ DB_ENV *dbenv;
+ DB_TXNLOGREC *lr;
DB_LSN *lsnp, null_lsn;
- u_int32_t rectype, txn_num;
- int ret;
+ u_int32_t uinttmp, rectype, txn_num;
+ u_int npad;
u_int8_t *bp;
-
- rectype = DB_bam_curadj;
- if (txnid != NULL &&
- TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
- return (ret);
- txn_num = txnid == NULL ? 0 : txnid->txnid;
+ int is_durable, ret;
+
+ dbenv = dbp->dbenv;
+ rectype = DB___bam_curadj;
+ npad = 0;
+
+ is_durable = 1;
+ if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
+ F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) ||
+ F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
+ if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL)
+ return (0);
+ is_durable = 0;
+ }
if (txnid == NULL) {
- ZERO_LSN(null_lsn);
+ txn_num = 0;
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
lsnp = &null_lsn;
- } else
+ } else {
+ if (TAILQ_FIRST(&txnid->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ return (ret);
+ txn_num = txnid->txnid;
lsnp = &txnid->last_lsn;
+ }
+
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(mode)
- + sizeof(from_pgno)
- + sizeof(to_pgno)
- + sizeof(left_pgno)
- + sizeof(first_indx)
- + sizeof(from_indx)
- + sizeof(to_indx);
- if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t);
+ if (CRYPTO_ON(dbenv)) {
+ npad =
+ ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size);
+ logrec.size += npad;
+ }
+
+ if (!is_durable && txnid != NULL) {
+ if ((ret = __os_malloc(dbenv,
+ logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
+ return (ret);
+#ifdef DIAGNOSTIC
+ goto do_malloc;
+#else
+ logrec.data = &lr->data;
+#endif
+ } else {
+#ifdef DIAGNOSTIC
+do_malloc:
+#endif
+ if ((ret =
+ __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) {
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL)
+ (void)__os_free(dbenv, lr);
+#endif
+ return (ret);
+ }
+ }
+ if (npad > 0)
+ memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
bp = logrec.data;
+
memcpy(bp, &rectype, sizeof(rectype));
bp += sizeof(rectype);
+
memcpy(bp, &txn_num, sizeof(txn_num));
bp += sizeof(txn_num);
+
memcpy(bp, lsnp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &mode, sizeof(mode));
- bp += sizeof(mode);
- memcpy(bp, &from_pgno, sizeof(from_pgno));
- bp += sizeof(from_pgno);
- memcpy(bp, &to_pgno, sizeof(to_pgno));
- bp += sizeof(to_pgno);
- memcpy(bp, &left_pgno, sizeof(left_pgno));
- bp += sizeof(left_pgno);
- memcpy(bp, &first_indx, sizeof(first_indx));
- bp += sizeof(first_indx);
- memcpy(bp, &from_indx, sizeof(from_indx));
- bp += sizeof(from_indx);
- memcpy(bp, &to_indx, sizeof(to_indx));
- bp += sizeof(to_indx);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size);
- ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, logrec.size);
+
+ DB_ASSERT(dbp->log_filename != NULL);
+ if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
+ (ret = __dbreg_lazy_id(dbp)) != 0)
+ return (ret);
+
+ uinttmp = (u_int32_t)dbp->log_filename->id;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)mode;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)from_pgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)to_pgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)left_pgno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)first_indx;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)from_indx;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)to_indx;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL) {
+ /*
+ * We set the debug bit if we are going
+ * to log non-durable transactions so
+ * they will be ignored by recovery.
+ */
+ memcpy(lr->data, logrec.data, logrec.size);
+ rectype |= DB_debug_FLAG;
+ memcpy(logrec.data, &rectype, sizeof(rectype));
+ }
+#endif
+
+ if (!is_durable && txnid != NULL) {
+ ret = 0;
+ STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+#ifdef DIAGNOSTIC
+ goto do_put;
+#endif
+ } else{
+#ifdef DIAGNOSTIC
+do_put:
+#endif
+ ret = __log_put(dbenv,
+ ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
+ if (ret == 0 && txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ }
+
+ if (!is_durable)
+ LSN_NOT_LOGGED(*ret_lsnp);
+#ifdef LOG_DIAGNOSTIC
+ if (ret != 0)
+ (void)__bam_curadj_print(dbenv,
+ (DBT *)&logrec, ret_lsnp, NULL, NULL);
+#endif
+#ifndef DIAGNOSTIC
+ if (is_durable || txnid == NULL)
+#endif
+ __os_free(dbenv, logrec.data);
+
return (ret);
}
+#ifdef HAVE_REPLICATION
+/*
+ * PUBLIC: int __bam_curadj_getpgnos __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_curadj_getpgnos(dbenv, rec, lsnp, notused1, summary)
+ DB_ENV *dbenv;
+ DBT *rec;
+ DB_LSN *lsnp;
+ db_recops notused1;
+ void *summary;
+{
+ TXN_RECS *t;
+ int ret;
+ COMPQUIET(rec, NULL);
+ COMPQUIET(notused1, DB_TXN_ABORT);
+
+ t = (TXN_RECS *)summary;
+
+ if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0)
+ return (ret);
+
+ t->array[t->npages].flags = LSN_PAGE_NOLOCK;
+ t->array[t->npages].lsn = *lsnp;
+ t->array[t->npages].fid = DB_LOGFILEID_INVALID;
+ memset(&t->array[t->npages].pgdesc, 0,
+ sizeof(t->array[t->npages].pgdesc));
+
+ t->npages++;
+
+ return (0);
+}
+#endif /* HAVE_REPLICATION */
+
+/*
+ * PUBLIC: int __bam_curadj_print __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
int
__bam_curadj_print(dbenv, dbtp, lsnp, notused2, notused3)
DB_ENV *dbenv;
@@ -1964,37 +2559,39 @@ __bam_curadj_print(dbenv, dbtp, lsnp, notused2, notused3)
void *notused3;
{
__bam_curadj_args *argp;
- u_int32_t i;
- u_int ch;
int ret;
- i = 0;
- ch = 0;
notused2 = DB_TXN_ABORT;
notused3 = NULL;
if ((ret = __bam_curadj_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
- printf("[%lu][%lu]bam_curadj: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (void)printf(
+ "[%lu][%lu]__bam_curadj%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
(u_long)lsnp->file,
(u_long)lsnp->offset,
+ (argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
(u_long)argp->txnid->txnid,
(u_long)argp->prev_lsn.file,
(u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tmode: %ld\n", (long)argp->mode);
- printf("\tfrom_pgno: %lu\n", (u_long)argp->from_pgno);
- printf("\tto_pgno: %lu\n", (u_long)argp->to_pgno);
- printf("\tleft_pgno: %lu\n", (u_long)argp->left_pgno);
- printf("\tfirst_indx: %lu\n", (u_long)argp->first_indx);
- printf("\tfrom_indx: %lu\n", (u_long)argp->from_indx);
- printf("\tto_indx: %lu\n", (u_long)argp->to_indx);
- printf("\n");
- __os_free(argp, 0);
+ (void)printf("\tfileid: %ld\n", (long)argp->fileid);
+ (void)printf("\tmode: %ld\n", (long)argp->mode);
+ (void)printf("\tfrom_pgno: %lu\n", (u_long)argp->from_pgno);
+ (void)printf("\tto_pgno: %lu\n", (u_long)argp->to_pgno);
+ (void)printf("\tleft_pgno: %lu\n", (u_long)argp->left_pgno);
+ (void)printf("\tfirst_indx: %lu\n", (u_long)argp->first_indx);
+ (void)printf("\tfrom_indx: %lu\n", (u_long)argp->from_indx);
+ (void)printf("\tto_indx: %lu\n", (u_long)argp->to_indx);
+ (void)printf("\n");
+ __os_free(dbenv, argp);
+
return (0);
}
+/*
+ * PUBLIC: int __bam_curadj_read __P((DB_ENV *, void *, __bam_curadj_args **));
+ */
int
__bam_curadj_read(dbenv, recbuf, argpp)
DB_ENV *dbenv;
@@ -2002,105 +2599,268 @@ __bam_curadj_read(dbenv, recbuf, argpp)
__bam_curadj_args **argpp;
{
__bam_curadj_args *argp;
+ u_int32_t uinttmp;
u_int8_t *bp;
int ret;
- ret = __os_malloc(dbenv, sizeof(__bam_curadj_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
+ if ((ret = __os_malloc(dbenv,
+ sizeof(__bam_curadj_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
argp->txnid = (DB_TXN *)&argp[1];
+
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
+
memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
bp += sizeof(argp->txnid->txnid);
+
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->mode, bp, sizeof(argp->mode));
- bp += sizeof(argp->mode);
- memcpy(&argp->from_pgno, bp, sizeof(argp->from_pgno));
- bp += sizeof(argp->from_pgno);
- memcpy(&argp->to_pgno, bp, sizeof(argp->to_pgno));
- bp += sizeof(argp->to_pgno);
- memcpy(&argp->left_pgno, bp, sizeof(argp->left_pgno));
- bp += sizeof(argp->left_pgno);
- memcpy(&argp->first_indx, bp, sizeof(argp->first_indx));
- bp += sizeof(argp->first_indx);
- memcpy(&argp->from_indx, bp, sizeof(argp->from_indx));
- bp += sizeof(argp->from_indx);
- memcpy(&argp->to_indx, bp, sizeof(argp->to_indx));
- bp += sizeof(argp->to_indx);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->fileid = (int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->mode = (db_ca_mode)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->from_pgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->to_pgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->left_pgno = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->first_indx = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->from_indx = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->to_indx = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
*argpp = argp;
return (0);
}
+/*
+ * PUBLIC: int __bam_rcuradj_log __P((DB *, DB_TXN *, DB_LSN *,
+ * PUBLIC: u_int32_t, ca_recno_arg, db_pgno_t, db_recno_t, u_int32_t));
+ */
int
-__bam_rcuradj_log(dbenv, txnid, ret_lsnp, flags,
- fileid, mode, root, recno, order)
- DB_ENV *dbenv;
+__bam_rcuradj_log(dbp, txnid, ret_lsnp, flags, mode, root, recno, order)
+ DB *dbp;
DB_TXN *txnid;
DB_LSN *ret_lsnp;
u_int32_t flags;
- int32_t fileid;
ca_recno_arg mode;
db_pgno_t root;
db_recno_t recno;
u_int32_t order;
{
DBT logrec;
+ DB_ENV *dbenv;
+ DB_TXNLOGREC *lr;
DB_LSN *lsnp, null_lsn;
- u_int32_t rectype, txn_num;
- int ret;
+ u_int32_t uinttmp, rectype, txn_num;
+ u_int npad;
u_int8_t *bp;
-
- rectype = DB_bam_rcuradj;
- if (txnid != NULL &&
- TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
- return (ret);
- txn_num = txnid == NULL ? 0 : txnid->txnid;
+ int is_durable, ret;
+
+ dbenv = dbp->dbenv;
+ rectype = DB___bam_rcuradj;
+ npad = 0;
+
+ is_durable = 1;
+ if (LF_ISSET(DB_LOG_NOT_DURABLE) ||
+ F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) ||
+ F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
+ if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL)
+ return (0);
+ is_durable = 0;
+ }
if (txnid == NULL) {
- ZERO_LSN(null_lsn);
+ txn_num = 0;
+ null_lsn.file = 0;
+ null_lsn.offset = 0;
lsnp = &null_lsn;
- } else
+ } else {
+ if (TAILQ_FIRST(&txnid->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ return (ret);
+ txn_num = txnid->txnid;
lsnp = &txnid->last_lsn;
+ }
+
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
- + sizeof(fileid)
- + sizeof(mode)
- + sizeof(root)
- + sizeof(recno)
- + sizeof(order);
- if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0)
- return (ret);
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t)
+ + sizeof(u_int32_t);
+ if (CRYPTO_ON(dbenv)) {
+ npad =
+ ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size);
+ logrec.size += npad;
+ }
+
+ if (!is_durable && txnid != NULL) {
+ if ((ret = __os_malloc(dbenv,
+ logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0)
+ return (ret);
+#ifdef DIAGNOSTIC
+ goto do_malloc;
+#else
+ logrec.data = &lr->data;
+#endif
+ } else {
+#ifdef DIAGNOSTIC
+do_malloc:
+#endif
+ if ((ret =
+ __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) {
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL)
+ (void)__os_free(dbenv, lr);
+#endif
+ return (ret);
+ }
+ }
+ if (npad > 0)
+ memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad);
bp = logrec.data;
+
memcpy(bp, &rectype, sizeof(rectype));
bp += sizeof(rectype);
+
memcpy(bp, &txn_num, sizeof(txn_num));
bp += sizeof(txn_num);
+
memcpy(bp, lsnp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(bp, &fileid, sizeof(fileid));
- bp += sizeof(fileid);
- memcpy(bp, &mode, sizeof(mode));
- bp += sizeof(mode);
- memcpy(bp, &root, sizeof(root));
- bp += sizeof(root);
- memcpy(bp, &recno, sizeof(recno));
- bp += sizeof(recno);
- memcpy(bp, &order, sizeof(order));
- bp += sizeof(order);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size);
- ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags);
- if (txnid != NULL)
- txnid->last_lsn = *ret_lsnp;
- __os_free(logrec.data, logrec.size);
+
+ DB_ASSERT(dbp->log_filename != NULL);
+ if (dbp->log_filename->id == DB_LOGFILEID_INVALID &&
+ (ret = __dbreg_lazy_id(dbp)) != 0)
+ return (ret);
+
+ uinttmp = (u_int32_t)dbp->log_filename->id;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)mode;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)root;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)recno;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ uinttmp = (u_int32_t)order;
+ memcpy(bp, &uinttmp, sizeof(uinttmp));
+ bp += sizeof(uinttmp);
+
+ DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+
+#ifdef DIAGNOSTIC
+ if (!is_durable && txnid != NULL) {
+ /*
+ * We set the debug bit if we are going
+ * to log non-durable transactions so
+ * they will be ignored by recovery.
+ */
+ memcpy(lr->data, logrec.data, logrec.size);
+ rectype |= DB_debug_FLAG;
+ memcpy(logrec.data, &rectype, sizeof(rectype));
+ }
+#endif
+
+ if (!is_durable && txnid != NULL) {
+ ret = 0;
+ STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+#ifdef DIAGNOSTIC
+ goto do_put;
+#endif
+ } else{
+#ifdef DIAGNOSTIC
+do_put:
+#endif
+ ret = __log_put(dbenv,
+ ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY);
+ if (ret == 0 && txnid != NULL)
+ txnid->last_lsn = *ret_lsnp;
+ }
+
+ if (!is_durable)
+ LSN_NOT_LOGGED(*ret_lsnp);
+#ifdef LOG_DIAGNOSTIC
+ if (ret != 0)
+ (void)__bam_rcuradj_print(dbenv,
+ (DBT *)&logrec, ret_lsnp, NULL, NULL);
+#endif
+#ifndef DIAGNOSTIC
+ if (is_durable || txnid == NULL)
+#endif
+ __os_free(dbenv, logrec.data);
+
return (ret);
}
+#ifdef HAVE_REPLICATION
+/*
+ * PUBLIC: int __bam_rcuradj_getpgnos __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
+int
+__bam_rcuradj_getpgnos(dbenv, rec, lsnp, notused1, summary)
+ DB_ENV *dbenv;
+ DBT *rec;
+ DB_LSN *lsnp;
+ db_recops notused1;
+ void *summary;
+{
+ TXN_RECS *t;
+ int ret;
+ COMPQUIET(rec, NULL);
+ COMPQUIET(notused1, DB_TXN_ABORT);
+
+ t = (TXN_RECS *)summary;
+
+ if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0)
+ return (ret);
+
+ t->array[t->npages].flags = LSN_PAGE_NOLOCK;
+ t->array[t->npages].lsn = *lsnp;
+ t->array[t->npages].fid = DB_LOGFILEID_INVALID;
+ memset(&t->array[t->npages].pgdesc, 0,
+ sizeof(t->array[t->npages].pgdesc));
+
+ t->npages++;
+
+ return (0);
+}
+#endif /* HAVE_REPLICATION */
+
+/*
+ * PUBLIC: int __bam_rcuradj_print __P((DB_ENV *, DBT *, DB_LSN *,
+ * PUBLIC: db_recops, void *));
+ */
int
__bam_rcuradj_print(dbenv, dbtp, lsnp, notused2, notused3)
DB_ENV *dbenv;
@@ -2110,34 +2870,37 @@ __bam_rcuradj_print(dbenv, dbtp, lsnp, notused2, notused3)
void *notused3;
{
__bam_rcuradj_args *argp;
- u_int32_t i;
- u_int ch;
int ret;
- i = 0;
- ch = 0;
notused2 = DB_TXN_ABORT;
notused3 = NULL;
if ((ret = __bam_rcuradj_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
- printf("[%lu][%lu]bam_rcuradj: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+ (void)printf(
+ "[%lu][%lu]__bam_rcuradj%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
(u_long)lsnp->file,
(u_long)lsnp->offset,
+ (argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
(u_long)argp->txnid->txnid,
(u_long)argp->prev_lsn.file,
(u_long)argp->prev_lsn.offset);
- printf("\tfileid: %ld\n", (long)argp->fileid);
- printf("\tmode: %ld\n", (long)argp->mode);
- printf("\troot: %ld\n", (long)argp->root);
- printf("\trecno: %ld\n", (long)argp->recno);
- printf("\torder: %ld\n", (long)argp->order);
- printf("\n");
- __os_free(argp, 0);
+ (void)printf("\tfileid: %ld\n", (long)argp->fileid);
+ (void)printf("\tmode: %ld\n", (long)argp->mode);
+ (void)printf("\troot: %ld\n", (long)argp->root);
+ (void)printf("\trecno: %ld\n", (long)argp->recno);
+ (void)printf("\torder: %ld\n", (long)argp->order);
+ (void)printf("\n");
+ __os_free(dbenv, argp);
+
return (0);
}
+/*
+ * PUBLIC: int __bam_rcuradj_read __P((DB_ENV *, void *,
+ * PUBLIC: __bam_rcuradj_args **));
+ */
int
__bam_rcuradj_read(dbenv, recbuf, argpp)
DB_ENV *dbenv;
@@ -2145,140 +2908,173 @@ __bam_rcuradj_read(dbenv, recbuf, argpp)
__bam_rcuradj_args **argpp;
{
__bam_rcuradj_args *argp;
+ u_int32_t uinttmp;
u_int8_t *bp;
int ret;
- ret = __os_malloc(dbenv, sizeof(__bam_rcuradj_args) +
- sizeof(DB_TXN), NULL, &argp);
- if (ret != 0)
+ if ((ret = __os_malloc(dbenv,
+ sizeof(__bam_rcuradj_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
argp->txnid = (DB_TXN *)&argp[1];
+
bp = recbuf;
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
+
memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
bp += sizeof(argp->txnid->txnid);
+
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
- memcpy(&argp->fileid, bp, sizeof(argp->fileid));
- bp += sizeof(argp->fileid);
- memcpy(&argp->mode, bp, sizeof(argp->mode));
- bp += sizeof(argp->mode);
- memcpy(&argp->root, bp, sizeof(argp->root));
- bp += sizeof(argp->root);
- memcpy(&argp->recno, bp, sizeof(argp->recno));
- bp += sizeof(argp->recno);
- memcpy(&argp->order, bp, sizeof(argp->order));
- bp += sizeof(argp->order);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->fileid = (int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->mode = (ca_recno_arg)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->root = (db_pgno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->recno = (db_recno_t)uinttmp;
+ bp += sizeof(uinttmp);
+
+ memcpy(&uinttmp, bp, sizeof(uinttmp));
+ argp->order = (u_int32_t)uinttmp;
+ bp += sizeof(uinttmp);
+
*argpp = argp;
return (0);
}
+/*
+ * PUBLIC: int __bam_init_print __P((DB_ENV *, int (***)(DB_ENV *,
+ * PUBLIC: DBT *, DB_LSN *, db_recops, void *), size_t *));
+ */
int
-__bam_init_print(dbenv)
+__bam_init_print(dbenv, dtabp, dtabsizep)
DB_ENV *dbenv;
+ int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ size_t *dtabsizep;
{
int ret;
- if ((ret = __db_add_recovery(dbenv,
- __bam_pg_alloc_print, DB_bam_pg_alloc)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_pg_alloc1_print, DB_bam_pg_alloc1)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_pg_free_print, DB_bam_pg_free)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_pg_free1_print, DB_bam_pg_free1)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_split_print, DB___bam_split)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_split1_print, DB_bam_split1)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_rsplit_print, DB___bam_rsplit)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_split_print, DB_bam_split)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_adj_print, DB___bam_adj)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_rsplit1_print, DB_bam_rsplit1)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_cadjust_print, DB___bam_cadjust)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_rsplit_print, DB_bam_rsplit)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_cdel_print, DB___bam_cdel)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_adj_print, DB_bam_adj)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_repl_print, DB___bam_repl)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_cadjust_print, DB_bam_cadjust)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_root_print, DB___bam_root)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_cdel_print, DB_bam_cdel)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_curadj_print, DB___bam_curadj)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_repl_print, DB_bam_repl)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_root_print, DB_bam_root)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_curadj_print, DB_bam_curadj)) != 0)
- return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_rcuradj_print, DB_bam_rcuradj)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_rcuradj_print, DB___bam_rcuradj)) != 0)
return (ret);
return (0);
}
+#ifdef HAVE_REPLICATION
+/*
+ * PUBLIC: int __bam_init_getpgnos __P((DB_ENV *, int (***)(DB_ENV *,
+ * PUBLIC: DBT *, DB_LSN *, db_recops, void *), size_t *));
+ */
int
-__bam_init_recover(dbenv)
+__bam_init_getpgnos(dbenv, dtabp, dtabsizep)
DB_ENV *dbenv;
+ int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ size_t *dtabsizep;
{
int ret;
- if ((ret = __db_add_recovery(dbenv,
- __bam_pg_alloc_recover, DB_bam_pg_alloc)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_split_getpgnos, DB___bam_split)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __deprecated_recover, DB_bam_pg_alloc1)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_rsplit_getpgnos, DB___bam_rsplit)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_pg_free_recover, DB_bam_pg_free)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_adj_getpgnos, DB___bam_adj)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __deprecated_recover, DB_bam_pg_free1)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_cadjust_getpgnos, DB___bam_cadjust)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __deprecated_recover, DB_bam_split1)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_cdel_getpgnos, DB___bam_cdel)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_split_recover, DB_bam_split)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_repl_getpgnos, DB___bam_repl)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __deprecated_recover, DB_bam_rsplit1)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_root_getpgnos, DB___bam_root)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_rsplit_recover, DB_bam_rsplit)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_curadj_getpgnos, DB___bam_curadj)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_adj_recover, DB_bam_adj)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_rcuradj_getpgnos, DB___bam_rcuradj)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_cadjust_recover, DB_bam_cadjust)) != 0)
+ return (0);
+}
+#endif /* HAVE_REPLICATION */
+
+/*
+ * PUBLIC: int __bam_init_recover __P((DB_ENV *, int (***)(DB_ENV *,
+ * PUBLIC: DBT *, DB_LSN *, db_recops, void *), size_t *));
+ */
+int
+__bam_init_recover(dbenv, dtabp, dtabsizep)
+ DB_ENV *dbenv;
+ int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *));
+ size_t *dtabsizep;
+{
+ int ret;
+
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_split_recover, DB___bam_split)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_rsplit_recover, DB___bam_rsplit)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_cdel_recover, DB_bam_cdel)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_adj_recover, DB___bam_adj)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_repl_recover, DB_bam_repl)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_cadjust_recover, DB___bam_cadjust)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_root_recover, DB_bam_root)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_cdel_recover, DB___bam_cdel)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_curadj_recover, DB_bam_curadj)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_repl_recover, DB___bam_repl)) != 0)
return (ret);
- if ((ret = __db_add_recovery(dbenv,
- __bam_rcuradj_recover, DB_bam_rcuradj)) != 0)
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_root_recover, DB___bam_root)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_curadj_recover, DB___bam_curadj)) != 0)
+ return (ret);
+ if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep,
+ __bam_rcuradj_recover, DB___bam_rcuradj)) != 0)
return (ret);
return (0);
}
-