diff options
author | jbj <devnull@localhost> | 2003-12-15 21:42:09 +0000 |
---|---|---|
committer | jbj <devnull@localhost> | 2003-12-15 21:42:09 +0000 |
commit | 8960e3895f7af91126465368dff8fbb36ab4e853 (patch) | |
tree | 3c515e39dde0e88edeb806ea87d08524ba25c761 /db/qam | |
parent | 752cac72e220dcad4e6fce39508e714e59e3e0a1 (diff) | |
download | rpm-8960e3895f7af91126465368dff8fbb36ab4e853.tar.gz rpm-8960e3895f7af91126465368dff8fbb36ab4e853.tar.bz2 rpm-8960e3895f7af91126465368dff8fbb36ab4e853.zip |
- upgrade to db-4.2.52.
CVS patchset: 6972
CVS date: 2003/12/15 21:42:09
Diffstat (limited to 'db/qam')
-rw-r--r-- | db/qam/qam.c | 1259 | ||||
-rw-r--r-- | db/qam/qam.src | 66 | ||||
-rw-r--r-- | db/qam/qam_auto.c | 1928 | ||||
-rw-r--r-- | db/qam/qam_conv.c | 16 | ||||
-rw-r--r-- | db/qam/qam_files.c | 616 | ||||
-rw-r--r-- | db/qam/qam_method.c | 401 | ||||
-rw-r--r-- | db/qam/qam_open.c | 349 | ||||
-rw-r--r-- | db/qam/qam_rec.c | 446 | ||||
-rw-r--r-- | db/qam/qam_stat.c | 115 | ||||
-rw-r--r-- | db/qam/qam_stub.c | 338 | ||||
-rw-r--r-- | db/qam/qam_upgrade.c | 11 | ||||
-rw-r--r-- | db/qam/qam_verify.c | 402 |
12 files changed, 3711 insertions, 2236 deletions
diff --git a/db/qam/qam.c b/db/qam/qam.c index 0c9f45304..f90ccce07 100644 --- a/db/qam/qam.c +++ b/db/qam/qam.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: qam.c,v 11.72 2001/01/16 20:10:55 ubell Exp $"; +static const char revid[] = "$Id: qam.c,v 11.159 2003/11/18 21:32:17 ubell Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,20 +18,21 @@ static const char revid[] = "$Id: qam.c,v 11.72 2001/01/16 20:10:55 ubell Exp $" #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "db_am.h" -#include "mp.h" -#include "lock.h" -#include "log.h" -#include "btree.h" -#include "qam.h" - +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/btree.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" + +static int __qam_bulk __P((DBC *, DBT *, u_int32_t)); static int __qam_c_close __P((DBC *, db_pgno_t, int *)); static int __qam_c_del __P((DBC *)); static int __qam_c_destroy __P((DBC *)); static int __qam_c_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); static int __qam_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __qam_consume __P((DBC *, QMETA *, db_recno_t)); static int __qam_getno __P((DB *, const DBT *, db_recno_t *)); /* @@ -61,17 +62,16 @@ __qam_position(dbc, recnop, mode, exactp) pg = QAM_RECNO_PAGE(dbp, *recnop); if ((ret = __db_lget(dbc, 0, pg, mode == QAM_READ ? - DB_LOCK_READ : DB_LOCK_WRITE, 0, &cp->lock)) != 0) + DB_LOCK_READ : DB_LOCK_WRITE, 0, &cp->lock)) != 0) return (ret); cp->page = NULL; *exactp = 0; if ((ret = __qam_fget(dbp, &pg, - mode == QAM_WRITE ? DB_MPOOL_CREATE : 0, - &cp->page)) != 0) { + mode == QAM_WRITE ? DB_MPOOL_CREATE : 0, &cp->page)) != 0) { /* We did not fetch it, we can release the lock. */ (void)__LPUT(dbc, cp->lock); - cp->lock.off = LOCK_INVALID; - if (mode != QAM_WRITE && (ret == EINVAL || ret == ENOENT)) + if (mode != QAM_WRITE && + (ret == DB_PAGE_NOTFOUND || ret == ENOENT)) return (0); return (ret); } @@ -88,7 +88,7 @@ __qam_position(dbc, recnop, mode, exactp) } qp = QAM_GET_RECORD(dbp, cp->page, cp->indx); - *exactp = F_ISSET(qp, QAM_VALID); + *exactp = F_ISSET(qp, QAM_VALID) ? 1 : 0; return (ret); } @@ -112,38 +112,36 @@ __qam_pitem(dbc, pagep, indx, recno, data) db_recno_t recno; DBT *data; { + DB_ENV *dbenv; DB *dbp; DBT olddata, pdata, *datap; QAMDATA *qp; QUEUE *t; - u_int32_t size; u_int8_t *dest, *p; int alloced, ret; - alloced = ret = 0; - dbp = dbc->dbp; + dbenv = dbp->dbenv; t = (QUEUE *)dbp->q_internal; + alloced = ret = 0; if (data->size > t->re_len) - goto len_err; - + return (__db_rec_toobig(dbenv, data->size, t->re_len)); qp = QAM_GET_RECORD(dbp, pagep, indx); p = qp->data; - size = data->size; datap = data; if (F_ISSET(data, DB_DBT_PARTIAL)) { if (data->doff + data->dlen > t->re_len) { - alloced = data->dlen; - goto len_err; - } - if (data->size != data->dlen) { -len_err: __db_err(dbp->dbenv, - "Length improper for fixed length record %lu", - (u_long)(alloced ? alloced : data->size)); + __db_err(dbenv, + "%s: data offset plus length larger than record size of %lu", + "Record length error", (u_long)t->re_len); return (EINVAL); } + + if (data->size != data->dlen) + return (__db_rec_repl(dbenv, data->size, data->dlen)); + if (data->size == t->re_len) goto no_partial; @@ -159,12 +157,12 @@ len_err: __db_err(dbp->dbenv, * to log so that both this and the recovery code is simpler. */ - if (DB_LOGGING(dbc) || !F_ISSET(qp, QAM_VALID)) { + if (DBC_LOGGING(dbc) || !F_ISSET(qp, QAM_VALID)) { datap = &pdata; memset(datap, 0, sizeof(*datap)); - if ((ret = __os_malloc(dbp->dbenv, - t->re_len, NULL, &datap->data)) != 0) + if ((ret = __os_malloc(dbenv, + t->re_len, &datap->data)) != 0) return (ret); alloced = 1; datap->size = t->re_len; @@ -188,14 +186,14 @@ len_err: __db_err(dbp->dbenv, } no_partial: - if (DB_LOGGING(dbc)) { + if (DBC_LOGGING(dbc)) { olddata.size = 0; if (F_ISSET(qp, QAM_SET)) { olddata.data = qp->data; olddata.size = t->re_len; } - if ((ret = __qam_add_log(dbp->dbenv, dbc->txn, &LSN(pagep), - 0, dbp->log_fileid, &LSN(pagep), pagep->pgno, + if ((ret = __qam_add_log(dbp, dbc->txn, &LSN(pagep), + 0, &LSN(pagep), pagep->pgno, indx, recno, datap, qp->flags, olddata.size == 0 ? NULL : &olddata)) != 0) goto err; @@ -207,7 +205,7 @@ no_partial: memset(p + datap->size, t->re_pad, t->re_len - datap->size); err: if (alloced) - __os_free(datap->data, t->re_len); + __os_free(dbenv, datap->data); return (ret); } @@ -223,23 +221,37 @@ __qam_c_put(dbc, key, data, flags, pgnop) u_int32_t flags; db_pgno_t *pgnop; { - QUEUE_CURSOR *cp; DB *dbp; DB_LOCK lock; + DB_MPOOLFILE *mpf; QMETA *meta; + QUEUE_CURSOR *cp; db_pgno_t pg; db_recno_t new_cur, new_first; u_int32_t opcode; int exact, ret, t_ret; - COMPQUIET(key, NULL); - dbp = dbc->dbp; + mpf = dbp->mpf; if (pgnop != NULL) *pgnop = PGNO_INVALID; cp = (QUEUE_CURSOR *)dbc->internal; + switch (flags) { + case DB_KEYFIRST: + case DB_KEYLAST: + if ((ret = __qam_getno(dbp, key, &cp->recno)) != 0) + return (ret); + /* FALLTHROUGH */ + case DB_CURRENT: + break; + default: + /* The interface shouldn't let anything else through. */ + DB_ASSERT(0); + return (__db_ferr(dbp->dbenv, "__qam_c_put", flags)); + } + /* Write lock the record. */ if ((ret = __db_lget(dbc, 0, cp->recno, DB_LOCK_WRITE, DB_LOCK_RECORD, &lock)) != 0) @@ -252,29 +264,14 @@ __qam_c_put(dbc, key, data, flags, pgnop) return (ret); } - if (exact && flags == DB_NOOVERWRITE) { - ret = __TLPUT(dbc, lock); - /* Doing record locking, release the page lock */ - if ((t_ret = __LPUT(dbc, cp->lock)) == 0) - cp->lock.off = LOCK_INVALID; - else - if (ret == 0) - ret = t_ret; - if ((t_ret = - __qam_fput(dbp, cp->pgno, cp->page, 0)) != 0 && ret == 0) - ret = t_ret; - cp->page = NULL; - return (ret == 0 ? DB_KEYEXIST : ret); - } - /* Put the item on the page. */ ret = __qam_pitem(dbc, (QPAGE *)cp->page, cp->indx, cp->recno, data); /* Doing record locking, release the page lock */ if ((t_ret = __LPUT(dbc, cp->lock)) != 0 && ret == 0) ret = t_ret; - if ((t_ret = - __qam_fput(dbp, cp->pgno, cp->page, DB_MPOOL_DIRTY)) && ret == 0) + if ((t_ret = __qam_fput( + dbp, cp->pgno, cp->page, DB_MPOOL_DIRTY)) != 0 && ret == 0) ret = t_ret; cp->page = NULL; cp->lock = lock; @@ -284,11 +281,15 @@ __qam_c_put(dbc, key, data, flags, pgnop) /* We may need to reset the head or tail of the queue. */ pg = ((QUEUE *)dbp->q_internal)->q_meta; - if ((ret = __db_lget(dbc, 0, pg, DB_LOCK_WRITE, 0, &lock)) != 0) + + /* + * Get the meta page first, we don't want to write lock it while + * trying to pin it. + */ + if ((ret = __memp_fget(mpf, &pg, 0, &meta)) != 0) return (ret); - if ((ret = memp_fget(dbp->mpf, &pg, 0, &meta)) != 0) { - /* We did not fetch it, we can release the lock. */ - (void)__LPUT(dbc, lock); + if ((ret = __db_lget(dbc, 0, pg, DB_LOCK_WRITE, 0, &lock)) != 0) { + (void)__memp_fput(mpf, meta, 0); return (ret); } @@ -313,7 +314,8 @@ __qam_c_put(dbc, key, data, flags, pgnop) } else { if (QAM_BEFORE_FIRST(meta, cp->recno) && (meta->first_recno <= meta->cur_recno || - meta->first_recno - cp->recno < cp->recno - meta->cur_recno)) { + meta->first_recno - cp->recno < + cp->recno - meta->cur_recno)) { new_first = cp->recno; opcode |= QAM_SETFIRST; } @@ -321,7 +323,8 @@ __qam_c_put(dbc, key, data, flags, pgnop) if (meta->cur_recno == cp->recno || (QAM_AFTER_CURRENT(meta, cp->recno) && (meta->first_recno <= meta->cur_recno || - cp->recno - meta->cur_recno <= meta->first_recno - cp->recno))) { + cp->recno - meta->cur_recno <= + meta->first_recno - cp->recno))) { new_cur = cp->recno + 1; if (new_cur == RECNO_OOB) new_cur++; @@ -329,10 +332,12 @@ __qam_c_put(dbc, key, data, flags, pgnop) } } - if (opcode != 0 && DB_LOGGING(dbc)) { - ret = __qam_mvptr_log(dbp->dbenv, dbc->txn, &meta->dbmeta.lsn, - 0, opcode, dbp->log_fileid, meta->first_recno, new_first, - meta->cur_recno, new_cur, &meta->dbmeta.lsn); + if (opcode != 0 && DBC_LOGGING(dbc)) { + ret = __qam_mvptr_log(dbp, dbc->txn, &meta->dbmeta.lsn, + 0, opcode, meta->first_recno, new_first, + meta->cur_recno, new_cur, &meta->dbmeta.lsn, PGNO_BASE_MD); + if (ret != 0) + opcode = 0; } if (opcode & QAM_SETCUR) @@ -340,9 +345,8 @@ __qam_c_put(dbc, key, data, flags, pgnop) if (opcode & QAM_SETFIRST) meta->first_recno = new_first; - if ((t_ret = - memp_fput(dbp->mpf, meta, opcode != 0 ? DB_MPOOL_DIRTY : 0)) != 0 && - ret == 0) + if ((t_ret = __memp_fput( + mpf, meta, opcode != 0 ? DB_MPOOL_DIRTY : 0)) != 0 && ret == 0) ret = t_ret; /* Don't hold the meta page long term. */ @@ -352,70 +356,42 @@ __qam_c_put(dbc, key, data, flags, pgnop) } /* - * __qam_put -- - * Add a record to the queue. - * If we are doing anything but appending, just call qam_c_put to do the - * work. Otherwise we fast path things here. + * __qam_append -- + * Perform a put(DB_APPEND) in queue. * - * PUBLIC: int __qam_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); + * PUBLIC: int __qam_append __P((DBC *, DBT *, DBT *)); */ int -__qam_put(dbp, txn, key, data, flags) - DB *dbp; - DB_TXN *txn; +__qam_append(dbc, key, data) + DBC *dbc; DBT *key, *data; - u_int32_t flags; { - QUEUE_CURSOR *cp; - DBC *dbc; + DB *dbp; DB_LOCK lock; + DB_MPOOLFILE *mpf; QMETA *meta; QPAGE *page; QUEUE *qp; + QUEUE_CURSOR *cp; db_pgno_t pg; db_recno_t recno; int ret, t_ret; - PANIC_CHECK(dbp->dbenv); - DB_CHECK_TXN(dbp, txn); - - /* Allocate a cursor. */ - if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, dbc->txn, "qam_put", key, data, flags); - + dbp = dbc->dbp; + mpf = dbp->mpf; cp = (QUEUE_CURSOR *)dbc->internal; - /* Check for invalid flags. */ - if ((ret = __db_putchk(dbp, - key, data, flags, F_ISSET(dbp, DB_AM_RDONLY), 0)) != 0) - goto done; - - /* If not appending, then just call the cursor routine */ - if (flags != DB_APPEND) { - if ((ret = __qam_getno(dbp, key, &cp->recno)) != 0) - goto done; - - ret = __qam_c_put(dbc, NULL, data, flags, NULL); - goto done; - } - - /* Write lock the meta page. */ pg = ((QUEUE *)dbp->q_internal)->q_meta; - if ((ret = __db_lget(dbc, 0, pg, DB_LOCK_WRITE, 0, &lock)) != 0) - goto done; - if ((ret = memp_fget(dbp->mpf, &pg, 0, &meta)) != 0) { - /* We did not fetch it, we can release the lock. */ - (void)__LPUT(dbc, lock); - goto done; - } - - /* Record that we are going to allocate a record. */ - if (DB_LOGGING(dbc)) { - __qam_inc_log(dbp->dbenv, - dbc->txn, &meta->dbmeta.lsn, - 0, dbp->log_fileid, &meta->dbmeta.lsn); + /* + * Get the meta page first, we don't want to write lock it while + * trying to pin it. + */ + if ((ret = __memp_fget(mpf, &pg, 0, &meta)) != 0) + return (ret); + /* Write lock the meta page. */ + if ((ret = __db_lget(dbc, 0, pg, DB_LOCK_WRITE, 0, &lock)) != 0) { + (void)__memp_fput(mpf, meta, 0); + return (ret); } /* Get the next record number. */ @@ -436,16 +412,24 @@ __qam_put(dbp, txn, key, data, flags) meta->first_recno = recno; /* Lock the record and release meta page lock. */ - if ((ret = __db_lget(dbc, - 1, recno, DB_LOCK_WRITE, DB_LOCK_RECORD, &lock)) != 0) - goto err; + ret = __db_lget(dbc, LCK_COUPLE_ALWAYS, + recno, DB_LOCK_WRITE, DB_LOCK_RECORD, &lock); /* * The application may modify the data based on the selected record - * number. + * number. We always want to call this even if we ultimately end + * up aborting, because we are allocating a record number, regardless. */ - if (flags == DB_APPEND && dbc->dbp->db_append_recno != NULL && - (ret = dbc->dbp->db_append_recno(dbc->dbp, data, recno)) != 0) { + if (dbc->dbp->db_append_recno != NULL && + (t_ret = dbc->dbp->db_append_recno(dbc->dbp, data, recno)) != 0 && + ret == 0) + ret = t_ret; + + /* + * Capture errors from either the lock couple or the call to + * dbp->db_append_recno. + */ + if (ret != 0) { (void)__LPUT(dbc, lock); goto err; } @@ -484,16 +468,20 @@ __qam_put(dbp, txn, key, data, flags) /* Return the record number to the user. */ if (ret == 0) - ret = __db_retcopy(dbp, key, - &recno, sizeof(recno), &dbc->rkey.data, &dbc->rkey.ulen); + ret = __db_retcopy(dbp->dbenv, key, + &recno, sizeof(recno), &dbc->rkey->data, &dbc->rkey->ulen); + + /* Position the cursor on this record. */ + cp->recno = recno; /* See if we are leaving the extent. */ qp = (QUEUE *) dbp->q_internal; - if (qp->page_ext != 0 - && (recno % (qp->page_ext * qp->rec_page) == 0 - || recno == UINT32_T_MAX)) { - if ((ret = - __db_lget(dbc, 0, pg, DB_LOCK_WRITE, 0, &lock)) != 0) + if (qp->page_ext != 0 && + (recno % (qp->page_ext * qp->rec_page) == 0 || + recno == UINT32_T_MAX)) { + if ((ret = __db_lget(dbc, + 0, ((QUEUE *)dbp->q_internal)->q_meta, + DB_LOCK_WRITE, 0, &lock)) != 0) goto err; if (!QAM_AFTER_CURRENT(meta, recno)) ret = __qam_fclose(dbp, pg); @@ -502,13 +490,7 @@ __qam_put(dbp, txn, key, data, flags) err: /* Release the meta page. */ - if ((t_ret - = memp_fput(dbp->mpf, meta, DB_MPOOL_DIRTY)) != 0 && ret == 0) - ret = t_ret; - -done: - /* Discard the cursor. */ - if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + if ((t_ret = __memp_fput(mpf, meta, DB_MPOOL_DIRTY)) != 0 && ret == 0) ret = t_ret; return (ret); @@ -522,50 +504,57 @@ static int __qam_c_del(dbc) DBC *dbc; { - QUEUE_CURSOR *cp; DB *dbp; DBT data; - DB_LOCK lock; + DB_LOCK lock, metalock; + DB_MPOOLFILE *mpf; PAGE *pagep; QAMDATA *qp; QMETA *meta; + QUEUE_CURSOR *cp; db_pgno_t pg; + db_recno_t first; int exact, ret, t_ret; dbp = dbc->dbp; + mpf = dbp->mpf; cp = (QUEUE_CURSOR *)dbc->internal; pg = ((QUEUE *)dbp->q_internal)->q_meta; - if ((ret = __db_lget(dbc, 0, pg, DB_LOCK_READ, 0, &lock)) != 0) + /* + * Get the meta page first, we don't want to write lock it while + * trying to pin it. + */ + if ((ret = __memp_fget(mpf, &pg, 0, &meta)) != 0) return (ret); - if ((ret = memp_fget(dbp->mpf, &pg, 0, &meta)) != 0) { - /* We did not fetch it, we can release the lock. */ - (void)__LPUT(dbc, lock); + /* Write lock the meta page. */ + if ((ret = __db_lget(dbc, 0, pg, DB_LOCK_READ, 0, &metalock)) != 0) { + (void)__memp_fput(mpf, meta, 0); return (ret); } if (QAM_NOT_VALID(meta, cp->recno)) ret = DB_NOTFOUND; + first = meta->first_recno; + /* Don't hold the meta page long term. */ - if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0) - ret = t_ret; - if ((t_ret = memp_fput(dbp->mpf, meta, 0)) != 0 && ret == 0) + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) ret = t_ret; if (ret != 0) - return (ret); + goto err1; if ((ret = __db_lget(dbc, 0, cp->recno, DB_LOCK_WRITE, DB_LOCK_RECORD, &lock)) != 0) - return (ret); + goto err1; cp->lock_mode = DB_LOCK_WRITE; /* Find the record ; delete only deletes exact matches. */ if ((ret = __qam_position(dbc, &cp->recno, QAM_WRITE, &exact)) != 0) { cp->lock = lock; - return (ret); + goto err1; } if (!exact) { ret = DB_NOTFOUND; @@ -575,21 +564,18 @@ __qam_c_del(dbc) pagep = cp->page; qp = QAM_GET_RECORD(dbp, pagep, cp->indx); - if (DB_LOGGING(dbc)) { - if (((QUEUE *)dbp->q_internal)->page_ext == 0 - || ((QUEUE *)dbp->q_internal)->re_len == 0) { - if ((ret = - __qam_del_log(dbp->dbenv, - dbc->txn, &LSN(pagep), 0, - dbp->log_fileid, &LSN(pagep), + if (DBC_LOGGING(dbc)) { + if (((QUEUE *)dbp->q_internal)->page_ext == 0 || + ((QUEUE *)dbp->q_internal)->re_len == 0) { + if ((ret = __qam_del_log(dbp, + dbc->txn, &LSN(pagep), 0, &LSN(pagep), pagep->pgno, cp->indx, cp->recno)) != 0) goto err1; } else { data.size = ((QUEUE *)dbp->q_internal)->re_len; data.data = qp->data; - if ((ret = - __qam_delext_log(dbp->dbenv, dbc->txn, - &LSN(pagep), 0, dbp->log_fileid, &LSN(pagep), + if ((ret = __qam_delext_log(dbp, + dbc->txn, &LSN(pagep), 0, &LSN(pagep), pagep->pgno, cp->indx, cp->recno, &data)) != 0) goto err1; } @@ -597,60 +583,28 @@ __qam_c_del(dbc) F_CLR(qp, QAM_VALID); -err1: - if ((t_ret = __qam_fput( - dbp, cp->pgno, cp->page, ret == 0 ? DB_MPOOL_DIRTY : 0)) != 0) - return (ret ? ret : t_ret); - cp->page = NULL; - /* Doing record locking, release the page lock */ - if ((t_ret = __LPUT(dbc, cp->lock)) != 0) { - cp->lock = lock; - return (ret ? ret : t_ret); + if (cp->recno == first) { + pg = ((QUEUE *)dbp->q_internal)->q_meta; + if ((ret = + __db_lget(dbc, 0, pg, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err1; + ret = __qam_consume(dbc, meta, first); + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; } - cp->lock = lock; - return (ret); -} -/* - * __qam_delete -- - * Queue db->del function. - * - * PUBLIC: int __qam_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); - */ -int -__qam_delete(dbp, txn, key, flags) - DB *dbp; - DB_TXN *txn; - DBT *key; - u_int32_t flags; -{ - QUEUE_CURSOR *cp; - DBC *dbc; - int ret, t_ret; - - PANIC_CHECK(dbp->dbenv); - DB_CHECK_TXN(dbp, txn); - - /* Check for invalid flags. */ - if ((ret = - __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) - return (ret); - - /* Acquire a cursor. */ - if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, txn, "qam_delete", key, NULL, flags); - - cp = (QUEUE_CURSOR *)dbc->internal; - if ((ret = __qam_getno(dbp, key, &cp->recno)) != 0) - goto err; - - ret = __qam_c_del(dbc); +err1: + if ((t_ret = __memp_fput(mpf, meta, 0)) != 0 && ret == 0) + ret = t_ret; + if (cp->page != NULL && (t_ret = __qam_fput(dbp, cp->pgno, + cp->page, ret == 0 ? DB_MPOOL_DIRTY : 0)) != 0 && ret == 0) + ret = t_ret; + cp->page = NULL; - /* Release the cursor. */ -err: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + /* Doing record locking, release the page lock */ + if ((t_ret = __LPUT(dbc, cp->lock)) != 0 && ret == 0) ret = t_ret; + cp->lock = lock; return (ret); } @@ -671,39 +625,41 @@ __qam_c_get(dbc, key, data, flags, pgnop) db_pgno_t *pgnop; { DB *dbp; - DB_LOCK lock, pglock, metalock, save_lock; + DBC *dbcdup; DBT tmp; + DB_ENV *dbenv; + DB_LOCK lock, pglock, metalock; + DB_MPOOLFILE *mpf; PAGE *pg; QAMDATA *qp; QMETA *meta; QUEUE *t; QUEUE_CURSOR *cp; - db_indx_t save_indx; db_lockmode_t lock_mode; - db_pgno_t metapno, save_page; - db_recno_t current, first, save_recno; + db_pgno_t metapno; + db_recno_t first; qam_position_mode mode; - u_int32_t rec_extent; int exact, is_first, locked, ret, t_ret, wait, with_delete; - int put_mode, meta_dirty, retrying, skip_again, wrapped; + int put_mode, retrying; - cp = (QUEUE_CURSOR *)dbc->internal; dbp = dbc->dbp; + dbenv = dbp->dbenv; + mpf = dbp->mpf; + cp = (QUEUE_CURSOR *)dbc->internal; - PANIC_CHECK(dbp->dbenv); + PANIC_CHECK(dbenv); wait = 0; with_delete = 0; retrying = 0; - rec_extent = 0; lock_mode = DB_LOCK_READ; - mode = QAM_READ; + meta = NULL; put_mode = 0; t_ret = 0; *pgnop = 0; pg = NULL; - skip_again = 0; + mode = QAM_READ; if (F_ISSET(dbc, DBC_RMW)) { lock_mode = DB_LOCK_WRITE; mode = QAM_WRITE; @@ -714,7 +670,6 @@ __qam_c_get(dbc, key, data, flags, pgnop) flags = DB_CONSUME; } if (flags == DB_CONSUME) { - DB_CHECK_TXN(dbp, dbc->txn); with_delete = 1; flags = DB_FIRST; lock_mode = DB_LOCK_WRITE; @@ -724,30 +679,29 @@ __qam_c_get(dbc, key, data, flags, pgnop) DEBUG_LREAD(dbc, dbc->txn, "qam_c_get", flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags); + /* Make lint and friends happy. */ + locked = 0; + is_first = 0; t = (QUEUE *)dbp->q_internal; - /* get the meta page */ metapno = t->q_meta; - if ((ret = __db_lget(dbc, 0, metapno, lock_mode, 0, &metalock)) != 0) + + /* + * Get the meta page first, we don't want to write lock it while + * trying to pin it. This is because someone my have it pinned + * but not locked. + */ + if ((ret = __memp_fget(mpf, &metapno, 0, &meta)) != 0) return (ret); + if ((ret = __db_lget(dbc, 0, metapno, lock_mode, 0, &metalock)) != 0) + goto err; locked = 1; - if ((ret = memp_fget(dbp->mpf, &metapno, 0, &meta)) != 0) { - /* We did not fetch it, we can release the lock. */ - (void)__LPUT(dbc, metalock); - return (ret); - } first = 0; - /* Make lint and friends happy. */ - meta_dirty = 0; - /* Release any previous lock if not in a transaction. */ - if (cp->lock.off != LOCK_INVALID) { - (void)__TLPUT(dbc, cp->lock); - cp->lock.off = LOCK_INVALID; - } + (void)__TLPUT(dbc, cp->lock); retry: /* Update the record number. */ switch (flags) { @@ -764,6 +718,87 @@ retry: /* Update the record number. */ /* Wrap around, skipping zero. */ if (cp->recno == RECNO_OOB) cp->recno++; + /* + * Check to see if we are out of data. + */ + if (cp->recno == meta->cur_recno || + QAM_AFTER_CURRENT(meta, cp->recno)) { + pg = NULL; + if (!wait) { + ret = DB_NOTFOUND; + goto err; + } + flags = DB_FIRST; + /* + * If first is not set, then we skipped + * a locked record, go back and find it. + * If we find a locked record again + * wait for it. + */ + if (first == 0) { + retrying = 1; + goto retry; + } + + if (CDB_LOCKING(dbenv)) { + /* Drop the metapage before we wait. */ + if ((ret = + __memp_fput(mpf, meta, 0)) != 0) + goto err; + meta = NULL; + if ((ret = __lock_get( + dbenv, dbc->locker, + DB_LOCK_SWITCH, &dbc->lock_dbt, + DB_LOCK_WAIT, &dbc->mylock)) != 0) + goto err; + + if ((ret = __memp_fget(mpf, + &metapno, 0, &meta)) != 0) + goto err; + if ((ret = __lock_get( + dbenv, dbc->locker, + DB_LOCK_UPGRADE, &dbc->lock_dbt, + DB_LOCK_WRITE, &dbc->mylock)) != 0) + goto err; + goto retry; + } + /* + * Wait for someone to update the meta page. + * This will probably mean there is something + * in the queue. We then go back up and + * try again. + */ + if (locked == 0) { + if ((ret = __db_lget(dbc, 0, metapno, + lock_mode, 0, &metalock)) != 0) + goto err; + locked = 1; + if (cp->recno != RECNO_OOB && + !QAM_AFTER_CURRENT(meta, cp->recno)) + goto retry; + } + /* Drop the metapage before we wait. */ + if ((ret = __memp_fput(mpf, meta, 0)) != 0) + goto err; + meta = NULL; + if ((ret = __db_lget(dbc, + 0, metapno, DB_LOCK_WAIT, + DB_LOCK_SWITCH, &metalock)) != 0) { + if (ret == DB_LOCK_DEADLOCK) + ret = DB_LOCK_NOTGRANTED; + goto err; + } + if ((ret = __memp_fget( + mpf, &metapno, 0, &meta)) != 0) + goto err; + if ((ret = __lock_get(dbenv, + dbc->locker, DB_LOCK_UPGRADE, + &dbc->lock_dbt, DB_LOCK_WRITE, + &metalock)) != 0) + goto err; + locked = 1; + goto retry; + } break; } /* FALLTHROUGH */ @@ -778,8 +813,8 @@ retry: /* Update the record number. */ case DB_PREV: case DB_PREV_NODUP: if (cp->recno != RECNO_OOB) { - if (QAM_BEFORE_FIRST(meta, cp->recno) - || cp->recno == meta->first_recno) { + if (cp->recno == meta->first_recno || + QAM_BEFORE_FIRST(meta, cp->recno)) { ret = DB_NOTFOUND; goto err; } @@ -799,74 +834,20 @@ retry: /* Update the record number. */ if (cp->recno == RECNO_OOB) cp->recno--; break; - case DB_GET_BOTH: case DB_SET: case DB_SET_RANGE: - if ((ret = __qam_getno(dbp, key, &cp->recno)) != 0) + case DB_GET_BOTH: + case DB_GET_BOTH_RANGE: + if ((ret = __qam_getno(dbp, + key, &cp->recno)) != 0) + goto err; + if (QAM_NOT_VALID(meta, cp->recno)) { + ret = DB_NOTFOUND; goto err; + } break; default: - ret = __db_unknown_flag(dbp->dbenv, "__qam_c_get", flags); - goto err; - } - - /* - * Check to see if we are out of data. Current points to - * the first free slot. - */ - if (cp->recno == meta->cur_recno || - QAM_AFTER_CURRENT(meta, cp->recno)) { - ret = DB_NOTFOUND; - pg = NULL; - if (wait) { - flags = DB_FIRST; - /* - * If first is not set, then we skipped a - * locked record, go back and find it. - * If we find a locked record again - * wait for it. - */ - if (first == 0) { - retrying = 1; - goto retry; - } - if (CDB_LOCKING(dbp->dbenv)) { - if ((ret = lock_get(dbp->dbenv, dbc->locker, - DB_LOCK_SWITCH, &dbc->lock_dbt, - DB_LOCK_WAIT, &dbc->mylock)) != 0) - goto err; - if ((ret = lock_get(dbp->dbenv, dbc->locker, - DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, - &dbc->mylock)) != 0) - goto err; - goto retry; - } - /* - * Wait for someone to update the meta page. - * This will probably mean there is something - * in the queue. We then go back up and - * try again. - */ - if (locked == 0) { - if ((ret = __db_lget( dbc, - 0, metapno, lock_mode, 0, &metalock)) != 0) - goto err; - locked = 1; - if (cp->recno != RECNO_OOB && - !QAM_AFTER_CURRENT(meta, cp->recno)) - goto retry; - } - if ((ret = __db_lget(dbc, 0, metapno, - DB_LOCK_WAIT, DB_LOCK_SWITCH, &metalock)) != 0) - goto err; - if ((ret = lock_get(dbp->dbenv, dbc->locker, - DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, - &metalock)) != 0) - goto err; - locked = 1; - goto retry; - } - + ret = __db_unknown_flag(dbenv, "__qam_c_get", flags); goto err; } @@ -881,13 +862,17 @@ retry: /* Update the record number. */ if ((ret = __db_lget(dbc, 0, cp->recno, lock_mode, (with_delete && !retrying) ? DB_LOCK_NOWAIT | DB_LOCK_RECORD : DB_LOCK_RECORD, - &lock)) == DB_LOCK_NOTGRANTED && with_delete) { + &lock)) == DB_LOCK_DEADLOCK && with_delete) { #ifdef QDEBUG - __db_logmsg(dbp->dbenv, + __db_logmsg(dbenv, dbc->txn, "Queue S", 0, "%x %d %d %d", dbc->locker, cp->recno, first, meta->first_recno); #endif first = 0; + if ((ret = + __db_lget(dbc, 0, metapno, lock_mode, 0, &metalock)) != 0) + goto err; + locked = 1; goto retry; } @@ -929,9 +914,9 @@ retry: /* Update the record number. */ cp->lock_mode = lock_mode; if (!exact) { - if (flags == DB_NEXT || flags == DB_NEXT_NODUP - || flags == DB_PREV || flags == DB_PREV_NODUP - || flags == DB_LAST) { + if (flags == DB_NEXT || flags == DB_NEXT_NODUP || + flags == DB_PREV || flags == DB_PREV_NODUP || + flags == DB_LAST) { /* Release locks and try again. */ if (pg != NULL) (void)__qam_fput(dbp, cp->pgno, pg, 0); @@ -951,18 +936,20 @@ retry: /* Update the record number. */ } /* Return the key if the user didn't give us one. */ - if (key != NULL && flags != DB_SET && flags != DB_GET_BOTH && - (ret = __db_retcopy(dbp, key, &cp->recno, sizeof(cp->recno), - &dbc->rkey.data, &dbc->rkey.ulen)) != 0) - goto err1; - - if (key != NULL) + if (key != NULL) { + if (flags != DB_GET_BOTH && flags != DB_GET_BOTH_RANGE && + flags != DB_SET && flags != DB_SET_RANGE && + (ret = __db_retcopy(dbp->dbenv, + key, &cp->recno, sizeof(cp->recno), + &dbc->rkey->data, &dbc->rkey->ulen)) != 0) + goto err1; F_SET(key, DB_DBT_ISSET); + } qp = QAM_GET_RECORD(dbp, pg, cp->indx); /* Return the data item. */ - if (flags == DB_GET_BOTH) { + if (flags == DB_GET_BOTH || flags == DB_GET_BOTH_RANGE) { /* * Need to compare */ @@ -973,8 +960,10 @@ retry: /* Update the record number. */ goto err1; } } - if (data != NULL && (ret = __db_retcopy(dbp, data, - qp->data, t->re_len, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) + if (data != NULL && + !F_ISSET(dbc, DBC_MULTIPLE|DBC_MULTIPLE_KEY) && + (ret = __db_retcopy(dbp->dbenv, data, + qp->data, t->re_len, &dbc->rdata->data, &dbc->rdata->ulen)) != 0) goto err1; if (data != NULL) @@ -982,18 +971,53 @@ retry: /* Update the record number. */ /* Finally, if we are doing DB_CONSUME mark the record. */ if (with_delete) { - if (DB_LOGGING(dbc)) { + /* + * Assert that we're not a secondary index. Doing a DB_CONSUME + * on a secondary makes very little sense, since one can't + * DB_APPEND there; attempting one should be forbidden by + * the interface. + */ + DB_ASSERT(!F_ISSET(dbp, DB_AM_SECONDARY)); + + /* + * Check and see if we *have* any secondary indices. + * If we do, we're a primary, so call __db_c_del_primary + * to delete the references to the item we're about to + * delete. + * + * Note that we work on a duplicated cursor, since the + * __db_ret work has already been done, so it's not safe + * to perform any additional ops on this cursor. + */ + if (LIST_FIRST(&dbp->s_secondaries) != NULL) { + if ((ret = __db_c_idup(dbc, + &dbcdup, DB_POSITION)) != 0) + goto err1; + + if ((ret = __db_c_del_primary(dbcdup)) != 0) { + /* + * The __db_c_del_primary return is more + * interesting. + */ + (void)__db_c_close(dbcdup); + goto err1; + } + + if ((ret = __db_c_close(dbcdup)) != 0) + goto err1; + } + + if (DBC_LOGGING(dbc)) { if (t->page_ext == 0 || t->re_len == 0) { - if ((ret = __qam_del_log(dbp->dbenv, dbc->txn, - &LSN(pg), 0, dbp->log_fileid, &LSN(pg), + if ((ret = __qam_del_log(dbp, dbc->txn, + &LSN(pg), 0, &LSN(pg), pg->pgno, cp->indx, cp->recno)) != 0) goto err1; } else { tmp.data = qp->data; tmp.size = t->re_len; - if ((ret = - __qam_delext_log(dbp->dbenv, dbc->txn, - &LSN(pg), 0, dbp->log_fileid, &LSN(pg), + if ((ret = __qam_delext_log(dbp, + dbc->txn, &LSN(pg), 0, &LSN(pg), pg->pgno, cp->indx, cp->recno, &tmp)) != 0) goto err1; } @@ -1003,7 +1027,7 @@ retry: /* Update the record number. */ put_mode = DB_MPOOL_DIRTY; if ((ret = __LPUT(dbc, pglock)) != 0) - goto err; + goto err1; /* * Now we need to update the metapage @@ -1021,8 +1045,9 @@ retry: /* Update the record number. */ dbc, 0, metapno, lock_mode, 0, &metalock)) != 0) goto err1; locked = 1; + #ifdef QDEBUG - __db_logmsg(dbp->dbenv, + __db_logmsg(dbenv, dbc->txn, "Queue D", 0, "%x %d %d %d", dbc->locker, cp->recno, first, meta->first_recno); #endif @@ -1037,190 +1062,394 @@ retry: /* Update the record number. */ if (first != meta->first_recno) goto done; - save_page = cp->pgno; - save_indx = cp->indx; - save_recno = cp->recno; - save_lock = cp->lock; + if ((ret = __qam_consume(dbc, meta, first)) != 0) + goto err1; + } - /* - * If we skipped some deleted records, we need to - * reposition on the first one. Get a lock - * in case someone is trying to put it back. - */ - if (first != cp->recno) { - ret = __db_lget(dbc, 0, first, DB_LOCK_READ, - DB_LOCK_NOWAIT | DB_LOCK_RECORD, &lock); - if (ret == DB_LOCK_NOTGRANTED) { - ret = 0; - goto done; - } - if (ret != 0) - goto err1; - if ((ret = - __qam_fput(dbp, cp->pgno, cp->page, put_mode)) != 0) - goto err1; - cp->page = NULL; - put_mode = 0; - if ((ret = __qam_position(dbc, - &first, QAM_READ, &exact)) != 0 || exact != 0) { - (void)__LPUT(dbc, lock); - goto err1; - } - if ((ret =__LPUT(dbc, lock)) != 0) - goto err1; - if ((ret = __LPUT(dbc, cp->lock)) != 0) - goto err1; +done: +err1: if (cp->page != NULL) { + t_ret = __qam_fput(dbp, cp->pgno, cp->page, put_mode); + + if (!ret) + ret = t_ret; + /* Doing record locking, release the page lock */ + t_ret = __LPUT(dbc, pglock); + cp->page = NULL; + } + +err: if (!ret) + ret = t_ret; + if (meta) { + + /* release the meta page */ + t_ret = __memp_fput(mpf, meta, 0); + + if (!ret) + ret = t_ret; + + /* Don't hold the meta page long term. */ + if (locked) + t_ret = __LPUT(dbc, metalock); + } + DB_ASSERT(!LOCK_ISSET(metalock)); + + /* + * There is no need to keep the record locked if we are + * not in a transaction. + */ + if (t_ret == 0) + t_ret = __TLPUT(dbc, cp->lock); + + if (!ret) + ret = t_ret; + + return ((ret == DB_LOCK_NOTGRANTED && + !F_ISSET(dbenv, DB_ENV_TIME_NOTGRANTED)) ? + DB_LOCK_DEADLOCK : ret); +} + +/* + * __qam_consume -- try to reset the head of the queue. + * + */ + +static int +__qam_consume(dbc, meta, first) + DBC *dbc; + QMETA *meta; + db_recno_t first; +{ + DB *dbp; + DB_LOCK lock, save_lock; + DB_MPOOLFILE *mpf; + QUEUE_CURSOR *cp; + db_indx_t save_indx; + db_pgno_t save_page; + db_recno_t current, save_recno; + u_int32_t rec_extent; + int exact, put_mode, ret, t_ret, wrapped; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (QUEUE_CURSOR *)dbc->internal; + put_mode = DB_MPOOL_DIRTY; + ret = t_ret = 0; + + save_page = cp->pgno; + save_indx = cp->indx; + save_recno = cp->recno; + save_lock = cp->lock; + + /* + * If we skipped some deleted records, we need to + * reposition on the first one. Get a lock + * in case someone is trying to put it back. + */ + if (first != cp->recno) { + ret = __db_lget(dbc, 0, first, DB_LOCK_READ, + DB_LOCK_NOWAIT | DB_LOCK_RECORD, &lock); + if (ret == DB_LOCK_DEADLOCK) { + ret = 0; + goto done; } + if (ret != 0) + goto done; + if ((ret = + __qam_fput(dbp, cp->pgno, cp->page, put_mode)) != 0) + goto done; + cp->page = NULL; + put_mode = 0; + if ((ret = __qam_position(dbc, + &first, QAM_READ, &exact)) != 0 || exact != 0) { + (void)__LPUT(dbc, lock); + goto done; + } + if ((ret =__LPUT(dbc, lock)) != 0) + goto done; + if ((ret = __LPUT(dbc, cp->lock)) != 0) + goto done; + } - current = meta->cur_recno; - wrapped = 0; - if (first > current) - wrapped = 1; - rec_extent = meta->page_ext * meta->rec_page; + current = meta->cur_recno; + wrapped = 0; + if (first > current) + wrapped = 1; + rec_extent = meta->page_ext * meta->rec_page; - /* Loop until we find a record or hit current */ - for (;;) { - /* - * Check to see if we are moving off the extent - * and remove the extent. - * If we are moving off a page we need to - * get rid of the buffer. - * Wait for the lagging readers to move off the - * page. - */ - if (rec_extent != 0 - && ((exact = first % rec_extent == 0) - || first % meta->rec_page == 0 - || first == UINT32_T_MAX)) { - if (exact == 1 && (ret = __db_lget(dbc, - 0, cp->pgno, DB_LOCK_WRITE, 0, &cp->lock)) != 0) - break; + /* Loop until we find a record or hit current */ + for (;;) { + /* + * Check to see if we are moving off the extent + * and remove the extent. + * If we are moving off a page we need to + * get rid of the buffer. + * Wait for the lagging readers to move off the + * page. + */ + if (cp->page != NULL && rec_extent != 0 && + ((exact = (first % rec_extent == 0)) || + first % meta->rec_page == 0 || + first == UINT32_T_MAX)) { + if (exact == 1 && (ret = __db_lget(dbc, + 0, cp->pgno, DB_LOCK_WRITE, 0, &cp->lock)) != 0) + break; #ifdef QDEBUG - __db_logmsg(dbp->dbenv, - dbc->txn, "Queue R", 0, "%x %d %d %d", - dbc->locker, cp->pgno, first, meta->first_recno); + __db_logmsg(dbp->dbenv, + dbc->txn, "Queue R", 0, "%x %d %d %d", + dbc->locker, cp->pgno, first, meta->first_recno); #endif - put_mode |= DB_MPOOL_DISCARD; - if ((ret = __qam_fput(dbp, - cp->pgno, cp->page, put_mode)) != 0) - break; - cp->page = NULL; - - if (exact == 1) { - ret = __qam_fremove(dbp, cp->pgno); - t_ret = __LPUT(dbc, cp->lock); - } - if (ret != 0) - break; - if (t_ret != 0) { - ret = t_ret; - break; - } - } else if ((ret = - __qam_fput(dbp, cp->pgno, cp->page, put_mode)) != 0) + put_mode |= DB_MPOOL_DISCARD; + if ((ret = __qam_fput(dbp, + cp->pgno, cp->page, put_mode)) != 0) break; cp->page = NULL; - first++; - if (first == RECNO_OOB) { - wrapped = 0; - first++; - } - - /* - * LOOP EXIT when we come move to the current - * pointer. - */ - if (!wrapped && first >= current) - break; - ret = __db_lget(dbc, 0, first, DB_LOCK_READ, - DB_LOCK_NOWAIT | DB_LOCK_RECORD, &lock); - if (ret == DB_LOCK_NOTGRANTED) { - ret = 0; - break; + if (exact == 1) { + ret = __qam_fremove(dbp, cp->pgno); + t_ret = __LPUT(dbc, cp->lock); } if (ret != 0) break; - - if ((ret = __qam_position(dbc, - &first, QAM_READ, &exact)) != 0) { - (void)__LPUT(dbc, lock); - break; - } - put_mode = 0; - if ((ret =__LPUT(dbc, lock)) != 0 - || (ret = __LPUT(dbc, cp->lock)) != 0 ||exact) { - if ((t_ret = __qam_fput(dbp, cp->pgno, - cp->page, put_mode)) != 0 && ret == 0) - ret = t_ret; - cp->page = NULL; + if (t_ret != 0) { + ret = t_ret; break; } + } else if (cp->page != NULL && (ret = + __qam_fput(dbp, cp->pgno, cp->page, put_mode)) != 0) + break; + cp->page = NULL; + first++; + if (first == RECNO_OOB) { + wrapped = 0; + first++; } - cp->pgno = save_page; - cp->indx = save_indx; - cp->recno = save_recno; - cp->lock = save_lock; - /* - * We have advanced as far as we can. - * Advance first_recno to this point. + * LOOP EXIT when we come move to the current + * pointer. */ - if (meta->first_recno != first) { + if (!wrapped && first >= current) + break; + + ret = __db_lget(dbc, 0, first, DB_LOCK_READ, + DB_LOCK_NOWAIT | DB_LOCK_RECORD, &lock); + if (ret == DB_LOCK_DEADLOCK) { + ret = 0; + break; + } + if (ret != 0) + break; + + if ((ret = __qam_position(dbc, + &first, QAM_READ, &exact)) != 0) { + (void)__LPUT(dbc, lock); + break; + } + put_mode = 0; + if ((ret =__LPUT(dbc, lock)) != 0 || + (ret = __LPUT(dbc, cp->lock)) != 0 || exact) { + if ((t_ret = __qam_fput(dbp, cp->pgno, + cp->page, put_mode)) != 0 && ret == 0) + ret = t_ret; + cp->page = NULL; + break; + } + } + + cp->pgno = save_page; + cp->indx = save_indx; + cp->recno = save_recno; + cp->lock = save_lock; + + /* + * We have advanced as far as we can. + * Advance first_recno to this point. + */ + if (ret == 0 && meta->first_recno != first) { #ifdef QDEBUG __db_logmsg(dbp->dbenv, dbc->txn, "Queue M", 0, "%x %d %d %d", dbc->locker, cp->recno, first, meta->first_recno); #endif - if (DB_LOGGING(dbc)) - if ((ret = - __qam_incfirst_log(dbp->dbenv, - dbc->txn, &meta->dbmeta.lsn, 0, - dbp->log_fileid, cp->recno)) != 0) - goto err; - meta->first_recno = first; - meta_dirty = 1; - } + if (DBC_LOGGING(dbc)) + if ((ret = __qam_incfirst_log(dbp, + dbc->txn, &meta->dbmeta.lsn, 0, + cp->recno, PGNO_BASE_MD)) != 0) + goto done; + meta->first_recno = first; + (void)__memp_fset(mpf, meta, DB_MPOOL_DIRTY); } done: -err1: if (cp->page != NULL) { - t_ret = __qam_fput(dbp, cp->pgno, cp->page, put_mode); + return (ret); +} - if (!ret) - ret = t_ret; - /* Doing record locking, release the page lock */ - t_ret = __LPUT(dbc, pglock); - cp->page = NULL; +static int +__qam_bulk(dbc, data, flags) + DBC *dbc; + DBT *data; + u_int32_t flags; +{ + DB *dbp; + DB_LOCK metalock; + DB_MPOOLFILE *mpf; + PAGE *pg; + QMETA *meta; + QAMDATA *qp; + QUEUE_CURSOR *cp; + db_indx_t indx; + db_pgno_t metapno; + qam_position_mode mode; + int32_t *endp, *offp; + u_int8_t *dbuf, *dp, *np; + int exact, recs, re_len, ret, t_ret, valid; + int is_key, need_pg, pagesize, size, space; + + dbp = dbc->dbp; + mpf = dbp->mpf; + cp = (QUEUE_CURSOR *)dbc->internal; + + mode = QAM_READ; + if (F_ISSET(dbc, DBC_RMW)) + mode = QAM_WRITE; + + pagesize = dbp->pgsize; + re_len = ((QUEUE *)dbp->q_internal)->re_len; + recs = ((QUEUE *)dbp->q_internal)->rec_page; + metapno = ((QUEUE *)dbp->q_internal)->q_meta; + + is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0; + size = 0; + + if ((ret = __db_lget(dbc, 0, metapno, DB_LOCK_READ, 0, &metalock)) != 0) + return (ret); + if ((ret = __memp_fget(mpf, &metapno, 0, &meta)) != 0) { + /* We did not fetch it, we can release the lock. */ + (void)__LPUT(dbc, metalock); + return (ret); } -err: if (!ret) - ret = t_ret; - if (meta) { + dbuf = data->data; + np = dp = dbuf; - /* release the meta page */ - t_ret = memp_fput( - dbp->mpf, meta, meta_dirty ? DB_MPOOL_DIRTY : 0); + /* Keep track of space that is left. There is an termination entry */ + space = data->ulen; + space -= sizeof(*offp); - if (!ret) - ret = t_ret; + /* Build the offset/size table form the end up. */ + endp = (int32_t *) ((u_int8_t *)dbuf + data->ulen); + endp--; + offp = endp; - /* Don't hold the meta page long term. */ - if (locked) - t_ret = __LPUT(dbc, metalock); +next_pg: + /* Wrap around, skipping zero. */ + if (cp->recno == RECNO_OOB) + cp->recno++; + if ((ret = __qam_position(dbc, &cp->recno, mode, &exact)) != 0) + goto done; + + pg = cp->page; + indx = cp->indx; + need_pg = 1; + + do { + /* + * If this page is a nonexistent page at the end of an + * extent, pg may be NULL. A NULL page has no valid records, + * so just keep looping as though qp exists and isn't QAM_VALID; + * calling QAM_GET_RECORD is unsafe. + */ + valid = 0; + + if (pg != NULL) { + qp = QAM_GET_RECORD(dbp, pg, indx); + if (F_ISSET(qp, QAM_VALID)) { + valid = 1; + space -= (is_key ? 3 : 2) * sizeof(*offp); + if (space < 0) + goto get_space; + if (need_pg) { + dp = np; + size = pagesize - QPAGE_SZ(dbp); + if (space < size) { +get_space: + if (offp == endp) { + data->size = + ALIGN(size + + pagesize, + sizeof(u_int32_t)); + ret = ENOMEM; + break; + } + if (indx != 0) + indx--; + cp->recno--; + space = 0; + break; + } + memcpy(dp, + (char *)pg + QPAGE_SZ(dbp), size); + need_pg = 0; + space -= size; + np += size; + } + if (is_key) + *offp-- = cp->recno; + *offp-- = (int32_t)((u_int8_t*)qp - + (u_int8_t*)pg - QPAGE_SZ(dbp) + + dp - dbuf + SSZA(QAMDATA, data)); + *offp-- = re_len; + } + } + if (!valid && is_key == 0) { + *offp-- = 0; + *offp-- = 0; + } + cp->recno++; + } while (++indx < recs && cp->recno != RECNO_OOB && + cp->recno != meta->cur_recno && + !QAM_AFTER_CURRENT(meta, cp->recno)); + + if ((t_ret = __TLPUT(dbc, cp->lock)) != 0 && ret == 0) + ret = t_ret; + + if (cp->page != NULL) { + if ((t_ret = + __qam_fput(dbp, cp->pgno, cp->page, 0)) != 0 && ret == 0) + ret = t_ret; + cp->page = NULL; } - DB_ASSERT(metalock.off == LOCK_INVALID); + + if (ret == 0 && space > 0 && + (indx >= recs || cp->recno == RECNO_OOB) && + cp->recno != meta->cur_recno && + !QAM_AFTER_CURRENT(meta, cp->recno)) + goto next_pg; /* - * There is no need to keep the record locked if we are - * not in a transaction. + * Correct recno in two cases: + * 1) If we just wrapped fetch must start at record 1 not a FIRST. + * 2) We ran out of space exactly at the end of a page. */ - if (t_ret == 0) - t_ret = __TLPUT(dbc, cp->lock); + if (cp->recno == RECNO_OOB || (space == 0 && indx == recs)) + cp->recno--; + + if (is_key == 1) + *offp = RECNO_OOB; + else + *offp = -1; - return (ret ? ret : t_ret); +done: + /* release the meta page */ + t_ret = __memp_fput(mpf, meta, 0); + + if (!ret) + ret = t_ret; + + t_ret = __LPUT(dbc, metalock); + + return (ret); } /* @@ -1241,15 +1470,12 @@ __qam_c_close(dbc, root_pgno, rmroot) cp = (QUEUE_CURSOR *)dbc->internal; /* Discard any locks not acquired inside of a transaction. */ - if (cp->lock.off != LOCK_INVALID) { - (void)__TLPUT(dbc, cp->lock); - cp->lock.off = LOCK_INVALID; - } + (void)__TLPUT(dbc, cp->lock); + LOCK_INIT(cp->lock); cp->page = NULL; cp->pgno = PGNO_INVALID; cp->indx = 0; - cp->lock.off = LOCK_INVALID; cp->lock_mode = DB_LOCK_NG; cp->recno = RECNO_OOB; cp->flags = 0; @@ -1277,7 +1503,7 @@ __qam_c_dup(orig_dbc, new_dbc) /* reget the long term lock if we are not in a xact */ if (orig_dbc->txn != NULL || - !STD_LOCKING(orig_dbc) || orig->lock.off == LOCK_INVALID) + !STD_LOCKING(orig_dbc) || !LOCK_ISSET(orig->lock)) return (0); return (__db_lget(new_dbc, @@ -1310,11 +1536,13 @@ __qam_c_init(dbc) /* Initialize methods. */ dbc->c_close = __db_c_close; - dbc->c_count = __db_c_count; - dbc->c_del = __db_c_del; - dbc->c_dup = __db_c_dup; - dbc->c_get = __db_c_get; - dbc->c_put = __db_c_put; + dbc->c_count = __db_c_count_pp; + dbc->c_del = __db_c_del_pp; + dbc->c_dup = __db_c_dup_pp; + dbc->c_get = __db_c_get_pp; + dbc->c_pget = __db_c_pget_pp; + dbc->c_put = __db_c_put_pp; + dbc->c_am_bulk = __qam_bulk; dbc->c_am_close = __qam_c_close; dbc->c_am_del = __qam_c_del; dbc->c_am_destroy = __qam_c_destroy; @@ -1334,7 +1562,7 @@ __qam_c_destroy(dbc) DBC *dbc; { /* Discard the structures. */ - __os_free(dbc->internal, sizeof(QUEUE_CURSOR)); + __os_free(dbc->dbp->dbenv, dbc->internal); return (0); } @@ -1355,3 +1583,72 @@ __qam_getno(dbp, key, rep) } return (0); } + +/* + * __qam_truncate -- + * Truncate a queue database + * + * PUBLIC: int __qam_truncate __P((DBC *, u_int32_t *)); + */ +int +__qam_truncate(dbc, countp) + DBC *dbc; + u_int32_t *countp; +{ + DB *dbp; + DB_LOCK metalock; + DB_MPOOLFILE *mpf; + QMETA *meta; + QUEUE_CURSOR *cp; + db_pgno_t metapno; + int count, ret, t_ret; + + dbp = dbc->dbp; + + /* Walk the queue, counting rows. */ + count = 0; + while ((ret = __qam_c_get(dbc, NULL, NULL, DB_CONSUME, &metapno)) == 0) + count++; + + if (ret == DB_NOTFOUND) + ret = 0; + else + return (ret); + + cp = (QUEUE_CURSOR *)dbc->internal; + /* Remove the last extent file. */ + if (cp->pgno != 0 && + ((QUEUE *)dbp->q_internal)->page_ext != 0 && + (ret = __qam_fremove(dbp, cp->pgno)) != 0) + return (ret); + + /* Update the meta page. */ + metapno = ((QUEUE *)dbp->q_internal)->q_meta; + if ((ret = + __db_lget(dbc, 0, metapno, DB_LOCK_WRITE, 0, &metalock)) != 0) + return (ret); + + mpf = dbp->mpf; + if ((ret = __memp_fget(mpf, &metapno, 0, &meta)) != 0) { + /* We did not fetch it, we can release the lock. */ + (void)__LPUT(dbc, metalock); + return (ret); + } + if (DBC_LOGGING(dbc)) { + ret = __qam_mvptr_log(dbp, dbc->txn, &meta->dbmeta.lsn, 0, + QAM_SETCUR | QAM_SETFIRST | QAM_TRUNCATE, meta->first_recno, + 1, meta->cur_recno, 1, &meta->dbmeta.lsn, PGNO_BASE_MD); + } + if (ret == 0) + meta->first_recno = meta->cur_recno = 1; + + if ((t_ret = __memp_fput(mpf, + meta, ret == 0 ? DB_MPOOL_DIRTY: 0)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) + ret = t_ret; + + *countp = count; + + return (ret); +} diff --git a/db/qam/qam.src b/db/qam/qam.src index 507d7a652..34eada651 100644 --- a/db/qam/qam.src +++ b/db/qam/qam.src @@ -1,13 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2003 * Sleepycat Software. All rights reserved. * - * $Id: qam.src,v 11.15 2001/01/16 20:10:55 ubell Exp $ + * $Id: qam.src,v 11.31 2003/11/14 05:32:38 ubell Exp $ */ -PREFIX qam +PREFIX __qam +DBPRIVATE INCLUDE #include "db_config.h" INCLUDE @@ -15,59 +16,52 @@ INCLUDE #ifndef NO_SYSTEM_INCLUDES INCLUDE #include <sys/types.h> INCLUDE INCLUDE #include <ctype.h> -INCLUDE #include <errno.h> INCLUDE #include <string.h> INCLUDE #endif INCLUDE INCLUDE #include "db_int.h" -INCLUDE #include "db_page.h" -INCLUDE #include "db_dispatch.h" -INCLUDE #include "db_am.h" -INCLUDE #include "qam.h" -INCLUDE #include "txn.h" +INCLUDE #include "dbinc/crypto.h" +INCLUDE #include "dbinc/db_page.h" +INCLUDE #include "dbinc/db_dispatch.h" +INCLUDE #include "dbinc/db_am.h" +INCLUDE #include "dbinc/log.h" +INCLUDE #include "dbinc/qam.h" +INCLUDE #include "dbinc/txn.h" INCLUDE /* - * inc - * Used when we increment a record number. These do not actually - * tell you what record number you got, just that you incremented - * the record number. These operations are never undone. - */ -BEGIN inc 76 -ARG fileid int32_t ld -POINTER lsn DB_LSN * lu -END - -/* * incfirst * Used when we increment first_recno. */ -BEGIN incfirst 77 -ARG fileid int32_t ld +BEGIN incfirst 84 +DB fileid int32_t ld ARG recno db_recno_t lu +ARG meta_pgno db_pgno_t lu END /* * mvptr * Used when we change one or both of cur_recno and first_recno. */ -BEGIN mvptr 78 +BEGIN mvptr 85 ARG opcode u_int32_t lu -ARG fileid int32_t ld +DB fileid int32_t ld ARG old_first db_recno_t lu ARG new_first db_recno_t lu ARG old_cur db_recno_t lu ARG new_cur db_recno_t lu POINTER metalsn DB_LSN * lu +ARG meta_pgno db_pgno_t lu END + /* * del * Used when we delete a record. * recno is the record that is being deleted. */ BEGIN del 79 -ARG fileid int32_t ld +DB fileid int32_t ld POINTER lsn DB_LSN * lu ARG pgno db_pgno_t lu ARG indx u_int32_t lu @@ -81,7 +75,7 @@ END * data is the record itself. */ BEGIN add 80 -ARG fileid int32_t ld +DB fileid int32_t ld POINTER lsn DB_LSN * lu ARG pgno db_pgno_t lu ARG indx u_int32_t lu @@ -92,30 +86,12 @@ DBT olddata DBT s END /* - * delete - * Used when we remove a Queue extent file. - */ -BEGIN delete 81 -DBT name DBT s -POINTER lsn DB_LSN * lu -END - -/* - * rename - * Used when we rename a Queue extent file. - */ -BEGIN rename 82 -DBT name DBT s -DBT newname DBT s -END - -/* * delext * Used when we delete a record in extent based queue. * recno is the record that is being deleted. */ BEGIN delext 83 -ARG fileid int32_t ld +DB fileid int32_t ld POINTER lsn DB_LSN * lu ARG pgno db_pgno_t lu ARG indx u_int32_t lu diff --git a/db/qam/qam_auto.c b/db/qam/qam_auto.c index cfdba3195..a108e347a 100644 --- a/db/qam/qam_auto.c +++ b/db/qam/qam_auto.c @@ -5,189 +5,213 @@ #include <sys/types.h> #include <ctype.h> -#include <errno.h> #include <string.h> #endif #include "db_int.h" -#include "db_page.h" -#include "db_dispatch.h" -#include "db_am.h" -#include "qam.h" -#include "txn.h" - +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_dispatch.h" +#include "dbinc/db_am.h" +#include "dbinc/log.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +/* + * PUBLIC: int __qam_incfirst_log __P((DB *, DB_TXN *, DB_LSN *, + * PUBLIC: u_int32_t, db_recno_t, db_pgno_t)); + */ int -__qam_inc_log(dbenv, txnid, ret_lsnp, flags, - fileid, lsn) - DB_ENV *dbenv; +__qam_incfirst_log(dbp, txnid, ret_lsnp, flags, recno, meta_pgno) + DB *dbp; DB_TXN *txnid; DB_LSN *ret_lsnp; u_int32_t flags; - int32_t fileid; - DB_LSN * lsn; + db_recno_t recno; + db_pgno_t meta_pgno; { DBT logrec; + DB_ENV *dbenv; + DB_TXNLOGREC *lr; DB_LSN *lsnp, null_lsn; - u_int32_t rectype, txn_num; - int ret; + u_int32_t uinttmp, rectype, txn_num; + u_int npad; u_int8_t *bp; - - rectype = DB_qam_inc; - if (txnid != NULL && - TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) - return (ret); - txn_num = txnid == NULL ? 0 : txnid->txnid; + int is_durable, ret; + + dbenv = dbp->dbenv; + rectype = DB___qam_incfirst; + npad = 0; + + is_durable = 1; + if (LF_ISSET(DB_LOG_NOT_DURABLE) || + F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) || + F_ISSET(dbp, DB_AM_NOT_DURABLE)) { + if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL) + return (0); + is_durable = 0; + } if (txnid == NULL) { - ZERO_LSN(null_lsn); + txn_num = 0; + null_lsn.file = 0; + null_lsn.offset = 0; lsnp = &null_lsn; - } else + } else { + if (TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid->txnid; lsnp = &txnid->last_lsn; + } + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) - + sizeof(*lsn); - if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) - return (ret); + + sizeof(u_int32_t) + + sizeof(u_int32_t) + + sizeof(u_int32_t); + if (CRYPTO_ON(dbenv)) { + npad = + ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); + logrec.size += npad; + } + + if (!is_durable && txnid != NULL) { + if ((ret = __os_malloc(dbenv, + logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) + return (ret); +#ifdef DIAGNOSTIC + goto do_malloc; +#else + logrec.data = &lr->data; +#endif + } else { +#ifdef DIAGNOSTIC +do_malloc: +#endif + if ((ret = + __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { +#ifdef DIAGNOSTIC + if (!is_durable && txnid != NULL) + (void)__os_free(dbenv, lr); +#endif + return (ret); + } + } + if (npad > 0) + memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - if (lsn != NULL) - memcpy(bp, lsn, sizeof(*lsn)); - else - memset(bp, 0, sizeof(*lsn)); - bp += sizeof(*lsn); - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); - ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, logrec.size); - return (ret); -} - -int -__qam_inc_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __qam_inc_args *argp; - u_int32_t i; - u_int ch; - int ret; - i = 0; - ch = 0; - notused2 = DB_TXN_ABORT; - notused3 = NULL; - - if ((ret = __qam_inc_read(dbenv, dbtp->data, &argp)) != 0) + DB_ASSERT(dbp->log_filename != NULL); + if (dbp->log_filename->id == DB_LOGFILEID_INVALID && + (ret = __dbreg_lazy_id(dbp)) != 0) return (ret); - printf("[%lu][%lu]qam_inc: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, - (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); - printf("\tfileid: %ld\n", (long)argp->fileid); - printf("\tlsn: [%lu][%lu]\n", - (u_long)argp->lsn.file, (u_long)argp->lsn.offset); - printf("\n"); - __os_free(argp, 0); - return (0); -} -int -__qam_inc_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __qam_inc_args **argpp; -{ - __qam_inc_args *argp; - u_int8_t *bp; - int ret; + uinttmp = (u_int32_t)dbp->log_filename->id; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + uinttmp = (u_int32_t)recno; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + uinttmp = (u_int32_t)meta_pgno; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + +#ifdef DIAGNOSTIC + if (!is_durable && txnid != NULL) { + /* + * We set the debug bit if we are going + * to log non-durable transactions so + * they will be ignored by recovery. + */ + memcpy(lr->data, logrec.data, logrec.size); + rectype |= DB_debug_FLAG; + memcpy(logrec.data, &rectype, sizeof(rectype)); + } +#endif - ret = __os_malloc(dbenv, sizeof(__qam_inc_args) + - sizeof(DB_TXN), NULL, &argp); + if (!is_durable && txnid != NULL) { + ret = 0; + STAILQ_INSERT_HEAD(&txnid->logs, lr, links); +#ifdef DIAGNOSTIC + goto do_put; +#endif + } else{ +#ifdef DIAGNOSTIC +do_put: +#endif + ret = __log_put(dbenv, + ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); + if (ret == 0 && txnid != NULL) + txnid->last_lsn = *ret_lsnp; + } + + if (!is_durable) + LSN_NOT_LOGGED(*ret_lsnp); +#ifdef LOG_DIAGNOSTIC if (ret != 0) - return (ret); - argp->txnid = (DB_TXN *)&argp[1]; - bp = recbuf; - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); - memcpy(&argp->lsn, bp, sizeof(argp->lsn)); - bp += sizeof(argp->lsn); - *argpp = argp; - return (0); + (void)__qam_incfirst_print(dbenv, + (DBT *)&logrec, ret_lsnp, NULL, NULL); +#endif +#ifndef DIAGNOSTIC + if (is_durable || txnid == NULL) +#endif + __os_free(dbenv, logrec.data); + + return (ret); } +#ifdef HAVE_REPLICATION +/* + * PUBLIC: int __qam_incfirst_getpgnos __P((DB_ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ int -__qam_incfirst_log(dbenv, txnid, ret_lsnp, flags, - fileid, recno) +__qam_incfirst_getpgnos(dbenv, rec, lsnp, notused1, summary) DB_ENV *dbenv; - DB_TXN *txnid; - DB_LSN *ret_lsnp; - u_int32_t flags; - int32_t fileid; - db_recno_t recno; + DBT *rec; + DB_LSN *lsnp; + db_recops notused1; + void *summary; { - DBT logrec; - DB_LSN *lsnp, null_lsn; - u_int32_t rectype, txn_num; + TXN_RECS *t; int ret; - u_int8_t *bp; + COMPQUIET(rec, NULL); + COMPQUIET(notused1, DB_TXN_ABORT); - rectype = DB_qam_incfirst; - if (txnid != NULL && - TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) - return (ret); - txn_num = txnid == NULL ? 0 : txnid->txnid; - if (txnid == NULL) { - ZERO_LSN(null_lsn); - lsnp = &null_lsn; - } else - lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) - + sizeof(recno); - if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) + t = (TXN_RECS *)summary; + + if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0) return (ret); - bp = logrec.data; - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - memcpy(bp, &recno, sizeof(recno)); - bp += sizeof(recno); - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); - ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, logrec.size); - return (ret); + t->array[t->npages].flags = LSN_PAGE_NOLOCK; + t->array[t->npages].lsn = *lsnp; + t->array[t->npages].fid = DB_LOGFILEID_INVALID; + memset(&t->array[t->npages].pgdesc, 0, + sizeof(t->array[t->npages].pgdesc)); + + t->npages++; + + return (0); } +#endif /* HAVE_REPLICATION */ +/* + * PUBLIC: int __qam_incfirst_print __P((DB_ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ int __qam_incfirst_print(dbenv, dbtp, lsnp, notused2, notused3) DB_ENV *dbenv; @@ -197,31 +221,35 @@ __qam_incfirst_print(dbenv, dbtp, lsnp, notused2, notused3) void *notused3; { __qam_incfirst_args *argp; - u_int32_t i; - u_int ch; int ret; - i = 0; - ch = 0; notused2 = DB_TXN_ABORT; notused3 = NULL; if ((ret = __qam_incfirst_read(dbenv, dbtp->data, &argp)) != 0) return (ret); - printf("[%lu][%lu]qam_incfirst: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (void)printf( + "[%lu][%lu]__qam_incfirst%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", (u_long)lsnp->file, (u_long)lsnp->offset, + (argp->type & DB_debug_FLAG) ? "_debug" : "", (u_long)argp->type, (u_long)argp->txnid->txnid, (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - printf("\tfileid: %ld\n", (long)argp->fileid); - printf("\trecno: %lu\n", (u_long)argp->recno); - printf("\n"); - __os_free(argp, 0); + (void)printf("\tfileid: %ld\n", (long)argp->fileid); + (void)printf("\trecno: %lu\n", (u_long)argp->recno); + (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno); + (void)printf("\n"); + __os_free(dbenv, argp); + return (0); } +/* + * PUBLIC: int __qam_incfirst_read __P((DB_ENV *, void *, + * PUBLIC: __qam_incfirst_args **)); + */ int __qam_incfirst_read(dbenv, recbuf, argpp) DB_ENV *dbenv; @@ -229,105 +257,271 @@ __qam_incfirst_read(dbenv, recbuf, argpp) __qam_incfirst_args **argpp; { __qam_incfirst_args *argp; + u_int32_t uinttmp; u_int8_t *bp; int ret; - ret = __os_malloc(dbenv, sizeof(__qam_incfirst_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) + if ((ret = __os_malloc(dbenv, + sizeof(__qam_incfirst_args) + sizeof(DB_TXN), &argp)) != 0) return (ret); argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; memcpy(&argp->type, bp, sizeof(argp->type)); bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); - memcpy(&argp->recno, bp, sizeof(argp->recno)); - bp += sizeof(argp->recno); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->fileid = (int32_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->recno = (db_recno_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->meta_pgno = (db_pgno_t)uinttmp; + bp += sizeof(uinttmp); + *argpp = argp; return (0); } +/* + * PUBLIC: int __qam_mvptr_log __P((DB *, DB_TXN *, DB_LSN *, + * PUBLIC: u_int32_t, u_int32_t, db_recno_t, db_recno_t, db_recno_t, + * PUBLIC: db_recno_t, DB_LSN *, db_pgno_t)); + */ int -__qam_mvptr_log(dbenv, txnid, ret_lsnp, flags, - opcode, fileid, old_first, new_first, old_cur, new_cur, - metalsn) - DB_ENV *dbenv; +__qam_mvptr_log(dbp, txnid, ret_lsnp, flags, + opcode, old_first, new_first, old_cur, new_cur, + metalsn, meta_pgno) + DB *dbp; DB_TXN *txnid; DB_LSN *ret_lsnp; u_int32_t flags; u_int32_t opcode; - int32_t fileid; db_recno_t old_first; db_recno_t new_first; db_recno_t old_cur; db_recno_t new_cur; DB_LSN * metalsn; + db_pgno_t meta_pgno; { DBT logrec; + DB_ENV *dbenv; + DB_TXNLOGREC *lr; DB_LSN *lsnp, null_lsn; - u_int32_t rectype, txn_num; - int ret; + u_int32_t uinttmp, rectype, txn_num; + u_int npad; u_int8_t *bp; - - rectype = DB_qam_mvptr; - if (txnid != NULL && - TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) - return (ret); - txn_num = txnid == NULL ? 0 : txnid->txnid; + int is_durable, ret; + + dbenv = dbp->dbenv; + rectype = DB___qam_mvptr; + npad = 0; + + is_durable = 1; + if (LF_ISSET(DB_LOG_NOT_DURABLE) || + F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) || + F_ISSET(dbp, DB_AM_NOT_DURABLE)) { + if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL) + return (0); + is_durable = 0; + } if (txnid == NULL) { - ZERO_LSN(null_lsn); + txn_num = 0; + null_lsn.file = 0; + null_lsn.offset = 0; lsnp = &null_lsn; - } else + } else { + if (TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid->txnid; lsnp = &txnid->last_lsn; + } + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(opcode) - + sizeof(fileid) - + sizeof(old_first) - + sizeof(new_first) - + sizeof(old_cur) - + sizeof(new_cur) - + sizeof(*metalsn); - if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) - return (ret); + + sizeof(u_int32_t) + + sizeof(u_int32_t) + + sizeof(u_int32_t) + + sizeof(u_int32_t) + + sizeof(u_int32_t) + + sizeof(u_int32_t) + + sizeof(*metalsn) + + sizeof(u_int32_t); + if (CRYPTO_ON(dbenv)) { + npad = + ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); + logrec.size += npad; + } + + if (!is_durable && txnid != NULL) { + if ((ret = __os_malloc(dbenv, + logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) + return (ret); +#ifdef DIAGNOSTIC + goto do_malloc; +#else + logrec.data = &lr->data; +#endif + } else { +#ifdef DIAGNOSTIC +do_malloc: +#endif + if ((ret = + __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { +#ifdef DIAGNOSTIC + if (!is_durable && txnid != NULL) + (void)__os_free(dbenv, lr); +#endif + return (ret); + } + } + if (npad > 0) + memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); - memcpy(bp, &opcode, sizeof(opcode)); - bp += sizeof(opcode); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - memcpy(bp, &old_first, sizeof(old_first)); - bp += sizeof(old_first); - memcpy(bp, &new_first, sizeof(new_first)); - bp += sizeof(new_first); - memcpy(bp, &old_cur, sizeof(old_cur)); - bp += sizeof(old_cur); - memcpy(bp, &new_cur, sizeof(new_cur)); - bp += sizeof(new_cur); + + uinttmp = (u_int32_t)opcode; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + DB_ASSERT(dbp->log_filename != NULL); + if (dbp->log_filename->id == DB_LOGFILEID_INVALID && + (ret = __dbreg_lazy_id(dbp)) != 0) + return (ret); + + uinttmp = (u_int32_t)dbp->log_filename->id; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + uinttmp = (u_int32_t)old_first; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + uinttmp = (u_int32_t)new_first; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + uinttmp = (u_int32_t)old_cur; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + uinttmp = (u_int32_t)new_cur; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + if (metalsn != NULL) memcpy(bp, metalsn, sizeof(*metalsn)); else memset(bp, 0, sizeof(*metalsn)); bp += sizeof(*metalsn); - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); - ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, logrec.size); + + uinttmp = (u_int32_t)meta_pgno; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + +#ifdef DIAGNOSTIC + if (!is_durable && txnid != NULL) { + /* + * We set the debug bit if we are going + * to log non-durable transactions so + * they will be ignored by recovery. + */ + memcpy(lr->data, logrec.data, logrec.size); + rectype |= DB_debug_FLAG; + memcpy(logrec.data, &rectype, sizeof(rectype)); + } +#endif + + if (!is_durable && txnid != NULL) { + ret = 0; + STAILQ_INSERT_HEAD(&txnid->logs, lr, links); +#ifdef DIAGNOSTIC + goto do_put; +#endif + } else{ +#ifdef DIAGNOSTIC +do_put: +#endif + ret = __log_put(dbenv, + ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); + if (ret == 0 && txnid != NULL) + txnid->last_lsn = *ret_lsnp; + } + + if (!is_durable) + LSN_NOT_LOGGED(*ret_lsnp); +#ifdef LOG_DIAGNOSTIC + if (ret != 0) + (void)__qam_mvptr_print(dbenv, + (DBT *)&logrec, ret_lsnp, NULL, NULL); +#endif +#ifndef DIAGNOSTIC + if (is_durable || txnid == NULL) +#endif + __os_free(dbenv, logrec.data); + return (ret); } +#ifdef HAVE_REPLICATION +/* + * PUBLIC: int __qam_mvptr_getpgnos __P((DB_ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_mvptr_getpgnos(dbenv, rec, lsnp, notused1, summary) + DB_ENV *dbenv; + DBT *rec; + DB_LSN *lsnp; + db_recops notused1; + void *summary; +{ + TXN_RECS *t; + int ret; + COMPQUIET(rec, NULL); + COMPQUIET(notused1, DB_TXN_ABORT); + + t = (TXN_RECS *)summary; + + if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0) + return (ret); + + t->array[t->npages].flags = LSN_PAGE_NOLOCK; + t->array[t->npages].lsn = *lsnp; + t->array[t->npages].fid = DB_LOGFILEID_INVALID; + memset(&t->array[t->npages].pgdesc, 0, + sizeof(t->array[t->npages].pgdesc)); + + t->npages++; + + return (0); +} +#endif /* HAVE_REPLICATION */ + +/* + * PUBLIC: int __qam_mvptr_print __P((DB_ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ int __qam_mvptr_print(dbenv, dbtp, lsnp, notused2, notused3) DB_ENV *dbenv; @@ -337,37 +531,40 @@ __qam_mvptr_print(dbenv, dbtp, lsnp, notused2, notused3) void *notused3; { __qam_mvptr_args *argp; - u_int32_t i; - u_int ch; int ret; - i = 0; - ch = 0; notused2 = DB_TXN_ABORT; notused3 = NULL; if ((ret = __qam_mvptr_read(dbenv, dbtp->data, &argp)) != 0) return (ret); - printf("[%lu][%lu]qam_mvptr: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (void)printf( + "[%lu][%lu]__qam_mvptr%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", (u_long)lsnp->file, (u_long)lsnp->offset, + (argp->type & DB_debug_FLAG) ? "_debug" : "", (u_long)argp->type, (u_long)argp->txnid->txnid, (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - printf("\topcode: %lu\n", (u_long)argp->opcode); - printf("\tfileid: %ld\n", (long)argp->fileid); - printf("\told_first: %lu\n", (u_long)argp->old_first); - printf("\tnew_first: %lu\n", (u_long)argp->new_first); - printf("\told_cur: %lu\n", (u_long)argp->old_cur); - printf("\tnew_cur: %lu\n", (u_long)argp->new_cur); - printf("\tmetalsn: [%lu][%lu]\n", + (void)printf("\topcode: %lu\n", (u_long)argp->opcode); + (void)printf("\tfileid: %ld\n", (long)argp->fileid); + (void)printf("\told_first: %lu\n", (u_long)argp->old_first); + (void)printf("\tnew_first: %lu\n", (u_long)argp->new_first); + (void)printf("\told_cur: %lu\n", (u_long)argp->old_cur); + (void)printf("\tnew_cur: %lu\n", (u_long)argp->new_cur); + (void)printf("\tmetalsn: [%lu][%lu]\n", (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); - printf("\n"); - __os_free(argp, 0); + (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno); + (void)printf("\n"); + __os_free(dbenv, argp); + return (0); } +/* + * PUBLIC: int __qam_mvptr_read __P((DB_ENV *, void *, __qam_mvptr_args **)); + */ int __qam_mvptr_read(dbenv, recbuf, argpp) DB_ENV *dbenv; @@ -375,106 +572,269 @@ __qam_mvptr_read(dbenv, recbuf, argpp) __qam_mvptr_args **argpp; { __qam_mvptr_args *argp; + u_int32_t uinttmp; u_int8_t *bp; int ret; - ret = __os_malloc(dbenv, sizeof(__qam_mvptr_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) + if ((ret = __os_malloc(dbenv, + sizeof(__qam_mvptr_args) + sizeof(DB_TXN), &argp)) != 0) return (ret); argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; memcpy(&argp->type, bp, sizeof(argp->type)); bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); - memcpy(&argp->opcode, bp, sizeof(argp->opcode)); - bp += sizeof(argp->opcode); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); - memcpy(&argp->old_first, bp, sizeof(argp->old_first)); - bp += sizeof(argp->old_first); - memcpy(&argp->new_first, bp, sizeof(argp->new_first)); - bp += sizeof(argp->new_first); - memcpy(&argp->old_cur, bp, sizeof(argp->old_cur)); - bp += sizeof(argp->old_cur); - memcpy(&argp->new_cur, bp, sizeof(argp->new_cur)); - bp += sizeof(argp->new_cur); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->opcode = (u_int32_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->fileid = (int32_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->old_first = (db_recno_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->new_first = (db_recno_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->old_cur = (db_recno_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->new_cur = (db_recno_t)uinttmp; + bp += sizeof(uinttmp); + memcpy(&argp->metalsn, bp, sizeof(argp->metalsn)); bp += sizeof(argp->metalsn); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->meta_pgno = (db_pgno_t)uinttmp; + bp += sizeof(uinttmp); + *argpp = argp; return (0); } +/* + * PUBLIC: int __qam_del_log __P((DB *, DB_TXN *, DB_LSN *, + * PUBLIC: u_int32_t, DB_LSN *, db_pgno_t, u_int32_t, db_recno_t)); + */ int -__qam_del_log(dbenv, txnid, ret_lsnp, flags, - fileid, lsn, pgno, indx, recno) - DB_ENV *dbenv; +__qam_del_log(dbp, txnid, ret_lsnp, flags, lsn, pgno, indx, recno) + DB *dbp; DB_TXN *txnid; DB_LSN *ret_lsnp; u_int32_t flags; - int32_t fileid; DB_LSN * lsn; db_pgno_t pgno; u_int32_t indx; db_recno_t recno; { DBT logrec; + DB_ENV *dbenv; + DB_TXNLOGREC *lr; DB_LSN *lsnp, null_lsn; - u_int32_t rectype, txn_num; - int ret; + u_int32_t uinttmp, rectype, txn_num; + u_int npad; u_int8_t *bp; - - rectype = DB_qam_del; - if (txnid != NULL && - TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) - return (ret); - txn_num = txnid == NULL ? 0 : txnid->txnid; + int is_durable, ret; + + dbenv = dbp->dbenv; + rectype = DB___qam_del; + npad = 0; + + is_durable = 1; + if (LF_ISSET(DB_LOG_NOT_DURABLE) || + F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) || + F_ISSET(dbp, DB_AM_NOT_DURABLE)) { + if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL) + return (0); + is_durable = 0; + } if (txnid == NULL) { - ZERO_LSN(null_lsn); + txn_num = 0; + null_lsn.file = 0; + null_lsn.offset = 0; lsnp = &null_lsn; - } else + } else { + if (TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid->txnid; lsnp = &txnid->last_lsn; + } + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) + + sizeof(u_int32_t) + sizeof(*lsn) - + sizeof(pgno) - + sizeof(indx) - + sizeof(recno); - if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) - return (ret); + + sizeof(u_int32_t) + + sizeof(u_int32_t) + + sizeof(u_int32_t); + if (CRYPTO_ON(dbenv)) { + npad = + ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); + logrec.size += npad; + } + + if (!is_durable && txnid != NULL) { + if ((ret = __os_malloc(dbenv, + logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) + return (ret); +#ifdef DIAGNOSTIC + goto do_malloc; +#else + logrec.data = &lr->data; +#endif + } else { +#ifdef DIAGNOSTIC +do_malloc: +#endif + if ((ret = + __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { +#ifdef DIAGNOSTIC + if (!is_durable && txnid != NULL) + (void)__os_free(dbenv, lr); +#endif + return (ret); + } + } + if (npad > 0) + memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); + + DB_ASSERT(dbp->log_filename != NULL); + if (dbp->log_filename->id == DB_LOGFILEID_INVALID && + (ret = __dbreg_lazy_id(dbp)) != 0) + return (ret); + + uinttmp = (u_int32_t)dbp->log_filename->id; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + if (lsn != NULL) memcpy(bp, lsn, sizeof(*lsn)); else memset(bp, 0, sizeof(*lsn)); bp += sizeof(*lsn); - memcpy(bp, &pgno, sizeof(pgno)); - bp += sizeof(pgno); - memcpy(bp, &indx, sizeof(indx)); - bp += sizeof(indx); - memcpy(bp, &recno, sizeof(recno)); - bp += sizeof(recno); - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); - ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, logrec.size); + + uinttmp = (u_int32_t)pgno; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + uinttmp = (u_int32_t)indx; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + uinttmp = (u_int32_t)recno; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + +#ifdef DIAGNOSTIC + if (!is_durable && txnid != NULL) { + /* + * We set the debug bit if we are going + * to log non-durable transactions so + * they will be ignored by recovery. + */ + memcpy(lr->data, logrec.data, logrec.size); + rectype |= DB_debug_FLAG; + memcpy(logrec.data, &rectype, sizeof(rectype)); + } +#endif + + if (!is_durable && txnid != NULL) { + ret = 0; + STAILQ_INSERT_HEAD(&txnid->logs, lr, links); +#ifdef DIAGNOSTIC + goto do_put; +#endif + } else{ +#ifdef DIAGNOSTIC +do_put: +#endif + ret = __log_put(dbenv, + ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); + if (ret == 0 && txnid != NULL) + txnid->last_lsn = *ret_lsnp; + } + + if (!is_durable) + LSN_NOT_LOGGED(*ret_lsnp); +#ifdef LOG_DIAGNOSTIC + if (ret != 0) + (void)__qam_del_print(dbenv, + (DBT *)&logrec, ret_lsnp, NULL, NULL); +#endif +#ifndef DIAGNOSTIC + if (is_durable || txnid == NULL) +#endif + __os_free(dbenv, logrec.data); + return (ret); } +#ifdef HAVE_REPLICATION +/* + * PUBLIC: int __qam_del_getpgnos __P((DB_ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_del_getpgnos(dbenv, rec, lsnp, notused1, summary) + DB_ENV *dbenv; + DBT *rec; + DB_LSN *lsnp; + db_recops notused1; + void *summary; +{ + TXN_RECS *t; + int ret; + COMPQUIET(rec, NULL); + COMPQUIET(notused1, DB_TXN_ABORT); + + t = (TXN_RECS *)summary; + + if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0) + return (ret); + + t->array[t->npages].flags = LSN_PAGE_NOLOCK; + t->array[t->npages].lsn = *lsnp; + t->array[t->npages].fid = DB_LOGFILEID_INVALID; + memset(&t->array[t->npages].pgdesc, 0, + sizeof(t->array[t->npages].pgdesc)); + + t->npages++; + + return (0); +} +#endif /* HAVE_REPLICATION */ + +/* + * PUBLIC: int __qam_del_print __P((DB_ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ int __qam_del_print(dbenv, dbtp, lsnp, notused2, notused3) DB_ENV *dbenv; @@ -484,35 +844,37 @@ __qam_del_print(dbenv, dbtp, lsnp, notused2, notused3) void *notused3; { __qam_del_args *argp; - u_int32_t i; - u_int ch; int ret; - i = 0; - ch = 0; notused2 = DB_TXN_ABORT; notused3 = NULL; if ((ret = __qam_del_read(dbenv, dbtp->data, &argp)) != 0) return (ret); - printf("[%lu][%lu]qam_del: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (void)printf( + "[%lu][%lu]__qam_del%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", (u_long)lsnp->file, (u_long)lsnp->offset, + (argp->type & DB_debug_FLAG) ? "_debug" : "", (u_long)argp->type, (u_long)argp->txnid->txnid, (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - printf("\tfileid: %ld\n", (long)argp->fileid); - printf("\tlsn: [%lu][%lu]\n", + (void)printf("\tfileid: %ld\n", (long)argp->fileid); + (void)printf("\tlsn: [%lu][%lu]\n", (u_long)argp->lsn.file, (u_long)argp->lsn.offset); - printf("\tpgno: %lu\n", (u_long)argp->pgno); - printf("\tindx: %lu\n", (u_long)argp->indx); - printf("\trecno: %lu\n", (u_long)argp->recno); - printf("\n"); - __os_free(argp, 0); + (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); + (void)printf("\tindx: %lu\n", (u_long)argp->indx); + (void)printf("\trecno: %lu\n", (u_long)argp->recno); + (void)printf("\n"); + __os_free(dbenv, argp); + return (0); } +/* + * PUBLIC: int __qam_del_read __P((DB_ENV *, void *, __qam_del_args **)); + */ int __qam_del_read(dbenv, recbuf, argpp) DB_ENV *dbenv; @@ -520,44 +882,60 @@ __qam_del_read(dbenv, recbuf, argpp) __qam_del_args **argpp; { __qam_del_args *argp; + u_int32_t uinttmp; u_int8_t *bp; int ret; - ret = __os_malloc(dbenv, sizeof(__qam_del_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) + if ((ret = __os_malloc(dbenv, + sizeof(__qam_del_args) + sizeof(DB_TXN), &argp)) != 0) return (ret); argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; memcpy(&argp->type, bp, sizeof(argp->type)); bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->fileid = (int32_t)uinttmp; + bp += sizeof(uinttmp); + memcpy(&argp->lsn, bp, sizeof(argp->lsn)); bp += sizeof(argp->lsn); - memcpy(&argp->pgno, bp, sizeof(argp->pgno)); - bp += sizeof(argp->pgno); - memcpy(&argp->indx, bp, sizeof(argp->indx)); - bp += sizeof(argp->indx); - memcpy(&argp->recno, bp, sizeof(argp->recno)); - bp += sizeof(argp->recno); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->pgno = (db_pgno_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->indx = (u_int32_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->recno = (db_recno_t)uinttmp; + bp += sizeof(uinttmp); + *argpp = argp; return (0); } +/* + * PUBLIC: int __qam_add_log __P((DB *, DB_TXN *, DB_LSN *, + * PUBLIC: u_int32_t, DB_LSN *, db_pgno_t, u_int32_t, db_recno_t, + * PUBLIC: const DBT *, u_int32_t, const DBT *)); + */ int -__qam_add_log(dbenv, txnid, ret_lsnp, flags, - fileid, lsn, pgno, indx, recno, data, - vflag, olddata) - DB_ENV *dbenv; +__qam_add_log(dbp, txnid, ret_lsnp, flags, lsn, pgno, indx, recno, data, + vflag, olddata) + DB *dbp; DB_TXN *txnid; DB_LSN *ret_lsnp; u_int32_t flags; - int32_t fileid; DB_LSN * lsn; db_pgno_t pgno; u_int32_t indx; @@ -567,55 +945,117 @@ __qam_add_log(dbenv, txnid, ret_lsnp, flags, const DBT *olddata; { DBT logrec; + DB_ENV *dbenv; + DB_TXNLOGREC *lr; DB_LSN *lsnp, null_lsn; - u_int32_t zero; - u_int32_t rectype, txn_num; - int ret; + u_int32_t zero, uinttmp, rectype, txn_num; + u_int npad; u_int8_t *bp; - - rectype = DB_qam_add; - if (txnid != NULL && - TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) - return (ret); - txn_num = txnid == NULL ? 0 : txnid->txnid; + int is_durable, ret; + + dbenv = dbp->dbenv; + rectype = DB___qam_add; + npad = 0; + + is_durable = 1; + if (LF_ISSET(DB_LOG_NOT_DURABLE) || + F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) || + F_ISSET(dbp, DB_AM_NOT_DURABLE)) { + if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL) + return (0); + is_durable = 0; + } if (txnid == NULL) { - ZERO_LSN(null_lsn); + txn_num = 0; + null_lsn.file = 0; + null_lsn.offset = 0; lsnp = &null_lsn; - } else + } else { + if (TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid->txnid; lsnp = &txnid->last_lsn; + } + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) + + sizeof(u_int32_t) + sizeof(*lsn) - + sizeof(pgno) - + sizeof(indx) - + sizeof(recno) + + sizeof(u_int32_t) + + sizeof(u_int32_t) + + sizeof(u_int32_t) + sizeof(u_int32_t) + (data == NULL ? 0 : data->size) - + sizeof(vflag) + + sizeof(u_int32_t) + sizeof(u_int32_t) + (olddata == NULL ? 0 : olddata->size); - if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) - return (ret); + if (CRYPTO_ON(dbenv)) { + npad = + ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); + logrec.size += npad; + } + + if (!is_durable && txnid != NULL) { + if ((ret = __os_malloc(dbenv, + logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) + return (ret); +#ifdef DIAGNOSTIC + goto do_malloc; +#else + logrec.data = &lr->data; +#endif + } else { +#ifdef DIAGNOSTIC +do_malloc: +#endif + if ((ret = + __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { +#ifdef DIAGNOSTIC + if (!is_durable && txnid != NULL) + (void)__os_free(dbenv, lr); +#endif + return (ret); + } + } + if (npad > 0) + memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); + + DB_ASSERT(dbp->log_filename != NULL); + if (dbp->log_filename->id == DB_LOGFILEID_INVALID && + (ret = __dbreg_lazy_id(dbp)) != 0) + return (ret); + + uinttmp = (u_int32_t)dbp->log_filename->id; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + if (lsn != NULL) memcpy(bp, lsn, sizeof(*lsn)); else memset(bp, 0, sizeof(*lsn)); bp += sizeof(*lsn); - memcpy(bp, &pgno, sizeof(pgno)); - bp += sizeof(pgno); - memcpy(bp, &indx, sizeof(indx)); - bp += sizeof(indx); - memcpy(bp, &recno, sizeof(recno)); - bp += sizeof(recno); + + uinttmp = (u_int32_t)pgno; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + uinttmp = (u_int32_t)indx; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + + uinttmp = (u_int32_t)recno; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + if (data == NULL) { zero = 0; memcpy(bp, &zero, sizeof(u_int32_t)); @@ -626,8 +1066,11 @@ __qam_add_log(dbenv, txnid, ret_lsnp, flags, memcpy(bp, data->data, data->size); bp += data->size; } - memcpy(bp, &vflag, sizeof(vflag)); - bp += sizeof(vflag); + + uinttmp = (u_int32_t)vflag; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + if (olddata == NULL) { zero = 0; memcpy(bp, &zero, sizeof(u_int32_t)); @@ -638,14 +1081,92 @@ __qam_add_log(dbenv, txnid, ret_lsnp, flags, memcpy(bp, olddata->data, olddata->size); bp += olddata->size; } - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); - ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, logrec.size); + + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + +#ifdef DIAGNOSTIC + if (!is_durable && txnid != NULL) { + /* + * We set the debug bit if we are going + * to log non-durable transactions so + * they will be ignored by recovery. + */ + memcpy(lr->data, logrec.data, logrec.size); + rectype |= DB_debug_FLAG; + memcpy(logrec.data, &rectype, sizeof(rectype)); + } +#endif + + if (!is_durable && txnid != NULL) { + ret = 0; + STAILQ_INSERT_HEAD(&txnid->logs, lr, links); +#ifdef DIAGNOSTIC + goto do_put; +#endif + } else{ +#ifdef DIAGNOSTIC +do_put: +#endif + ret = __log_put(dbenv, + ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); + if (ret == 0 && txnid != NULL) + txnid->last_lsn = *ret_lsnp; + } + + if (!is_durable) + LSN_NOT_LOGGED(*ret_lsnp); +#ifdef LOG_DIAGNOSTIC + if (ret != 0) + (void)__qam_add_print(dbenv, + (DBT *)&logrec, ret_lsnp, NULL, NULL); +#endif +#ifndef DIAGNOSTIC + if (is_durable || txnid == NULL) +#endif + __os_free(dbenv, logrec.data); + return (ret); } +#ifdef HAVE_REPLICATION +/* + * PUBLIC: int __qam_add_getpgnos __P((DB_ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__qam_add_getpgnos(dbenv, rec, lsnp, notused1, summary) + DB_ENV *dbenv; + DBT *rec; + DB_LSN *lsnp; + db_recops notused1; + void *summary; +{ + TXN_RECS *t; + int ret; + COMPQUIET(rec, NULL); + COMPQUIET(notused1, DB_TXN_ABORT); + + t = (TXN_RECS *)summary; + + if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0) + return (ret); + + t->array[t->npages].flags = LSN_PAGE_NOLOCK; + t->array[t->npages].lsn = *lsnp; + t->array[t->npages].fid = DB_LOGFILEID_INVALID; + memset(&t->array[t->npages].pgdesc, 0, + sizeof(t->array[t->npages].pgdesc)); + + t->npages++; + + return (0); +} +#endif /* HAVE_REPLICATION */ + +/* + * PUBLIC: int __qam_add_print __P((DB_ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ int __qam_add_print(dbenv, dbtp, lsnp, notused2, notused3) DB_ENV *dbenv; @@ -656,53 +1177,51 @@ __qam_add_print(dbenv, dbtp, lsnp, notused2, notused3) { __qam_add_args *argp; u_int32_t i; - u_int ch; + int ch; int ret; - i = 0; - ch = 0; notused2 = DB_TXN_ABORT; notused3 = NULL; if ((ret = __qam_add_read(dbenv, dbtp->data, &argp)) != 0) return (ret); - printf("[%lu][%lu]qam_add: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (void)printf( + "[%lu][%lu]__qam_add%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", (u_long)lsnp->file, (u_long)lsnp->offset, + (argp->type & DB_debug_FLAG) ? "_debug" : "", (u_long)argp->type, (u_long)argp->txnid->txnid, (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - printf("\tfileid: %ld\n", (long)argp->fileid); - printf("\tlsn: [%lu][%lu]\n", + (void)printf("\tfileid: %ld\n", (long)argp->fileid); + (void)printf("\tlsn: [%lu][%lu]\n", (u_long)argp->lsn.file, (u_long)argp->lsn.offset); - printf("\tpgno: %lu\n", (u_long)argp->pgno); - printf("\tindx: %lu\n", (u_long)argp->indx); - printf("\trecno: %lu\n", (u_long)argp->recno); - printf("\tdata: "); + (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); + (void)printf("\tindx: %lu\n", (u_long)argp->indx); + (void)printf("\trecno: %lu\n", (u_long)argp->recno); + (void)printf("\tdata: "); for (i = 0; i < argp->data.size; i++) { ch = ((u_int8_t *)argp->data.data)[i]; - if (isprint(ch) || ch == 0xa) - putchar(ch); - else - printf("%#x ", ch); + printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); } - printf("\n"); - printf("\tvflag: %lu\n", (u_long)argp->vflag); - printf("\tolddata: "); + (void)printf("\n"); + (void)printf("\tvflag: %lu\n", (u_long)argp->vflag); + (void)printf("\tolddata: "); for (i = 0; i < argp->olddata.size; i++) { ch = ((u_int8_t *)argp->olddata.data)[i]; - if (isprint(ch) || ch == 0xa) - putchar(ch); - else - printf("%#x ", ch); + printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); } - printf("\n"); - printf("\n"); - __os_free(argp, 0); + (void)printf("\n"); + (void)printf("\n"); + __os_free(dbenv, argp); + return (0); } +/* + * PUBLIC: int __qam_add_read __P((DB_ENV *, void *, __qam_add_args **)); + */ int __qam_add_read(dbenv, recbuf, argpp) DB_ENV *dbenv; @@ -710,422 +1229,287 @@ __qam_add_read(dbenv, recbuf, argpp) __qam_add_args **argpp; { __qam_add_args *argp; + u_int32_t uinttmp; u_int8_t *bp; int ret; - ret = __os_malloc(dbenv, sizeof(__qam_add_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) + if ((ret = __os_malloc(dbenv, + sizeof(__qam_add_args) + sizeof(DB_TXN), &argp)) != 0) return (ret); argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; memcpy(&argp->type, bp, sizeof(argp->type)); bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->fileid = (int32_t)uinttmp; + bp += sizeof(uinttmp); + memcpy(&argp->lsn, bp, sizeof(argp->lsn)); bp += sizeof(argp->lsn); - memcpy(&argp->pgno, bp, sizeof(argp->pgno)); - bp += sizeof(argp->pgno); - memcpy(&argp->indx, bp, sizeof(argp->indx)); - bp += sizeof(argp->indx); - memcpy(&argp->recno, bp, sizeof(argp->recno)); - bp += sizeof(argp->recno); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->pgno = (db_pgno_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->indx = (u_int32_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->recno = (db_recno_t)uinttmp; + bp += sizeof(uinttmp); + memset(&argp->data, 0, sizeof(argp->data)); memcpy(&argp->data.size, bp, sizeof(u_int32_t)); bp += sizeof(u_int32_t); argp->data.data = bp; bp += argp->data.size; - memcpy(&argp->vflag, bp, sizeof(argp->vflag)); - bp += sizeof(argp->vflag); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->vflag = (u_int32_t)uinttmp; + bp += sizeof(uinttmp); + memset(&argp->olddata, 0, sizeof(argp->olddata)); memcpy(&argp->olddata.size, bp, sizeof(u_int32_t)); bp += sizeof(u_int32_t); argp->olddata.data = bp; bp += argp->olddata.size; + *argpp = argp; return (0); } +/* + * PUBLIC: int __qam_delext_log __P((DB *, DB_TXN *, DB_LSN *, + * PUBLIC: u_int32_t, DB_LSN *, db_pgno_t, u_int32_t, db_recno_t, + * PUBLIC: const DBT *)); + */ int -__qam_delete_log(dbenv, txnid, ret_lsnp, flags, - name, lsn) - DB_ENV *dbenv; +__qam_delext_log(dbp, txnid, ret_lsnp, flags, lsn, pgno, indx, recno, data) + DB *dbp; DB_TXN *txnid; DB_LSN *ret_lsnp; u_int32_t flags; - const DBT *name; DB_LSN * lsn; + db_pgno_t pgno; + u_int32_t indx; + db_recno_t recno; + const DBT *data; { DBT logrec; + DB_ENV *dbenv; + DB_TXNLOGREC *lr; DB_LSN *lsnp, null_lsn; - u_int32_t zero; - u_int32_t rectype, txn_num; - int ret; + u_int32_t zero, uinttmp, rectype, txn_num; + u_int npad; u_int8_t *bp; - - rectype = DB_qam_delete; - if (txnid != NULL && - TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) - return (ret); - txn_num = txnid == NULL ? 0 : txnid->txnid; + int is_durable, ret; + + dbenv = dbp->dbenv; + rectype = DB___qam_delext; + npad = 0; + + is_durable = 1; + if (LF_ISSET(DB_LOG_NOT_DURABLE) || + F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) || + F_ISSET(dbp, DB_AM_NOT_DURABLE)) { + if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && txnid == NULL) + return (0); + is_durable = 0; + } if (txnid == NULL) { - ZERO_LSN(null_lsn); + txn_num = 0; + null_lsn.file = 0; + null_lsn.offset = 0; lsnp = &null_lsn; - } else + } else { + if (TAILQ_FIRST(&txnid->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + return (ret); + txn_num = txnid->txnid; lsnp = &txnid->last_lsn; + } + logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) + (name == NULL ? 0 : name->size) - + sizeof(*lsn); - if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) - return (ret); + + sizeof(u_int32_t) + + sizeof(*lsn) + + sizeof(u_int32_t) + + sizeof(u_int32_t) + + sizeof(u_int32_t) + + sizeof(u_int32_t) + (data == NULL ? 0 : data->size); + if (CRYPTO_ON(dbenv)) { + npad = + ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); + logrec.size += npad; + } + + if (!is_durable && txnid != NULL) { + if ((ret = __os_malloc(dbenv, + logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) + return (ret); +#ifdef DIAGNOSTIC + goto do_malloc; +#else + logrec.data = &lr->data; +#endif + } else { +#ifdef DIAGNOSTIC +do_malloc: +#endif + if ((ret = + __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { +#ifdef DIAGNOSTIC + if (!is_durable && txnid != NULL) + (void)__os_free(dbenv, lr); +#endif + return (ret); + } + } + if (npad > 0) + memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); bp = logrec.data; + memcpy(bp, &rectype, sizeof(rectype)); bp += sizeof(rectype); + memcpy(bp, &txn_num, sizeof(txn_num)); bp += sizeof(txn_num); + memcpy(bp, lsnp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); - if (name == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &name->size, sizeof(name->size)); - bp += sizeof(name->size); - memcpy(bp, name->data, name->size); - bp += name->size; - } + + DB_ASSERT(dbp->log_filename != NULL); + if (dbp->log_filename->id == DB_LOGFILEID_INVALID && + (ret = __dbreg_lazy_id(dbp)) != 0) + return (ret); + + uinttmp = (u_int32_t)dbp->log_filename->id; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); + if (lsn != NULL) memcpy(bp, lsn, sizeof(*lsn)); else memset(bp, 0, sizeof(*lsn)); bp += sizeof(*lsn); - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); - ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, logrec.size); - return (ret); -} - -int -__qam_delete_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __qam_delete_args *argp; - u_int32_t i; - u_int ch; - int ret; - - i = 0; - ch = 0; - notused2 = DB_TXN_ABORT; - notused3 = NULL; - - if ((ret = __qam_delete_read(dbenv, dbtp->data, &argp)) != 0) - return (ret); - printf("[%lu][%lu]qam_delete: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, - (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); - printf("\tname: "); - for (i = 0; i < argp->name.size; i++) { - ch = ((u_int8_t *)argp->name.data)[i]; - if (isprint(ch) || ch == 0xa) - putchar(ch); - else - printf("%#x ", ch); - } - printf("\n"); - printf("\tlsn: [%lu][%lu]\n", - (u_long)argp->lsn.file, (u_long)argp->lsn.offset); - printf("\n"); - __os_free(argp, 0); - return (0); -} - -int -__qam_delete_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __qam_delete_args **argpp; -{ - __qam_delete_args *argp; - u_int8_t *bp; - int ret; - ret = __os_malloc(dbenv, sizeof(__qam_delete_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) - return (ret); - argp->txnid = (DB_TXN *)&argp[1]; - bp = recbuf; - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memset(&argp->name, 0, sizeof(argp->name)); - memcpy(&argp->name.size, bp, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->name.data = bp; - bp += argp->name.size; - memcpy(&argp->lsn, bp, sizeof(argp->lsn)); - bp += sizeof(argp->lsn); - *argpp = argp; - return (0); -} + uinttmp = (u_int32_t)pgno; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); -int -__qam_rename_log(dbenv, txnid, ret_lsnp, flags, - name, newname) - DB_ENV *dbenv; - DB_TXN *txnid; - DB_LSN *ret_lsnp; - u_int32_t flags; - const DBT *name; - const DBT *newname; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn; - u_int32_t zero; - u_int32_t rectype, txn_num; - int ret; - u_int8_t *bp; + uinttmp = (u_int32_t)indx; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); - rectype = DB_qam_rename; - if (txnid != NULL && - TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) - return (ret); - txn_num = txnid == NULL ? 0 : txnid->txnid; - if (txnid == NULL) { - ZERO_LSN(null_lsn); - lsnp = &null_lsn; - } else - lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) + (name == NULL ? 0 : name->size) - + sizeof(u_int32_t) + (newname == NULL ? 0 : newname->size); - if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) - return (ret); + uinttmp = (u_int32_t)recno; + memcpy(bp, &uinttmp, sizeof(uinttmp)); + bp += sizeof(uinttmp); - bp = logrec.data; - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - if (name == NULL) { + if (data == NULL) { zero = 0; memcpy(bp, &zero, sizeof(u_int32_t)); bp += sizeof(u_int32_t); } else { - memcpy(bp, &name->size, sizeof(name->size)); - bp += sizeof(name->size); - memcpy(bp, name->data, name->size); - bp += name->size; + memcpy(bp, &data->size, sizeof(data->size)); + bp += sizeof(data->size); + memcpy(bp, data->data, data->size); + bp += data->size; } - if (newname == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &newname->size, sizeof(newname->size)); - bp += sizeof(newname->size); - memcpy(bp, newname->data, newname->size); - bp += newname->size; + + DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + +#ifdef DIAGNOSTIC + if (!is_durable && txnid != NULL) { + /* + * We set the debug bit if we are going + * to log non-durable transactions so + * they will be ignored by recovery. + */ + memcpy(lr->data, logrec.data, logrec.size); + rectype |= DB_debug_FLAG; + memcpy(logrec.data, &rectype, sizeof(rectype)); } - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); - ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, logrec.size); +#endif + + if (!is_durable && txnid != NULL) { + ret = 0; + STAILQ_INSERT_HEAD(&txnid->logs, lr, links); +#ifdef DIAGNOSTIC + goto do_put; +#endif + } else{ +#ifdef DIAGNOSTIC +do_put: +#endif + ret = __log_put(dbenv, + ret_lsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); + if (ret == 0 && txnid != NULL) + txnid->last_lsn = *ret_lsnp; + } + + if (!is_durable) + LSN_NOT_LOGGED(*ret_lsnp); +#ifdef LOG_DIAGNOSTIC + if (ret != 0) + (void)__qam_delext_print(dbenv, + (DBT *)&logrec, ret_lsnp, NULL, NULL); +#endif +#ifndef DIAGNOSTIC + if (is_durable || txnid == NULL) +#endif + __os_free(dbenv, logrec.data); + return (ret); } +#ifdef HAVE_REPLICATION +/* + * PUBLIC: int __qam_delext_getpgnos __P((DB_ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ int -__qam_rename_print(dbenv, dbtp, lsnp, notused2, notused3) +__qam_delext_getpgnos(dbenv, rec, lsnp, notused1, summary) DB_ENV *dbenv; - DBT *dbtp; + DBT *rec; DB_LSN *lsnp; - db_recops notused2; - void *notused3; + db_recops notused1; + void *summary; { - __qam_rename_args *argp; - u_int32_t i; - u_int ch; + TXN_RECS *t; int ret; + COMPQUIET(rec, NULL); + COMPQUIET(notused1, DB_TXN_ABORT); - i = 0; - ch = 0; - notused2 = DB_TXN_ABORT; - notused3 = NULL; + t = (TXN_RECS *)summary; - if ((ret = __qam_rename_read(dbenv, dbtp->data, &argp)) != 0) + if ((ret = __rep_check_alloc(dbenv, t, 1)) != 0) return (ret); - printf("[%lu][%lu]qam_rename: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, - (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); - printf("\tname: "); - for (i = 0; i < argp->name.size; i++) { - ch = ((u_int8_t *)argp->name.data)[i]; - if (isprint(ch) || ch == 0xa) - putchar(ch); - else - printf("%#x ", ch); - } - printf("\n"); - printf("\tnewname: "); - for (i = 0; i < argp->newname.size; i++) { - ch = ((u_int8_t *)argp->newname.data)[i]; - if (isprint(ch) || ch == 0xa) - putchar(ch); - else - printf("%#x ", ch); - } - printf("\n"); - printf("\n"); - __os_free(argp, 0); - return (0); -} -int -__qam_rename_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __qam_rename_args **argpp; -{ - __qam_rename_args *argp; - u_int8_t *bp; - int ret; + t->array[t->npages].flags = LSN_PAGE_NOLOCK; + t->array[t->npages].lsn = *lsnp; + t->array[t->npages].fid = DB_LOGFILEID_INVALID; + memset(&t->array[t->npages].pgdesc, 0, + sizeof(t->array[t->npages].pgdesc)); - ret = __os_malloc(dbenv, sizeof(__qam_rename_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) - return (ret); - argp->txnid = (DB_TXN *)&argp[1]; - bp = recbuf; - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memset(&argp->name, 0, sizeof(argp->name)); - memcpy(&argp->name.size, bp, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->name.data = bp; - bp += argp->name.size; - memset(&argp->newname, 0, sizeof(argp->newname)); - memcpy(&argp->newname.size, bp, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->newname.data = bp; - bp += argp->newname.size; - *argpp = argp; - return (0); -} + t->npages++; -int -__qam_delext_log(dbenv, txnid, ret_lsnp, flags, - fileid, lsn, pgno, indx, recno, data) - DB_ENV *dbenv; - DB_TXN *txnid; - DB_LSN *ret_lsnp; - u_int32_t flags; - int32_t fileid; - DB_LSN * lsn; - db_pgno_t pgno; - u_int32_t indx; - db_recno_t recno; - const DBT *data; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn; - u_int32_t zero; - u_int32_t rectype, txn_num; - int ret; - u_int8_t *bp; - - rectype = DB_qam_delext; - if (txnid != NULL && - TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) - return (ret); - txn_num = txnid == NULL ? 0 : txnid->txnid; - if (txnid == NULL) { - ZERO_LSN(null_lsn); - lsnp = &null_lsn; - } else - lsnp = &txnid->last_lsn; - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(fileid) - + sizeof(*lsn) - + sizeof(pgno) - + sizeof(indx) - + sizeof(recno) - + sizeof(u_int32_t) + (data == NULL ? 0 : data->size); - if ((ret = __os_malloc(dbenv, logrec.size, NULL, &logrec.data)) != 0) - return (ret); - - bp = logrec.data; - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - memcpy(bp, &fileid, sizeof(fileid)); - bp += sizeof(fileid); - if (lsn != NULL) - memcpy(bp, lsn, sizeof(*lsn)); - else - memset(bp, 0, sizeof(*lsn)); - bp += sizeof(*lsn); - memcpy(bp, &pgno, sizeof(pgno)); - bp += sizeof(pgno); - memcpy(bp, &indx, sizeof(indx)); - bp += sizeof(indx); - memcpy(bp, &recno, sizeof(recno)); - bp += sizeof(recno); - if (data == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &data->size, sizeof(data->size)); - bp += sizeof(data->size); - memcpy(bp, data->data, data->size); - bp += data->size; - } - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) == logrec.size); - ret = log_put(dbenv, ret_lsnp, (DBT *)&logrec, flags); - if (txnid != NULL) - txnid->last_lsn = *ret_lsnp; - __os_free(logrec.data, logrec.size); - return (ret); + return (0); } +#endif /* HAVE_REPLICATION */ +/* + * PUBLIC: int __qam_delext_print __P((DB_ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ int __qam_delext_print(dbenv, dbtp, lsnp, notused2, notused3) DB_ENV *dbenv; @@ -1136,43 +1520,44 @@ __qam_delext_print(dbenv, dbtp, lsnp, notused2, notused3) { __qam_delext_args *argp; u_int32_t i; - u_int ch; + int ch; int ret; - i = 0; - ch = 0; notused2 = DB_TXN_ABORT; notused3 = NULL; if ((ret = __qam_delext_read(dbenv, dbtp->data, &argp)) != 0) return (ret); - printf("[%lu][%lu]qam_delext: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", + (void)printf( + "[%lu][%lu]__qam_delext%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", (u_long)lsnp->file, (u_long)lsnp->offset, + (argp->type & DB_debug_FLAG) ? "_debug" : "", (u_long)argp->type, (u_long)argp->txnid->txnid, (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - printf("\tfileid: %ld\n", (long)argp->fileid); - printf("\tlsn: [%lu][%lu]\n", + (void)printf("\tfileid: %ld\n", (long)argp->fileid); + (void)printf("\tlsn: [%lu][%lu]\n", (u_long)argp->lsn.file, (u_long)argp->lsn.offset); - printf("\tpgno: %lu\n", (u_long)argp->pgno); - printf("\tindx: %lu\n", (u_long)argp->indx); - printf("\trecno: %lu\n", (u_long)argp->recno); - printf("\tdata: "); + (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); + (void)printf("\tindx: %lu\n", (u_long)argp->indx); + (void)printf("\trecno: %lu\n", (u_long)argp->recno); + (void)printf("\tdata: "); for (i = 0; i < argp->data.size; i++) { ch = ((u_int8_t *)argp->data.data)[i]; - if (isprint(ch) || ch == 0xa) - putchar(ch); - else - printf("%#x ", ch); + printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); } - printf("\n"); - printf("\n"); - __os_free(argp, 0); + (void)printf("\n"); + (void)printf("\n"); + __os_free(dbenv, argp); + return (0); } +/* + * PUBLIC: int __qam_delext_read __P((DB_ENV *, void *, __qam_delext_args **)); + */ int __qam_delext_read(dbenv, recbuf, argpp) DB_ENV *dbenv; @@ -1180,103 +1565,142 @@ __qam_delext_read(dbenv, recbuf, argpp) __qam_delext_args **argpp; { __qam_delext_args *argp; + u_int32_t uinttmp; u_int8_t *bp; int ret; - ret = __os_malloc(dbenv, sizeof(__qam_delext_args) + - sizeof(DB_TXN), NULL, &argp); - if (ret != 0) + if ((ret = __os_malloc(dbenv, + sizeof(__qam_delext_args) + sizeof(DB_TXN), &argp)) != 0) return (ret); argp->txnid = (DB_TXN *)&argp[1]; + bp = recbuf; memcpy(&argp->type, bp, sizeof(argp->type)); bp += sizeof(argp->type); + memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); bp += sizeof(argp->txnid->txnid); + memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); - memcpy(&argp->fileid, bp, sizeof(argp->fileid)); - bp += sizeof(argp->fileid); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->fileid = (int32_t)uinttmp; + bp += sizeof(uinttmp); + memcpy(&argp->lsn, bp, sizeof(argp->lsn)); bp += sizeof(argp->lsn); - memcpy(&argp->pgno, bp, sizeof(argp->pgno)); - bp += sizeof(argp->pgno); - memcpy(&argp->indx, bp, sizeof(argp->indx)); - bp += sizeof(argp->indx); - memcpy(&argp->recno, bp, sizeof(argp->recno)); - bp += sizeof(argp->recno); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->pgno = (db_pgno_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->indx = (u_int32_t)uinttmp; + bp += sizeof(uinttmp); + + memcpy(&uinttmp, bp, sizeof(uinttmp)); + argp->recno = (db_recno_t)uinttmp; + bp += sizeof(uinttmp); + memset(&argp->data, 0, sizeof(argp->data)); memcpy(&argp->data.size, bp, sizeof(u_int32_t)); bp += sizeof(u_int32_t); argp->data.data = bp; bp += argp->data.size; + *argpp = argp; return (0); } +/* + * PUBLIC: int __qam_init_print __P((DB_ENV *, int (***)(DB_ENV *, + * PUBLIC: DBT *, DB_LSN *, db_recops, void *), size_t *)); + */ int -__qam_init_print(dbenv) +__qam_init_print(dbenv, dtabp, dtabsizep) DB_ENV *dbenv; + int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + size_t *dtabsizep; { int ret; - if ((ret = __db_add_recovery(dbenv, - __qam_inc_print, DB_qam_inc)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_incfirst_print, DB_qam_incfirst)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_mvptr_print, DB_qam_mvptr)) != 0) + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_incfirst_print, DB___qam_incfirst)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_del_print, DB_qam_del)) != 0) + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_mvptr_print, DB___qam_mvptr)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_add_print, DB_qam_add)) != 0) + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_del_print, DB___qam_del)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_delete_print, DB_qam_delete)) != 0) + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_add_print, DB___qam_add)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_rename_print, DB_qam_rename)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_delext_print, DB_qam_delext)) != 0) + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_delext_print, DB___qam_delext)) != 0) return (ret); return (0); } +#ifdef HAVE_REPLICATION +/* + * PUBLIC: int __qam_init_getpgnos __P((DB_ENV *, int (***)(DB_ENV *, + * PUBLIC: DBT *, DB_LSN *, db_recops, void *), size_t *)); + */ int -__qam_init_recover(dbenv) +__qam_init_getpgnos(dbenv, dtabp, dtabsizep) DB_ENV *dbenv; + int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + size_t *dtabsizep; { int ret; - if ((ret = __db_add_recovery(dbenv, - __qam_inc_recover, DB_qam_inc)) != 0) + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_incfirst_getpgnos, DB___qam_incfirst)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_incfirst_recover, DB_qam_incfirst)) != 0) + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_mvptr_getpgnos, DB___qam_mvptr)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_mvptr_recover, DB_qam_mvptr)) != 0) + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_del_getpgnos, DB___qam_del)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_del_recover, DB_qam_del)) != 0) + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_add_getpgnos, DB___qam_add)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_add_recover, DB_qam_add)) != 0) + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_delext_getpgnos, DB___qam_delext)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_delete_recover, DB_qam_delete)) != 0) + return (0); +} +#endif /* HAVE_REPLICATION */ + +/* + * PUBLIC: int __qam_init_recover __P((DB_ENV *, int (***)(DB_ENV *, + * PUBLIC: DBT *, DB_LSN *, db_recops, void *), size_t *)); + */ +int +__qam_init_recover(dbenv, dtabp, dtabsizep) + DB_ENV *dbenv; + int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + size_t *dtabsizep; +{ + int ret; + + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_incfirst_recover, DB___qam_incfirst)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_mvptr_recover, DB___qam_mvptr)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_rename_recover, DB_qam_rename)) != 0) + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_del_recover, DB___qam_del)) != 0) return (ret); - if ((ret = __db_add_recovery(dbenv, - __qam_delext_recover, DB_qam_delext)) != 0) + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_add_recover, DB___qam_add)) != 0) + return (ret); + if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, + __qam_delext_recover, DB___qam_delext)) != 0) return (ret); return (0); } - diff --git a/db/qam/qam_conv.c b/db/qam/qam_conv.c index 2eb1c7227..ee5f23b50 100644 --- a/db/qam/qam_conv.c +++ b/db/qam/qam_conv.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: qam_conv.c,v 11.6 2000/11/16 23:40:57 ubell Exp $"; +static const char revid[] = "$Id: qam_conv.c,v 11.16 2003/01/08 05:37:19 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -16,10 +16,10 @@ static const char revid[] = "$Id: qam_conv.c,v 11.6 2000/11/16 23:40:57 ubell Ex #endif #include "db_int.h" -#include "db_page.h" -#include "qam.h" -#include "db_swap.h" -#include "db_am.h" +#include "dbinc/db_page.h" +#include "dbinc/db_swap.h" +#include "dbinc/db_am.h" +#include "dbinc/qam.h" /* * __qam_mswap -- @@ -43,6 +43,8 @@ __qam_mswap(pg) SWAP32(p); /* re_pad */ SWAP32(p); /* rec_page */ SWAP32(p); /* page_ext */ + p += 91 * sizeof(u_int32_t); /* unused */ + SWAP32(p); /* crypto_magic */ return (0); } @@ -68,7 +70,7 @@ __qam_pgin_out(dbenv, pg, pp, cookie) COMPQUIET(pg, 0); COMPQUIET(dbenv, NULL); pginfo = (DB_PGINFO *)cookie->data; - if (!pginfo->needswap) + if (!F_ISSET(pginfo, DB_AM_SWAP)) return (0); h = pp; diff --git a/db/qam/qam_files.c b/db/qam/qam_files.c index e53a3bf24..b6fd42dd0 100644 --- a/db/qam/qam_files.c +++ b/db/qam/qam_files.c @@ -1,67 +1,74 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: qam_files.c,v 1.16 2001/01/19 18:01:59 bostic Exp $"; +static const char revid[] = "$Id: qam_files.c,v 1.72 2003/10/03 21:21:54 ubell Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> +#include <stdlib.h> #include <string.h> +#include <ctype.h> #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "db_am.h" -#include "lock.h" -#include "btree.h" -#include "qam.h" -#include "mp.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/db_am.h" +#include "dbinc/log.h" +#include "dbinc/fop.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" + +#define QAM_EXNAME(Q, I, B, L) \ + snprintf((B), (L), \ + QUEUE_EXTENT, (Q)->dir, PATH_SEPARATOR[0], (Q)->name, (I)) /* - * __qam_fprobe -- calcluate and open extent + * __qam_fprobe -- calculate and open extent * - * Calculate which extent the page is in, open and create - * if necessary. + * Calculate which extent the page is in, open and create if necessary. * - * PUBLIC: int __qam_fprobe __P((DB *, db_pgno_t, void *, qam_probe_mode, int)); + * PUBLIC: int __qam_fprobe + * PUBLIC: __P((DB *, db_pgno_t, void *, qam_probe_mode, u_int32_t)); */ - int __qam_fprobe(dbp, pgno, addrp, mode, flags) DB *dbp; db_pgno_t pgno; void *addrp; qam_probe_mode mode; - int flags; + u_int32_t flags; { DB_ENV *dbenv; DB_MPOOLFILE *mpf; MPFARRAY *array; QUEUE *qp; - u_int32_t extid, maxext; - char buf[256]; - int numext, offset, oldext, openflags, ret; + u_int8_t fid[DB_FILE_ID_LEN]; + u_int32_t extid, maxext, openflags; + char buf[MAXPATHLEN]; + int ftype, numext, offset, oldext, ret; + dbenv = dbp->dbenv; qp = (QUEUE *)dbp->q_internal; + ret = 0; + if (qp->page_ext == 0) { mpf = dbp->mpf; - if (mode == QAM_PROBE_GET) - return (memp_fget(mpf, &pgno, flags, addrp)); - return (memp_fput(mpf, addrp, flags)); + return (mode == QAM_PROBE_GET ? + __memp_fget(mpf, &pgno, flags, addrp) : + __memp_fput(mpf, addrp, flags)); } - dbenv = dbp->dbenv; mpf = NULL; - ret = 0; /* * Need to lock long enough to find the mpf or create the file. @@ -69,7 +76,7 @@ __qam_fprobe(dbp, pgno, addrp, mode, flags) * in that file. */ MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); - extid = (pgno - 1) / qp->page_ext; + extid = QAM_PAGE_EXTENT(dbp, pgno); /* Array1 will always be in use if array2 is in use. */ array = &qp->array1; @@ -92,36 +99,53 @@ __qam_fprobe(dbp, pgno, addrp, mode, flags) /* * Check to see if the requested extent is outside the range of - * extents in the array. This is true by defualt if there are + * extents in the array. This is true by default if there are * no extents here yet. */ if (offset < 0 || (unsigned) offset >= array->n_extent) { oldext = array->n_extent; - numext = array->hi_extent - array->low_extent + 1; - if (offset < 0 - && (unsigned) -offset + numext <= array->n_extent) { - /* If we can fit this one in, move the array up */ + numext = array->hi_extent - array->low_extent + 1; + if (offset < 0 && + (unsigned) -offset + numext <= array->n_extent) { + /* + * If we can fit this one into the existing array by + * shifting the existing entries then we do not have + * to allocate. + */ memmove(&array->mpfarray[-offset], array->mpfarray, numext * sizeof(array->mpfarray[0])); memset(array->mpfarray, 0, -offset - * sizeof(array->mpfarray[0])); + * sizeof(array->mpfarray[0])); offset = 0; } else if ((u_int32_t)offset == array->n_extent && mode != QAM_PROBE_MPF && array->mpfarray[0].pinref == 0) { - /* We can close the bottom extent. */ + /* + * If this is at the end of the array and the file at + * the begining has a zero pin count we can close + * the bottom extent and put this one at the end. + * TODO: If this process is "slow" then it might be + * appending but miss one or more extents. + * We could check to see if all the extents + * are unpinned and close them in the else + * clause below. + */ mpf = array->mpfarray[0].mpf; - if (mpf != NULL && (ret = memp_fclose(mpf)) != 0) + if (mpf != NULL && (ret = __memp_fclose(mpf, 0)) != 0) goto err; memmove(&array->mpfarray[0], &array->mpfarray[1], - (array->n_extent - 1) * sizeof (array->mpfarray[0])); + (array->n_extent - 1) * sizeof(array->mpfarray[0])); array->low_extent++; array->hi_extent++; offset--; array->mpfarray[offset].mpf = NULL; array->mpfarray[offset].pinref = 0; } else { - /* See if we have wrapped around the queue. */ + /* + * See if we have wrapped around the queue. + * If it has then allocate the second array. + * Otherwise just expand the one we are using. + */ maxext = (u_int32_t) UINT32_T_MAX / (qp->page_ext * qp->rec_page); if ((u_int32_t) abs(offset) >= maxext/2) { @@ -143,51 +167,84 @@ __qam_fprobe(dbp, pgno, addrp, mode, flags) alloc: if ((ret = __os_realloc(dbenv, array->n_extent * sizeof(struct __qmpf), - NULL, &array->mpfarray)) != 0) + &array->mpfarray)) != 0) goto err; if (offset < 0) { + /* + * Move the array up and put the new one + * in the first slot. + */ offset = -offset; - memmove(&array->mpfarray[offset], array->mpfarray, + memmove(&array->mpfarray[offset], + array->mpfarray, numext * sizeof(array->mpfarray[0])); memset(array->mpfarray, 0, offset * sizeof(array->mpfarray[0])); memset(&array->mpfarray[numext + offset], 0, - (array->n_extent - (numext + offset)) - * sizeof(array->mpfarray[0])); + (array->n_extent - (numext + offset)) + * sizeof(array->mpfarray[0])); offset = 0; } else + /* Clear the new part of the array. */ memset(&array->mpfarray[oldext], 0, (array->n_extent - oldext) * sizeof(array->mpfarray[0])); } } + /* Update the low and hi range of saved extents. */ if (extid < array->low_extent) array->low_extent = extid; if (extid > array->hi_extent) array->hi_extent = extid; + + /* If the extent file is not yet open, open it. */ if (array->mpfarray[offset].mpf == NULL) { - snprintf(buf, - sizeof(buf), QUEUE_EXTENT, qp->dir, qp->name, extid); + QAM_EXNAME(qp, extid, buf, sizeof(buf)); + if ((ret = __memp_fcreate( + dbenv, &array->mpfarray[offset].mpf)) != 0) + goto err; + mpf = array->mpfarray[offset].mpf; + (void)__memp_set_lsn_offset(mpf, 0); + (void)__memp_set_pgcookie(mpf, &qp->pgcookie); + (void)__memp_get_ftype(dbp->mpf, &ftype); + (void)__memp_set_ftype(mpf, ftype); + + /* Set up the fileid for this extent. */ + __qam_exid(dbp, fid, extid); + (void)__memp_set_fileid(mpf, fid); openflags = DB_EXTENT; if (LF_ISSET(DB_MPOOL_CREATE)) openflags |= DB_CREATE; if (F_ISSET(dbp, DB_AM_RDONLY)) openflags |= DB_RDONLY; - qp->finfo.fileid = NULL; - if ((ret = __memp_fopen(dbenv->mp_handle, - NULL, buf, openflags, qp->mode, dbp->pgsize, - 1, &qp->finfo, &array->mpfarray[offset].mpf)) != 0) + if (F_ISSET(dbenv, DB_ENV_DIRECT_DB)) + openflags |= DB_DIRECT; + if ((ret = __memp_fopen( + mpf, NULL, buf, openflags, qp->mode, dbp->pgsize)) != 0) { + array->mpfarray[offset].mpf = NULL; + (void)__memp_fclose(mpf, 0); goto err; + } } + /* + * We have found the right file. Update its ref count + * before dropping the dbp mutex so it does not go away. + */ mpf = array->mpfarray[offset].mpf; if (mode == QAM_PROBE_GET) array->mpfarray[offset].pinref++; + + /* + * If we may create the page, then we are writing, + * the file may nolonger be empty after this operation + * so we clear the UNLINK flag. + */ if (LF_ISSET(DB_MPOOL_CREATE)) - __memp_clear_unlink(mpf); + (void)__memp_set_flags(mpf, DB_MPOOL_UNLINK, 0); err: MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); @@ -200,10 +257,12 @@ err: pgno--; pgno %= qp->page_ext; if (mode == QAM_PROBE_GET) - return (memp_fget(mpf, - &pgno, flags | DB_MPOOL_EXTENT, addrp)); - ret = memp_fput(mpf, addrp, flags); + return (__memp_fget(mpf, &pgno, flags, addrp)); + ret = __memp_fput(mpf, addrp, flags); MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + /* Recalculate because we dropped the lock. */ + offset = extid - array->low_extent; + DB_ASSERT(array->mpfarray[offset].pinref > 0); array->mpfarray[offset].pinref--; MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); } @@ -218,7 +277,6 @@ err: * * PUBLIC: int __qam_fclose __P((DB *, db_pgno_t)); */ - int __qam_fclose(dbp, pgnoaddr) DB *dbp; @@ -237,7 +295,7 @@ __qam_fclose(dbp, pgnoaddr) MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); - extid = (pgnoaddr - 1) / qp->page_ext; + extid = QAM_PAGE_EXTENT(dbp, pgnoaddr); array = &qp->array1; if (array->low_extent > extid || array->hi_extent < extid) array = &qp->array2; @@ -251,12 +309,13 @@ __qam_fclose(dbp, pgnoaddr) mpf = array->mpfarray[offset].mpf; array->mpfarray[offset].mpf = NULL; - ret = memp_fclose(mpf); + ret = __memp_fclose(mpf, 0); done: MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); return (ret); } + /* * __qam_fremove -- remove an extent. * @@ -266,7 +325,6 @@ done: * * PUBLIC: int __qam_fremove __P((DB *, db_pgno_t)); */ - int __qam_fremove(dbp, pgnoaddr) DB *dbp; @@ -278,7 +336,7 @@ __qam_fremove(dbp, pgnoaddr) QUEUE *qp; u_int32_t extid; #if CONFIG_TEST - char buf[256], *real_name; + char buf[MAXPATHLEN], *real_name; #endif int offset, ret; @@ -288,7 +346,7 @@ __qam_fremove(dbp, pgnoaddr) MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); - extid = (pgnoaddr - 1) / qp->page_ext; + extid = QAM_PAGE_EXTENT(dbp, pgnoaddr); array = &qp->array1; if (array->low_extent > extid || array->hi_extent < extid) array = &qp->array2; @@ -299,23 +357,34 @@ __qam_fremove(dbp, pgnoaddr) #if CONFIG_TEST real_name = NULL; /* Find the real name of the file. */ - snprintf(buf, sizeof(buf), - QUEUE_EXTENT, qp->dir, qp->name, extid); + QAM_EXNAME(qp, extid, buf, sizeof(buf)); if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, buf, 0, NULL, &real_name)) != 0) + DB_APP_DATA, buf, 0, NULL, &real_name)) != 0) goto err; #endif + /* + * The log must be flushed before the file is deleted. We depend on + * the log record of the last delete to recreate the file if we crash. + */ + if (LOGGING_ON(dbenv) && (ret = __log_flush(dbenv, NULL)) != 0) + goto err; + mpf = array->mpfarray[offset].mpf; array->mpfarray[offset].mpf = NULL; - __memp_set_unlink(mpf); - if ((ret = memp_fclose(mpf)) != 0) + (void)__memp_set_flags(mpf, DB_MPOOL_UNLINK, 1); + if ((ret = __memp_fclose(mpf, 0)) != 0) goto err; + /* + * If the file is at the bottom of the array + * shift things down and adjust the end points. + */ if (offset == 0) { memmove(array->mpfarray, &array->mpfarray[1], (array->hi_extent - array->low_extent) * sizeof(array->mpfarray[0])); - array->mpfarray[array->hi_extent - array->low_extent].mpf = NULL; + array->mpfarray[ + array->hi_extent - array->low_extent].mpf = NULL; if (array->low_extent != array->hi_extent) array->low_extent++; } else { @@ -327,7 +396,7 @@ err: MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); #if CONFIG_TEST if (real_name != NULL) - __os_freestr(real_name); + __os_free(dbenv, real_name); #endif return (ret); } @@ -336,86 +405,31 @@ err: * __qam_sync -- * Flush the database cache. * - * PUBLIC: int __qam_sync __P((DB *, u_int32_t)); + * PUBLIC: int __qam_sync __P((DB *)); */ int -__qam_sync(dbp, flags) +__qam_sync(dbp) DB *dbp; - u_int32_t flags; { DB_ENV *dbenv; DB_MPOOLFILE *mpf; - MPFARRAY *array; - QUEUE *qp; - QUEUE_FILELIST *filelist; - struct __qmpf *mpfp; - u_int32_t i; - int done, ret; dbenv = dbp->dbenv; + mpf = dbp->mpf; - PANIC_CHECK(dbenv); - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync"); - - if ((ret = __db_syncchk(dbp, flags)) != 0) - return (ret); - - /* Read-only trees never need to be sync'd. */ - if (F_ISSET(dbp, DB_AM_RDONLY)) - return (0); - - /* If the tree was never backed by a database file, we're done. */ - if (F_ISSET(dbp, DB_AM_INMEM)) - return (0); - - /* Flush any dirty pages from the cache to the backing file. */ - if ((ret = memp_fsync(dbp->mpf)) != 0) - return (ret); - - qp = (QUEUE *)dbp->q_internal; - if (qp->page_ext == 0) - return (0); - - /* We do this for the side effect of opening all active extents. */ - if ((ret = __qam_gen_filelist(dbp, &filelist)) != 0) - return (ret); - - if (filelist == NULL) - return (0); - - __os_free(filelist, 0); - - done = 0; - qp = (QUEUE *)dbp->q_internal; - array = &qp->array1; - - MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); -again: - mpfp = array->mpfarray; - for (i = array->low_extent; i <= array->hi_extent; i++, mpfp++) - if ((mpf = mpfp->mpf) != NULL) { - if ((ret = memp_fsync(mpf)) != 0) - goto err; - /* - * If we are the only ones with this file open - * then close it so it might be removed. - */ - if (mpfp->pinref == 0) { - mpfp->mpf = NULL; - if ((ret = memp_fclose(mpf)) != 0) - goto err; - } - } - - if (done == 0 && qp->array2.n_extent != 0) { - array = &qp->array2; - done = 1; - goto again; - } + /* + * We need to flush all extent files. There is no easy way to find + * all the extents for this queue which are currently open. For now + * just flush the whole cache. An alternative would be to have a + * call into the cache layer that would flush all of the queue extent + * files it has open (there's a flag when we open a queue extent file, + * so the cache layer can identify them). + */ -err: - MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); - return (ret); + if (((QUEUE *)dbp->q_internal)->page_ext == 0) + return (__memp_fsync(mpf)); + else + return (__memp_sync(dbenv, NULL)); } /* @@ -431,16 +445,19 @@ __qam_gen_filelist(dbp, filelistp) QUEUE_FILELIST **filelistp; { DB_ENV *dbenv; + DB_MPOOLFILE *mpf; QUEUE *qp; QMETA *meta; - db_pgno_t i, last, start, stop; - db_recno_t current, first; + size_t extent_cnt; + db_recno_t i, current, first, stop, rec_extent; QUEUE_FILELIST *fp; int ret; dbenv = dbp->dbenv; + mpf = dbp->mpf; qp = (QUEUE *)dbp->q_internal; *filelistp = NULL; + if (qp->page_ext == 0) return (0); @@ -448,56 +465,333 @@ __qam_gen_filelist(dbp, filelistp) if (qp->name == NULL) return (0); - /* Find out the page number of the last page in the database. */ + /* Find out the first and last record numbers in the database. */ i = PGNO_BASE_MD; - if ((ret = memp_fget(dbp->mpf, &i, 0, &meta)) != 0) { - (void)dbp->close(dbp, 0); + if ((ret = __memp_fget(mpf, &i, 0, &meta)) != 0) return (ret); - } current = meta->cur_recno; first = meta->first_recno; - if ((ret = memp_fput(dbp->mpf, meta, 0)) != 0) { - (void)dbp->close(dbp, 0); + if ((ret = __memp_fput(mpf, meta, 0)) != 0) return (ret); - } - last = QAM_RECNO_PAGE(dbp, current); - start = QAM_RECNO_PAGE(dbp, first); - - /* Allocate the worst case plus 1 for null termination. */ - if (last >= start) - ret = last - start + 2; + /* + * Allocate the extent array. Calculate the worst case number of + * pages and convert that to a count of extents. The count of + * extents has 3 or 4 extra slots: + * roundoff at first (e.g., current record in extent); + * roundoff at current (e.g., first record in extent); + * NULL termination; and + * UINT32_T_MAX wraparound (the last extent can be small). + */ + rec_extent = qp->rec_page * qp->page_ext; + if (current >= first) + extent_cnt = (current - first) / rec_extent + 3; else - ret = last + (QAM_RECNO_PAGE(dbp, UINT32_T_MAX) - start) + 1; + extent_cnt = + (current + (UINT32_T_MAX - first)) / rec_extent + 4; if ((ret = __os_calloc(dbenv, - ret, sizeof(QUEUE_FILELIST), filelistp)) != 0) + extent_cnt, sizeof(QUEUE_FILELIST), filelistp)) != 0) return (ret); fp = *filelistp; - i = start; - if (last >= start) - stop = last; - else - stop = QAM_RECNO_PAGE(dbp, UINT32_T_MAX); + again: - for (; i <= last; i += qp->page_ext) { - if ((ret = __qam_fprobe(dbp, - i, &fp->mpf, QAM_PROBE_MPF, 0)) != 0) { + if (current >= first) + stop = current; + else + stop = UINT32_T_MAX; + + /* + * Make sure that first is at the same offset in the extent as stop. + * This guarantees that the stop will be reached in the loop below, + * even if it is the only record in its extent. This calculation is + * safe because first won't move out of its extent. + */ + first -= first % rec_extent; + first += stop % rec_extent; + + for (i = first; i >= first && i <= stop; i += rec_extent) { + if ((ret = __qam_fprobe(dbp, QAM_RECNO_PAGE(dbp, i), &fp->mpf, + QAM_PROBE_MPF, 0)) != 0) { if (ret == ENOENT) continue; return (ret); } - fp->id = (i - 1) / qp->page_ext; + fp->id = QAM_RECNO_EXTENT(dbp, i); fp++; + DB_ASSERT((size_t)(fp - *filelistp) < extent_cnt); } - if (last < start) { - i = 1; - stop = last; - start = 0; + if (current < first) { + first = 1; goto again; } return (0); } + +/* + * __qam_extent_names -- generate a list of extent files names. + * + * PUBLIC: int __qam_extent_names __P((DB_ENV *, char *, char ***)); + */ +int +__qam_extent_names(dbenv, name, namelistp) + DB_ENV *dbenv; + char *name; + char ***namelistp; +{ + DB *dbp; + QUEUE *qp; + QUEUE_FILELIST *filelist, *fp; + char buf[MAXPATHLEN], **cp, *freep; + int cnt, len, ret; + + *namelistp = NULL; + filelist = NULL; + if ((ret = db_create(&dbp, dbenv, 0)) != 0) + return (ret); + if ((ret = __db_open(dbp, + NULL, name, NULL, DB_QUEUE, DB_RDONLY, 0, PGNO_BASE_MD)) != 0) + return (ret); + qp = dbp->q_internal; + if (qp->page_ext == 0) + goto done; + + if ((ret = __qam_gen_filelist(dbp, &filelist)) != 0) + goto done; + + if (filelist == NULL) + goto done; + + cnt = 0; + for (fp = filelist; fp->mpf != NULL; fp++) + cnt++; + + /* QUEUE_EXTENT contains extra chars, but add 6 anyway for the int. */ + len = (u_int32_t)(cnt * (sizeof(**namelistp) + + strlen(QUEUE_EXTENT) + strlen(qp->dir) + strlen(qp->name) + 6)); + + if ((ret = + __os_malloc(dbp->dbenv, len, namelistp)) != 0) + goto done; + cp = *namelistp; + freep = (char *)(cp + cnt + 1); + for (fp = filelist; fp->mpf != NULL; fp++) { + QAM_EXNAME(qp, fp->id, buf, sizeof(buf)); + len = (u_int32_t)strlen(buf); + *cp++ = freep; + strcpy(freep, buf); + freep += len + 1; + } + *cp = NULL; + +done: + if (filelist != NULL) + __os_free(dbp->dbenv, filelist); + (void)__db_close(dbp, NULL, DB_NOSYNC); + + return (ret); +} + +/* + * __qam_exid -- + * Generate a fileid for an extent based on the fileid of the main + * file. Since we do not log schema creates/deletes explicitly, the log + * never captures the fileid of an extent file. In order that masters and + * replicas have the same fileids (so they can explicitly delete them), we + * use computed fileids for the extent files of Queue files. + * + * An extent file id retains the low order 12 bytes of the file id and + * overwrites the dev/inode fields, placing a 0 in the inode field, and + * the extent number in the dev field. + * + * PUBLIC: void __qam_exid __P((DB *, u_int8_t *, u_int32_t)); + */ +void +__qam_exid(dbp, fidp, exnum) + DB *dbp; + u_int8_t *fidp; + u_int32_t exnum; +{ + int i; + u_int8_t *p; + + /* Copy the fileid from the master. */ + memcpy(fidp, dbp->fileid, DB_FILE_ID_LEN); + + /* The first four bytes are the inode or the FileIndexLow; 0 it. */ + for (i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = 0; + + /* The next four bytes are the dev/FileIndexHigh; insert the exnum . */ + for (p = (u_int8_t *)&exnum, i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = *p++; +} + +/* + * __qam_nameop -- + * Remove or rename extent files associated with a particular file. + * This is to remove or rename (both in mpool and the file system) any + * extent files associated with the given dbp. + * This is either called from the QUEUE remove or rename methods or + * when undoing a transaction that created the database. + * + * PUBLIC: int __qam_nameop __P((DB *, DB_TXN *, const char *, qam_name_op)); + */ +int __qam_nameop(dbp, txn, newname, op) + DB *dbp; + DB_TXN *txn; + const char *newname; + qam_name_op op; +{ + DB_ENV *dbenv; + QUEUE *qp; + char buf[MAXPATHLEN], nbuf[MAXPATHLEN], sepsave; + char *endname, *endpath, *exname, *fullname, **names; + char *ndir, *namep, *new, *cp; + int cnt, exlen, fulllen, i, len, ret, t_ret; + u_int8_t fid[DB_FILE_ID_LEN]; + u_int32_t exid; + + ret = t_ret = 0; + dbenv = dbp->dbenv; + qp = (QUEUE *)dbp->q_internal; + namep = exname = fullname = NULL; + + /* If this isn't a queue with extents, we're done. */ + if (qp->page_ext == 0) + return (0); + + /* + * Generate the list of all queue extents for this file (from the + * file system) and then cycle through removing them and evicting + * from mpool. We have two modes of operation here. If we are + * undoing log operations, then do not write log records and try + * to keep going even if we encounter failures in nameop. If we + * are in mainline code, then return as soon as we have a problem. + * Memory allocation errors (__db_appname, __os_malloc) are always + * considered failure. + */ + + /* + * Set buf to : dir/__dbq.NAME.0 and fullname to HOME/dir/__dbq.NAME.0 + * or, in the case of an absolute path: /dir/__dbq.NAME.0 + */ + QAM_EXNAME(qp, 0, buf, sizeof(buf)); + if ((ret = + __db_appname(dbenv, DB_APP_DATA, buf, 0, NULL, &fullname)) != 0) + return (ret); + + /* We should always have a path separator here. */ + if ((endpath = __db_rpath(fullname)) == NULL) { + ret = EINVAL; + goto err; + } + sepsave = *endpath; + *endpath = '\0'; + + /* + * Get the list of all names in the directory and restore the + * path separator. + */ + if ((ret = __os_dirlist(dbenv, fullname, &names, &cnt)) != 0) + goto err; + *endpath = sepsave; + + /* If there aren't any names, don't allocate any space. */ + if (cnt == 0) + goto err; + + /* + * Now, make endpath reference the queue extent names upon which + * we can match. Then we set the end of the path to be the + * beginning of the extent number, and we can compare the bytes + * between endpath and endname (__dbq.NAME.). + */ + endpath++; + endname = strrchr(endpath, '.'); + if (endname == NULL) { + ret = EINVAL; + goto err; + } + ++endname; + *endname = '\0'; + len = strlen(endpath); + fulllen = strlen(fullname); + + /* Allocate space for a full extent name. */ + exlen = fulllen + 20; + if ((ret = __os_malloc(dbenv, exlen, &exname)) != 0) + goto err; + + ndir = new = NULL; + if (newname != NULL) { + if ((ret = __os_strdup(dbenv, newname, &namep)) != 0) + goto err; + ndir = namep; + if ((new = __db_rpath(namep)) != NULL) + *new++ = '\0'; + else { + new = namep; + ndir = PATH_DOT; + } + } + for (i = 0; i < cnt; i++) { + /* Check if this is a queue extent file. */ + if (strncmp(names[i], endpath, len) != 0) + continue; + /* Make sure we have all numbers. foo.db vs. foo.db.0. */ + for (cp = &names[i][len]; *cp != '\0'; cp++) + if (!isdigit(*cp)) + break; + if (*cp != '\0') + continue; + + /* + * We have a queue extent file. We need to generate its + * name and its fileid. + */ + + exid = atol(names[i] + len); + __qam_exid(dbp, fid, exid); + + switch (op) { + case QAM_NAME_DISCARD: + snprintf(exname, exlen, + "%s%s", fullname, names[i] + len); + if ((t_ret = __memp_nameop(dbenv, + fid, NULL, exname, NULL)) != 0 && ret == 0) + ret = t_ret; + break; + + case QAM_NAME_RENAME: + snprintf(nbuf, sizeof(nbuf), QUEUE_EXTENT, + ndir, PATH_SEPARATOR[0], new, exid); + QAM_EXNAME(qp, exid, buf, sizeof(buf)); + if ((ret = __fop_rename(dbenv, + txn, buf, nbuf, fid, DB_APP_DATA, + F_ISSET(dbp, DB_AM_NOT_DURABLE) ? + DB_LOG_NOT_DURABLE : 0)) != 0) + goto err; + break; + + case QAM_NAME_REMOVE: + QAM_EXNAME(qp, exid, buf, sizeof(buf)); + if ((ret = __fop_remove(dbenv, txn, fid, buf, + DB_APP_DATA, F_ISSET(dbp, DB_AM_NOT_DURABLE) ? + DB_LOG_NOT_DURABLE : 0)) != 0) + goto err; + break; + } + } + +err: if (fullname != NULL) + __os_free(dbenv, fullname); + if (exname != NULL) + __os_free(dbenv, exname); + if (namep != NULL) + __os_free(dbenv, namep); + return (ret); +} diff --git a/db/qam/qam_method.c b/db/qam/qam_method.c index 1c94f4b8d..b0270f767 100644 --- a/db/qam/qam_method.c +++ b/db/qam/qam_method.c @@ -1,34 +1,34 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: qam_method.c,v 11.17 2001/01/10 04:50:54 ubell Exp $"; +static const char revid[] = "$Id: qam_method.c,v 11.64 2003/10/01 20:03:43 ubell Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> + #include <string.h> #endif #include "db_int.h" -#include "db_page.h" -#include "db_int.h" -#include "db_shash.h" -#include "db_am.h" -#include "qam.h" -#include "db.h" -#include "mp.h" -#include "lock.h" -#include "log.h" - +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/db_am.h" +#include "dbinc/fop.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +static int __qam_get_extentsize __P((DB *, u_int32_t *)); static int __qam_set_extentsize __P((DB *, u_int32_t)); -static int __qam_remove_callback __P((DB *, void *)); struct __qam_cookie { DB_LSN lsn; @@ -52,6 +52,7 @@ __qam_db_create(dbp) if ((ret = __os_calloc(dbp->dbenv, 1, sizeof(QUEUE), &t)) != 0) return (ret); dbp->q_internal = t; + dbp->get_q_extentsize = __qam_get_extentsize; dbp->set_q_extentsize = __qam_set_extentsize; t->re_pad = ' '; @@ -63,11 +64,12 @@ __qam_db_create(dbp) * __qam_db_close -- * Queue specific discard of the DB structure. * - * PUBLIC: int __qam_db_close __P((DB *)); + * PUBLIC: int __qam_db_close __P((DB *, u_int32_t)); */ int -__qam_db_close(dbp) +__qam_db_close(dbp, flags) DB *dbp; + u_int32_t flags; { DB_MPOOLFILE *mpf; MPFARRAY *array; @@ -77,7 +79,8 @@ __qam_db_close(dbp) int ret, t_ret; ret = 0; - t = dbp->q_internal; + if ((t = dbp->q_internal) == NULL) + return (0); array = &t->array1; again: @@ -87,11 +90,12 @@ again: i <= array->hi_extent; i++, mpfp++) { mpf = mpfp->mpf; mpfp->mpf = NULL; - if (mpf != NULL && - (t_ret = memp_fclose(mpf)) != 0 && ret == 0) + if (mpf != NULL && (t_ret = __memp_fclose(mpf, + LF_ISSET(DB_AM_DISCARD) ? DB_MPOOL_DISCARD : 0)) + != 0 && ret == 0) ret = t_ret; } - __os_free(array->mpfarray, 0); + __os_free(dbp->dbenv, array->mpfarray); } if (t->array2.n_extent != 0) { array = &t->array2; @@ -99,23 +103,37 @@ again: goto again; } + if (LF_ISSET(DB_AM_DISCARD) && + (t_ret = __qam_nameop(dbp, NULL, + NULL, QAM_NAME_DISCARD)) != 0 && ret == 0) + ret = t_ret; + if (t->path != NULL) - __os_free(t->path, 0); - __os_free(t, sizeof(QUEUE)); + __os_free(dbp->dbenv, t->path); + __os_free(dbp->dbenv, t); dbp->q_internal = NULL; return (ret); } static int +__qam_get_extentsize(dbp, q_extentsizep) + DB *dbp; + u_int32_t *q_extentsizep; +{ + *q_extentsizep = ((QUEUE*)dbp->q_internal)->page_ext; + return (0); +} + +static int __qam_set_extentsize(dbp, extentsize) DB *dbp; u_int32_t extentsize; { - DB_ILLEGAL_AFTER_OPEN(dbp, "set_extentsize"); + DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_extentsize"); if (extentsize < 1) { - __db_err(dbp->dbenv, "Extent size must be at least 1."); + __db_err(dbp->dbenv, "Extent size must be at least 1"); return (EINVAL); } @@ -128,29 +146,35 @@ __qam_set_extentsize(dbp, extentsize) * __db_prqueue -- * Print out a queue * - * PUBLIC: int __db_prqueue __P((DB *, u_int32_t)); + * PUBLIC: int __db_prqueue __P((DB *, FILE *, u_int32_t)); */ int -__db_prqueue(dbp, flags) +__db_prqueue(dbp, fp, flags) DB *dbp; + FILE *fp; u_int32_t flags; { + DB_MPOOLFILE *mpf; PAGE *h; QMETA *meta; db_pgno_t first, i, last, pg_ext, stop; - int ret; + int ret, t_ret; + + mpf = dbp->mpf; /* Find out the page number of the last page in the database. */ i = PGNO_BASE_MD; - if ((ret = memp_fget(dbp->mpf, &i, 0, &meta)) != 0) + if ((ret = __memp_fget(mpf, &i, 0, &meta)) != 0) return (ret); first = QAM_RECNO_PAGE(dbp, meta->first_recno); last = QAM_RECNO_PAGE(dbp, meta->cur_recno); - if ((ret = __db_prpage(dbp, (PAGE *)meta, flags)) != 0) - return (ret); - if ((ret = memp_fput(dbp->mpf, meta, 0)) != 0) + ret = __db_prpage(dbp, (PAGE *)meta, fp, flags); + if ((t_ret = __memp_fput(mpf, meta, 0)) != 0 && ret == 0) + ret = t_ret; + + if (ret != 0) return (ret); i = first; @@ -162,20 +186,20 @@ __db_prqueue(dbp, flags) /* Dump each page. */ begin: for (; i <= stop; ++i) { - if ((ret = __qam_fget(dbp, &i, DB_MPOOL_EXTENT, &h)) != 0) { + if ((ret = __qam_fget(dbp, &i, 0, &h)) != 0) { pg_ext = ((QUEUE *)dbp->q_internal)->page_ext; if (pg_ext == 0) { - if (ret == EINVAL && first == last) + if (ret == DB_PAGE_NOTFOUND && first == last) return (0); return (ret); } - if (ret == ENOENT || ret == EINVAL) { + if (ret == ENOENT || ret == DB_PAGE_NOTFOUND) { i += pg_ext - ((i - 1) % pg_ext) - 1; continue; } return (ret); } - (void)__db_prpage(dbp, h, flags); + (void)__db_prpage(dbp, h, fp, flags); if ((ret = __qam_fput(dbp, i, h, 0)) != 0) return (ret); } @@ -193,31 +217,26 @@ begin: * __qam_remove * Remove method for a Queue. * - * PUBLIC: int __qam_remove __P((DB *, const char *, - * PUBLIC: const char *, DB_LSN *, int (**)(DB *, void*), void **)); + * PUBLIC: int __qam_remove __P((DB *, + * PUBLIC: DB_TXN *, const char *, const char *, DB_LSN *)); */ int -__qam_remove(dbp, name, subdb, lsnp, callbackp, cookiep) +__qam_remove(dbp, txn, name, subdb, lsnp) DB *dbp; + DB_TXN *txn; const char *name, *subdb; DB_LSN *lsnp; - int (**callbackp) __P((DB *, void *)); - void **cookiep; { - DBT namedbt; DB_ENV *dbenv; - DB_LSN lsn; - MPFARRAY *ap; + DB *tmpdbp; QUEUE *qp; - int ret; - char *backup, buf[256], *real_back, *real_name; - QUEUE_FILELIST *filelist, *fp; - struct __qam_cookie *qam_cookie; + int ret, needclose, t_ret; + + COMPQUIET(lsnp, NULL); dbenv = dbp->dbenv; ret = 0; - backup = real_back = real_name = NULL; - filelist = NULL; + needclose = 0; PANIC_CHECK(dbenv); @@ -226,148 +245,62 @@ __qam_remove(dbp, name, subdb, lsnp, callbackp, cookiep) */ if (subdb != NULL) { __db_err(dbenv, - "Queue does not support multiple databases per file."); + "Queue does not support multiple databases per file"); ret = EINVAL; - goto done; + goto err; } - qp = (QUEUE *)dbp->q_internal; - - if (qp->page_ext != 0 && - (ret = __qam_gen_filelist(dbp, &filelist)) != 0) - goto done; - - if (filelist == NULL) - goto done; - - for (fp = filelist; fp->mpf != NULL; fp++) { - snprintf(buf, - sizeof(buf), QUEUE_EXTENT, qp->dir, qp->name, fp->id); - if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, buf, 0, NULL, &real_name)) != 0) - goto done; - if (LOGGING_ON(dbenv)) { - memset(&namedbt, 0, sizeof(namedbt)); - namedbt.data = (char *)buf; - namedbt.size = strlen(buf) + 1; - - if ((ret = - __qam_delete_log(dbenv, dbp->open_txn, - &lsn, DB_FLUSH, &namedbt, lsnp)) != 0) { - __db_err(dbenv, - "%s: %s", name, db_strerror(ret)); - goto done; - } - } - (void)__memp_fremove(fp->mpf); - if ((ret = memp_fclose(fp->mpf)) != 0) - goto done; - if (qp->array2.n_extent == 0 || qp->array2.low_extent > fp->id) - ap = &qp->array1; - else - ap = &qp->array2; - ap->mpfarray[fp->id - ap->low_extent].mpf = NULL; - - /* Create name for backup file. */ - if (TXN_ON(dbenv)) { - if ((ret = __db_backup_name(dbenv, - buf, &backup, lsnp)) != 0) - goto done; - if ((ret = __db_appname(dbenv, DB_APP_DATA, - NULL, backup, 0, NULL, &real_back)) != 0) - goto done; - if ((ret = __os_rename(dbenv, - real_name, real_back)) != 0) - goto done; - __os_freestr(real_back); - real_back = NULL; - } - else - if ((ret = __os_unlink(dbenv, real_name)) != 0) - goto done; - __os_freestr(real_name); - real_name = NULL; + /* + * Since regular remove no longer opens the database, we may have + * to do it here. + */ + if (F_ISSET(dbp, DB_AM_OPEN_CALLED)) + tmpdbp = dbp; + else { + if ((ret = db_create(&tmpdbp, dbenv, 0)) != 0) + return (ret); + /* + * We need to make sure we don't self-deadlock, so give + * this dbp the same locker as the incoming one. + */ + tmpdbp->lid = dbp->lid; + + /* + * If this is a transactional dbp and the open fails, then + * the transactional abort will close the dbp. If it's not + * a transactional open, then we always have to close it + * even if the open fails. Once the open has succeeded, + * then we will always want to close it. + */ + if (txn == NULL) + needclose = 1; + if ((ret = __db_open(tmpdbp, + txn, name, NULL, DB_QUEUE, 0, 0, PGNO_BASE_MD)) != 0) + goto err; + needclose = 1; } - if ((ret= __os_malloc(dbenv, - sizeof(struct __qam_cookie), NULL, &qam_cookie)) != 0) - goto done; - qam_cookie->lsn = *lsnp; - qam_cookie->filelist = filelist; - *cookiep = qam_cookie; - *callbackp = __qam_remove_callback; - -done: - if (ret != 0 && filelist != NULL) - __os_free(filelist, 0); - if (real_back != NULL) - __os_freestr(real_back); - if (real_name != NULL) - __os_freestr(real_name); - if (backup != NULL) - __os_freestr(backup); - - return (ret); -} - -static int -__qam_remove_callback(dbp, cookie) - DB *dbp; - void *cookie; -{ - DB_ENV *dbenv; - DB_LSN *lsnp; - QUEUE *qp; - QUEUE_FILELIST *filelist, *fp; - char *backup, buf[256], *real_back; - int ret; - qp = (QUEUE *)dbp->q_internal; - if (qp->page_ext == 0) - return (__os_unlink(dbp->dbenv, cookie)); + qp = (QUEUE *)tmpdbp->q_internal; - dbenv = dbp->dbenv; - lsnp = &((struct __qam_cookie *)cookie)->lsn; - filelist = fp = ((struct __qam_cookie *)cookie)->filelist; - real_back = backup = NULL; - if ((ret = - __db_backup_name(dbenv, qp->name, &backup, lsnp)) != 0) - goto err; - if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, backup, 0, NULL, &real_back)) != 0) - goto err; - if ((ret = __os_unlink(dbp->dbenv, real_back)) != 0) - goto err; + if (qp->page_ext != 0) + ret = __qam_nameop(tmpdbp, txn, NULL, QAM_NAME_REMOVE); - __os_freestr(backup); - __os_freestr(real_back); +err: if (needclose) { + /* + * Since we copied the lid from the dbp, we'd better not + * free it here. + */ + tmpdbp->lid = DB_LOCK_INVALIDID; - if (fp == NULL) - return (0); + /* We need to remove the lockevent we associated with this. */ + if (txn != NULL) + __txn_remlock(dbenv, + txn, &tmpdbp->handle_lock, DB_LOCK_INVALIDID); - for (; fp->mpf != NULL; fp++) { - snprintf(buf, - sizeof(buf), QUEUE_EXTENT, qp->dir, qp->name, fp->id); - real_back = backup = NULL; - if ((ret = __db_backup_name(dbenv, buf, &backup, lsnp)) != 0) - goto err; - if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, backup, 0, NULL, &real_back)) != 0) - goto err; - ret = __os_unlink(dbenv, real_back); - __os_freestr(real_back); - __os_freestr(backup); + if ((t_ret = + __db_close(tmpdbp, txn, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; } - __os_free(filelist, 0); - __os_free(cookie, sizeof (struct __qam_cookie)); - - return (0); - -err: - if (backup != NULL) - __os_freestr(backup); - - if (real_back != NULL) - __os_freestr(real_back); return (ret); } @@ -376,97 +309,65 @@ err: * __qam_rename * Rename method for Queue. * - * PUBLIC: int __qam_rename __P((DB *, + * PUBLIC: int __qam_rename __P((DB *, DB_TXN *, * PUBLIC: const char *, const char *, const char *)); */ int -__qam_rename(dbp, filename, subdb, newname) +__qam_rename(dbp, txn, filename, subdb, newname) DB *dbp; + DB_TXN *txn; const char *filename, *subdb, *newname; { - DBT namedbt, newnamedbt; DB_ENV *dbenv; - DB_LSN newlsn; - MPFARRAY *ap; + DB *tmpdbp; QUEUE *qp; - QUEUE_FILELIST *fp, *filelist; - char buf[256], nbuf[256], *namep, *real_name, *real_newname; - int ret; + int ret, needclose, t_ret; dbenv = dbp->dbenv; ret = 0; - real_name = real_newname = NULL; - filelist = NULL; - - qp = (QUEUE *)dbp->q_internal; + needclose = 0; if (subdb != NULL) { __db_err(dbenv, - "Queue does not support multiple databases per file."); + "Queue does not support multiple databases per file"); ret = EINVAL; goto err; } - if (qp->page_ext != 0 && - (ret = __qam_gen_filelist(dbp, &filelist)) != 0) - goto err; - if ((namep = __db_rpath(newname)) != NULL) - newname = namep + 1; - - for (fp = filelist; fp != NULL && fp->mpf != NULL; fp++) { - if ((ret = __memp_fremove(fp->mpf)) != 0) - goto err; - if ((ret = memp_fclose(fp->mpf)) != 0) - goto err; - if (qp->array2.n_extent == 0 || qp->array2.low_extent > fp->id) - ap = &qp->array1; - else - ap = &qp->array2; - ap->mpfarray[fp->id - ap->low_extent].mpf = NULL; - snprintf(buf, - sizeof(buf), QUEUE_EXTENT, qp->dir, qp->name, fp->id); - if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, buf, 0, NULL, &real_name)) != 0) - goto err; - snprintf(nbuf, - sizeof(nbuf), QUEUE_EXTENT, qp->dir, newname, fp->id); - if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, nbuf, 0, NULL, &real_newname)) != 0) - goto err; - if (LOGGING_ON(dbenv)) { - memset(&namedbt, 0, sizeof(namedbt)); - namedbt.data = (char *)buf; - namedbt.size = strlen(buf) + 1; - - memset(&newnamedbt, 0, sizeof(namedbt)); - newnamedbt.data = (char *)nbuf; - newnamedbt.size = strlen(nbuf) + 1; - - if ((ret = - __qam_rename_log(dbenv, - dbp->open_txn, &newlsn, 0, - &namedbt, &newnamedbt)) != 0) { - __db_err(dbenv, "%s: %s", filename, db_strerror(ret)); - goto err; - } - if ((ret = __log_filelist_update(dbenv, dbp, - dbp->log_fileid, newname, NULL)) != 0) - goto err; - } - if ((ret = __os_rename(dbenv, real_name, real_newname)) != 0) + /* + * Since regular rename no longer opens the database, we may have + * to do it here. + */ + if (F_ISSET(dbp, DB_AM_OPEN_CALLED)) + tmpdbp = dbp; + else { + if ((ret = db_create(&tmpdbp, dbenv, 0)) != 0) + return (ret); + /* Copy the incoming locker so we don't self-deadlock. */ + tmpdbp->lid = dbp->lid; + needclose = 1; + if ((ret = __db_open(tmpdbp, + txn, filename, NULL, DB_QUEUE, 0, 0, PGNO_BASE_MD)) != 0) goto err; - __os_freestr(real_name); - __os_freestr(real_newname); - real_name = real_newname = NULL; } -err: - if (real_name != NULL) - __os_freestr(real_name); - if (real_newname != NULL) - __os_freestr(real_newname); - if (filelist != NULL) - __os_free(filelist, 0); + qp = (QUEUE *)tmpdbp->q_internal; + if (qp->page_ext != 0) + ret = __qam_nameop(tmpdbp, txn, newname, QAM_NAME_RENAME); + +err: if (needclose) { + /* We copied this, so we mustn't free it. */ + tmpdbp->lid = DB_LOCK_INVALIDID; + + /* We need to remove the lockevent we associated with this. */ + if (txn != NULL) + __txn_remlock(dbenv, + txn, &tmpdbp->handle_lock, DB_LOCK_INVALIDID); + + if ((t_ret = + __db_close(tmpdbp, txn, DB_NOSYNC)) != 0 && ret == 0) + ret = t_ret; + } return (ret); } diff --git a/db/qam/qam_open.c b/db/qam/qam_open.c index 73346439f..53b9e17a1 100644 --- a/db/qam/qam_open.c +++ b/db/qam/qam_open.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: qam_open.c,v 11.31 2000/12/20 17:59:29 ubell Exp $"; +static const char revid[] = "$Id: qam_open.c,v 11.66 2003/09/25 01:35:38 margo Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,198 +18,150 @@ static const char revid[] = "$Id: qam_open.c,v 11.31 2000/12/20 17:59:29 ubell E #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "db_swap.h" -#include "db_am.h" -#include "lock.h" -#include "qam.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/db_swap.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +#include "dbinc/fop.h" + +static int __qam_init_meta __P((DB *, QMETA *)); /* * __qam_open * - * PUBLIC: int __qam_open __P((DB *, const char *, db_pgno_t, int, u_int32_t)); + * PUBLIC: int __qam_open __P((DB *, + * PUBLIC: DB_TXN *, const char *, db_pgno_t, int, u_int32_t)); */ int -__qam_open(dbp, name, base_pgno, mode, flags) +__qam_open(dbp, txn, name, base_pgno, mode, flags) DB *dbp; + DB_TXN *txn; const char *name; db_pgno_t base_pgno; int mode; u_int32_t flags; { - QUEUE *t; DBC *dbc; + DB_ENV *dbenv; DB_LOCK metalock; - DB_LSN orig_lsn; + DB_MPOOLFILE *mpf; QMETA *qmeta; - int locked; + QUEUE *t; int ret, t_ret; - ret = 0; - locked = 0; + dbenv = dbp->dbenv; + mpf = dbp->mpf; t = dbp->q_internal; + ret = 0; + qmeta = NULL; if (name == NULL && t->page_ext != 0) { - __db_err(dbp->dbenv, - "Extent size may not be specified for in-memory queue database."); + __db_err(dbenv, + "Extent size may not be specified for in-memory queue database"); return (EINVAL); } + /* Initialize the remaining fields/methods of the DB. */ - dbp->del = __qam_delete; - dbp->put = __qam_put; - dbp->stat = __qam_stat; - dbp->sync = __qam_sync; dbp->db_am_remove = __qam_remove; dbp->db_am_rename = __qam_rename; - metalock.off = LOCK_INVALID; - /* * Get a cursor. If DB_CREATE is specified, we may be creating * pages, and to do that safely in CDB we need a write cursor. * In STD_LOCKING mode, we'll synchronize using the meta page * lock instead. */ - if ((ret = dbp->cursor(dbp, dbp->open_txn, - &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbp->dbenv) ? + if ((ret = __db_cursor(dbp, txn, &dbc, + LF_ISSET(DB_CREATE) && CDB_LOCKING(dbenv) ? DB_WRITECURSOR : 0)) != 0) return (ret); - /* Get, and optionally create the metadata page. */ + /* + * Get the meta data page. It must exist, because creates of + * files/databases come in through the __qam_new_file interface + * and queue doesn't support subdatabases. + */ if ((ret = __db_lget(dbc, 0, base_pgno, DB_LOCK_READ, 0, &metalock)) != 0) goto err; - if ((ret = memp_fget( - dbp->mpf, &base_pgno, DB_MPOOL_CREATE, (PAGE **)&qmeta)) != 0) + if ((ret = __memp_fget(mpf, &base_pgno, 0, &qmeta)) != 0) goto err; - /* - * If the magic number is correct, we're not creating the tree. - * Correct any fields that may not be right. Note, all of the - * local flags were set by DB->open. - */ -again: if (qmeta->dbmeta.magic == DB_QAMMAGIC) { - t->re_pad = qmeta->re_pad; - t->re_len = qmeta->re_len; - t->rec_page = qmeta->rec_page; - t->page_ext = qmeta->page_ext; - - (void)memp_fput(dbp->mpf, (PAGE *)qmeta, 0); - goto done; - } - - /* If we're doing CDB; we now have to get the write lock. */ - if (CDB_LOCKING(dbp->dbenv)) { - DB_ASSERT(LF_ISSET(DB_CREATE)); - if ((ret = lock_get(dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE, - &dbc->lock_dbt, DB_LOCK_WRITE, &dbc->mylock)) != 0) - goto err; - } - - /* - * If we are doing locking, relase the read lock - * and get a write lock. We want to avoid deadlock. - */ - if (locked == 0 && STD_LOCKING(dbc)) { - if ((ret = __LPUT(dbc, metalock)) != 0) - goto err; - if ((ret = __db_lget(dbc, - 0, base_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) - goto err; - locked = 1; - goto again; - } - /* Initialize the tree structure metadata information. */ - orig_lsn = qmeta->dbmeta.lsn; - memset(qmeta, 0, sizeof(QMETA)); - ZERO_LSN(qmeta->dbmeta.lsn); - qmeta->dbmeta.pgno = base_pgno; - qmeta->dbmeta.magic = DB_QAMMAGIC; - qmeta->dbmeta.version = DB_QAMVERSION; - qmeta->dbmeta.pagesize = dbp->pgsize; - qmeta->dbmeta.type = P_QAMMETA; - qmeta->re_pad = t->re_pad; - qmeta->re_len = t->re_len; - qmeta->rec_page = CALC_QAM_RECNO_PER_PAGE(dbp); - qmeta->cur_recno = 1; - qmeta->first_recno = 1; - qmeta->page_ext = t->page_ext; - t->rec_page = qmeta->rec_page; - memcpy(qmeta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); - - /* Verify that we can fit at least one record per page. */ - if (QAM_RECNO_PER_PAGE(dbp) < 1) { - __db_err(dbp->dbenv, - "Record size of %lu too large for page size of %lu", - (u_long)t->re_len, (u_long)dbp->pgsize); - (void)memp_fput(dbp->mpf, (PAGE *)qmeta, 0); + /* If the magic number is incorrect, that's a fatal error. */ + if (qmeta->dbmeta.magic != DB_QAMMAGIC) { + __db_err(dbenv, "%s: unexpected file type or format", name); ret = EINVAL; goto err; } - if ((ret = __db_log_page(dbp, - name, &orig_lsn, base_pgno, (PAGE *)qmeta)) != 0) - goto err; + /* Setup information needed to open extents. */ + t->page_ext = qmeta->page_ext; - /* Release the metadata page. */ - if ((ret = memp_fput(dbp->mpf, (PAGE *)qmeta, DB_MPOOL_DIRTY)) != 0) + if (t->page_ext != 0 && (ret = __qam_set_ext_data(dbp, name)) != 0) goto err; - DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name); - /* - * Flush the metadata page to disk. - * - * !!! - * It's not useful to return not-yet-flushed here -- convert it to - * an error. - */ - if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE) { - __db_err(dbp->dbenv, "Flush of metapage failed"); - ret = EINVAL; - } - DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name); + if (mode == 0) + mode = __db_omode("rwrw--"); + t->mode = mode; + t->re_pad = qmeta->re_pad; + t->re_len = qmeta->re_len; + t->rec_page = qmeta->rec_page; -done: t->q_meta = base_pgno; + t->q_meta = base_pgno; t->q_root = base_pgno + 1; - /* Setup information needed to open extents. */ - if (t->page_ext != 0) { - t->finfo.pgcookie = &t->pgcookie; - t->finfo.fileid = NULL; - t->finfo.lsn_offset = 0; - - t->pginfo.db_pagesize = dbp->pgsize; - t->pginfo.needswap = F_ISSET(dbp, DB_AM_SWAP); - t->pgcookie.data = &t->pginfo; - t->pgcookie.size = sizeof(DB_PGINFO); - - if ((ret = __os_strdup(dbp->dbenv, name, &t->path)) != 0) - goto err; - t->dir = t->path; - if ((t->name = __db_rpath(t->path)) == NULL) { - t->name = t->path; - t->dir = PATH_DOT; - } else - *t->name++ = '\0'; - - if (mode == 0) - mode = __db_omode("rwrw--"); - t->mode = mode; - } +err: if (qmeta != NULL && + (t_ret = __memp_fput(mpf, qmeta, 0)) != 0 && ret == 0) + ret = t_ret; -err: -DB_TEST_RECOVERY_LABEL /* Don't hold the meta page long term. */ (void)__LPUT(dbc, metalock); - if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0) ret = t_ret; return (ret); } /* + * __qam_set_ext_data -- + * Setup DBP data for opening queue extents. + * + * PUBLIC: int __qam_set_ext_data __P((DB*, const char *)); + */ +int +__qam_set_ext_data(dbp, name) + DB *dbp; + const char *name; +{ + QUEUE *t; + int ret; + + t = dbp->q_internal; + t->pginfo.db_pagesize = dbp->pgsize; + t->pginfo.flags = + F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); + t->pginfo.type = dbp->type; + t->pgcookie.data = &t->pginfo; + t->pgcookie.size = sizeof(DB_PGINFO); + + if ((ret = __os_strdup(dbp->dbenv, name, &t->path)) != 0) + return (ret); + t->dir = t->path; + if ((t->name = __db_rpath(t->path)) == NULL) { + t->name = t->path; + t->dir = PATH_DOT; + } else + *t->name++ = '\0'; + + return (0); +} + +/* * __qam_metachk -- * * PUBLIC: int __qam_metachk __P((DB *, const char *, QMETA *)); @@ -225,6 +177,7 @@ __qam_metachk(dbp, name, qmeta) int ret; dbenv = dbp->dbenv; + ret = 0; /* * At this point, all we know is that the magic number is for a Queue. @@ -241,6 +194,7 @@ __qam_metachk(dbp, name, qmeta) name, (u_long)vers); return (DB_OLD_VERSION); case 3: + case 4: break; default: __db_err(dbenv, @@ -264,5 +218,132 @@ __qam_metachk(dbp, name, qmeta) /* Copy the file's ID. */ memcpy(dbp->fileid, qmeta->dbmeta.uid, DB_FILE_ID_LEN); + /* Set up AM-specific methods that do not require an open. */ + dbp->db_am_rename = __qam_rename; + dbp->db_am_remove = __qam_remove; + + return (ret); +} + +/* + * __qam_init_meta -- + * Initialize the meta-data for a Queue database. + */ +static int +__qam_init_meta(dbp, meta) + DB *dbp; + QMETA *meta; +{ + QUEUE *t; + + t = dbp->q_internal; + + memset(meta, 0, sizeof(QMETA)); + LSN_NOT_LOGGED(meta->dbmeta.lsn); + meta->dbmeta.pgno = PGNO_BASE_MD; + meta->dbmeta.last_pgno = 0; + meta->dbmeta.magic = DB_QAMMAGIC; + meta->dbmeta.version = DB_QAMVERSION; + meta->dbmeta.pagesize = dbp->pgsize; + if (F_ISSET(dbp, DB_AM_CHKSUM)) + FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM); + if (F_ISSET(dbp, DB_AM_ENCRYPT)) { + meta->dbmeta.encrypt_alg = + ((DB_CIPHER *)dbp->dbenv->crypto_handle)->alg; + DB_ASSERT(meta->dbmeta.encrypt_alg != 0); + meta->crypto_magic = meta->dbmeta.magic; + } + meta->dbmeta.type = P_QAMMETA; + meta->re_pad = t->re_pad; + meta->re_len = t->re_len; + meta->rec_page = CALC_QAM_RECNO_PER_PAGE(dbp); + meta->cur_recno = 1; + meta->first_recno = 1; + meta->page_ext = t->page_ext; + t->rec_page = meta->rec_page; + memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); + + /* Verify that we can fit at least one record per page. */ + if (QAM_RECNO_PER_PAGE(dbp) < 1) { + __db_err(dbp->dbenv, + "Record size of %lu too large for page size of %lu", + (u_long)t->re_len, (u_long)dbp->pgsize); + return (EINVAL); + } + return (0); } + +/* + * __qam_new_file -- + * Create the necessary pages to begin a new queue database file. + * + * This code appears more complex than it is because of the two cases (named + * and unnamed). The way to read the code is that for each page being created, + * there are three parts: 1) a "get page" chunk (which either uses malloc'd + * memory or calls __memp_fget), 2) the initialization, and 3) the "put page" + * chunk which either does a fop write or an __memp_fput. + * + * PUBLIC: int __qam_new_file __P((DB *, DB_TXN *, DB_FH *, const char *)); + */ +int +__qam_new_file(dbp, txn, fhp, name) + DB *dbp; + DB_TXN *txn; + DB_FH *fhp; + const char *name; +{ + QMETA *meta; + DB_ENV *dbenv; + DB_MPOOLFILE *mpf; + DB_PGINFO pginfo; + DBT pdbt; + db_pgno_t pgno; + int ret; + void *buf; + + dbenv = dbp->dbenv; + mpf = dbp->mpf; + buf = NULL; + meta = NULL; + + /* Build meta-data page. */ + + if (name == NULL) { + pgno = PGNO_BASE_MD; + ret = __memp_fget(mpf, &pgno, DB_MPOOL_CREATE, &meta); + } else { + ret = __os_calloc(dbp->dbenv, 1, dbp->pgsize, &buf); + meta = (QMETA *)buf; + } + if (ret != 0) + return (ret); + + if ((ret = __qam_init_meta(dbp, meta)) != 0) + goto err; + + if (name == NULL) + ret = __memp_fput(mpf, meta, DB_MPOOL_DIRTY); + else { + pginfo.db_pagesize = dbp->pgsize; + pginfo.flags = + F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); + pginfo.type = DB_QUEUE; + pdbt.data = &pginfo; + pdbt.size = sizeof(pginfo); + if ((ret = __db_pgout(dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0) + goto err; + ret = __fop_write(dbenv, txn, name, + DB_APP_DATA, fhp, dbp->pgsize, 0, 0, buf, dbp->pgsize, 1, + F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0); + } + if (ret != 0) + goto err; + meta = NULL; + +err: if (name != NULL) + __os_free(dbenv, buf); + else if (meta != NULL) + (void)__memp_fput(mpf, meta, 0); + return (ret); +} diff --git a/db/qam/qam_rec.c b/db/qam/qam_rec.c index 4d330f586..d846118ac 100644 --- a/db/qam/qam_rec.c +++ b/db/qam/qam_rec.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: qam_rec.c,v 11.34 2001/01/19 18:01:59 bostic Exp $"; +static const char revid[] = "$Id: qam_rec.c,v 11.75 2003/08/17 23:38:14 ubell Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,91 +18,13 @@ static const char revid[] = "$Id: qam_rec.c,v 11.34 2001/01/19 18:01:59 bostic E #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "lock.h" -#include "db_am.h" -#include "qam.h" -#include "log.h" - -/* - * __qam_inc_recover -- - * Recovery function for inc. - * - * PUBLIC: int __qam_inc_recover __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__qam_inc_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __qam_inc_args *argp; - DB *file_dbp; - DBC *dbc; - DB_LOCK lock; - DB_MPOOLFILE *mpf; - QMETA *meta; - db_pgno_t metapg; - int cmp_p, modified, ret; - - COMPQUIET(info, NULL); - REC_PRINT(__qam_inc_print); - REC_INTRO(__qam_inc_read, 1); - - metapg = ((QUEUE *)file_dbp->q_internal)->q_meta; - - if ((ret = __db_lget(dbc, - LCK_ROLLBACK, metapg, DB_LOCK_WRITE, 0, &lock)) != 0) - goto done; - if ((ret = memp_fget(mpf, &metapg, 0, &meta)) != 0) { - if (DB_REDO(op)) { - if ((ret = memp_fget(mpf, - &metapg, DB_MPOOL_CREATE, &meta)) != 0) { - (void)__LPUT(dbc, lock); - goto out; - } - meta->dbmeta.pgno = metapg; - meta->dbmeta.type = P_QAMMETA; - - } else { - *lsnp = argp->prev_lsn; - ret = 0; - (void)__LPUT(dbc, lock); - goto out; - } - } - - modified = 0; - cmp_p = log_compare(&LSN(meta), &argp->lsn); - CHECK_LSN(op, cmp_p, &LSN(meta), &argp->lsn); - - /* - * The cur_recno never goes backwards. It is a point of - * contention among appenders. If one fails cur_recno will - * most likely be beyond that one when it aborts. - * We move it ahead on either an abort or a commit - * and make the LSN reflect that fact. - */ - if (cmp_p == 0) { - modified = 1; - meta->cur_recno++; - if (meta->cur_recno == RECNO_OOB) - meta->cur_recno++; - meta->dbmeta.lsn = *lsnp; - } - if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0))) - goto out; - - (void)__LPUT(dbc, lock); - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: REC_CLOSE; -} +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" /* * __qam_incfirst_recover -- @@ -123,13 +45,13 @@ __qam_incfirst_recover(dbenv, dbtp, lsnp, op, info) DB *file_dbp; DBC *dbc; DB_LOCK lock; + DB_LSN trunc_lsn; DB_MPOOLFILE *mpf; QMETA *meta; QUEUE_CURSOR *cp; db_pgno_t metapg; int exact, modified, ret, rec_ext; - COMPQUIET(info, NULL); REC_PRINT(__qam_incfirst_print); REC_INTRO(__qam_incfirst_read, 1); @@ -138,9 +60,9 @@ __qam_incfirst_recover(dbenv, dbtp, lsnp, op, info) if ((ret = __db_lget(dbc, LCK_ROLLBACK, metapg, DB_LOCK_WRITE, 0, &lock)) != 0) goto done; - if ((ret = memp_fget(mpf, &metapg, 0, &meta)) != 0) { + if ((ret = __memp_fget(mpf, &metapg, 0, &meta)) != 0) { if (DB_REDO(op)) { - if ((ret = memp_fget(mpf, + if ((ret = __memp_fget(mpf, &metapg, DB_MPOOL_CREATE, &meta)) != 0) { (void)__LPUT(dbc, lock); goto out; @@ -167,6 +89,14 @@ __qam_incfirst_recover(dbenv, dbtp, lsnp, op, info) meta->first_recno = argp->recno; modified = 1; } + + trunc_lsn = ((DB_TXNHEAD *)info)->trunc_lsn; + /* if we are truncating, update the LSN */ + if (!IS_ZERO_LSN(trunc_lsn) && + log_compare(&LSN(meta), &trunc_lsn) > 0) { + LSN(meta) = trunc_lsn; + modified = 1; + } } else { if (log_compare(&LSN(meta), lsnp) < 0) { LSN(meta) = *lsnp; @@ -178,11 +108,11 @@ __qam_incfirst_recover(dbenv, dbtp, lsnp, op, info) cp = (QUEUE_CURSOR *)dbc->internal; if (meta->first_recno == RECNO_OOB) meta->first_recno++; - while (meta->first_recno != meta->cur_recno - && !QAM_BEFORE_FIRST(meta, argp->recno + 1)) { + while (meta->first_recno != meta->cur_recno && + !QAM_BEFORE_FIRST(meta, argp->recno + 1)) { if ((ret = __qam_position(dbc, &meta->first_recno, QAM_READ, &exact)) != 0) - goto out; + goto err; if (cp->page != NULL) __qam_fput(file_dbp, cp->pgno, cp->page, 0); @@ -192,7 +122,7 @@ __qam_incfirst_recover(dbenv, dbtp, lsnp, op, info) rec_ext != 0 && meta->first_recno % rec_ext == 0) if ((ret = __qam_fremove(file_dbp, cp->pgno)) != 0) - goto out; + goto err; meta->first_recno++; if (meta->first_recno == RECNO_OOB) meta->first_recno++; @@ -200,14 +130,19 @@ __qam_incfirst_recover(dbenv, dbtp, lsnp, op, info) } } - if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0))) - goto out; + if ((ret = __memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0) + goto err1; (void)__LPUT(dbc, lock); done: *lsnp = argp->prev_lsn; ret = 0; + if (0) { +err: (void)__memp_fput(mpf, meta, 0); +err1: (void)__LPUT(dbc, lock); + } + out: REC_CLOSE; } @@ -229,13 +164,13 @@ __qam_mvptr_recover(dbenv, dbtp, lsnp, op, info) __qam_mvptr_args *argp; DB *file_dbp; DBC *dbc; + DB_LSN trunc_lsn; DB_LOCK lock; DB_MPOOLFILE *mpf; QMETA *meta; db_pgno_t metapg; - int cmp_p, modified, ret; + int cmp_n, cmp_p, modified, ret; - COMPQUIET(info, NULL); REC_PRINT(__qam_mvptr_print); REC_INTRO(__qam_mvptr_read, 1); @@ -244,9 +179,9 @@ __qam_mvptr_recover(dbenv, dbtp, lsnp, op, info) if ((ret = __db_lget(dbc, LCK_ROLLBACK, metapg, DB_LOCK_WRITE, 0, &lock)) != 0) goto done; - if ((ret = memp_fget(mpf, &metapg, 0, &meta)) != 0) { + if ((ret = __memp_fget(mpf, &metapg, 0, &meta)) != 0) { if (DB_REDO(op)) { - if ((ret = memp_fget(mpf, + if ((ret = __memp_fget(mpf, &metapg, DB_MPOOL_CREATE, &meta)) != 0) { (void)__LPUT(dbc, lock); goto out; @@ -262,13 +197,31 @@ __qam_mvptr_recover(dbenv, dbtp, lsnp, op, info) } modified = 0; - cmp_p = log_compare(&meta->dbmeta.lsn, &argp->metalsn); + cmp_n = log_compare(lsnp, &LSN(meta)); + cmp_p = log_compare(&LSN(meta), &argp->metalsn); /* - * We never undo a movement of one of the pointers. - * Just move them along regardless of abort/commit. + * Under normal circumstances, we never undo a movement of one of + * the pointers. Just move them along regardless of abort/commit. + * + * If we're undoing a truncate, we need to reset the pointers to + * their state before the truncate. */ - if (cmp_p == 0) { + if (DB_UNDO(op)) { + if ((argp->opcode & QAM_TRUNCATE) && cmp_n <= 0) { + meta->first_recno = argp->old_first; + meta->cur_recno = argp->old_cur; + LSN(meta) = argp->metalsn; + modified = 1; + } + /* If the page lsn is beyond the truncate point, move it back */ + trunc_lsn = ((DB_TXNHEAD *)info)->trunc_lsn; + if (!IS_ZERO_LSN(trunc_lsn) && + log_compare(&trunc_lsn, &LSN(meta)) < 0) { + LSN(meta) = argp->metalsn; + modified = 1; + } + } else if (op == DB_TXN_APPLY || cmp_p == 0) { if (argp->opcode & QAM_SETFIRST) meta->first_recno = argp->new_first; @@ -279,7 +232,7 @@ __qam_mvptr_recover(dbenv, dbtp, lsnp, op, info) meta->dbmeta.lsn = *lsnp; } - if ((ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0))) + if ((ret = __memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; (void)__LPUT(dbc, lock); @@ -289,6 +242,7 @@ done: *lsnp = argp->prev_lsn; out: REC_CLOSE; } + /* * __qam_del_recover -- * Recovery function for del. @@ -321,7 +275,7 @@ __qam_del_recover(dbenv, dbtp, lsnp, op, info) REC_INTRO(__qam_del_read, 1); if ((ret = __qam_fget(file_dbp, - &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) goto out; modified = 0; @@ -338,20 +292,20 @@ __qam_del_recover(dbenv, dbtp, lsnp, op, info) metapg = ((QUEUE *)file_dbp->q_internal)->q_meta; if ((ret = __db_lget(dbc, LCK_ROLLBACK, metapg, DB_LOCK_WRITE, 0, &lock)) != 0) - return (ret); - if ((ret = memp_fget(file_dbp->mpf, &metapg, 0, &meta)) != 0) { + goto err; + if ((ret = __memp_fget(mpf, &metapg, 0, &meta)) != 0) { (void)__LPUT(dbc, lock); - goto done; + goto err; } if (meta->first_recno == RECNO_OOB || - (QAM_BEFORE_FIRST(meta, argp->recno) - && (meta->first_recno <= meta->cur_recno - || meta->first_recno - + (QAM_BEFORE_FIRST(meta, argp->recno) && + (meta->first_recno <= meta->cur_recno || + meta->first_recno - argp->recno < argp->recno - meta->cur_recno))) { meta->first_recno = argp->recno; - (void)memp_fput(file_dbp->mpf, meta, DB_MPOOL_DIRTY); + (void)__memp_fput(mpf, meta, DB_MPOOL_DIRTY); } else - (void)memp_fput(file_dbp->mpf, meta, 0); + (void)__memp_fput(mpf, meta, 0); (void)__LPUT(dbc, lock); /* Need to undo delete - mark the record as present */ @@ -366,10 +320,10 @@ __qam_del_recover(dbenv, dbtp, lsnp, op, info) * is harmless in queue except when we're determining * what we need to roll forward during recovery. [#2588] */ - if (op == DB_TXN_BACKWARD_ROLL && cmp_n < 0) + if (op == DB_TXN_BACKWARD_ROLL && cmp_n <= 0) LSN(pagep) = argp->lsn; modified = 1; - } else if (cmp_n > 0 && DB_REDO(op)) { + } else if (op == DB_TXN_APPLY || (cmp_n > 0 && DB_REDO(op))) { /* Need to redo delete - clear the valid bit */ qp = QAM_GET_RECORD(file_dbp, pagep, argp->indx); F_CLR(qp, QAM_VALID); @@ -377,14 +331,18 @@ __qam_del_recover(dbenv, dbtp, lsnp, op, info) modified = 1; } if ((ret = __qam_fput(file_dbp, - argp->pgno, pagep, modified ? DB_MPOOL_DIRTY : 0))) + argp->pgno, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; done: *lsnp = argp->prev_lsn; ret = 0; + if (0) { +err: (void)__qam_fput(file_dbp, argp->pgno, pagep, 0); + } out: REC_CLOSE; } + /* * __qam_delext_recover -- * Recovery function for del in an extent based queue. @@ -415,9 +373,19 @@ __qam_delext_recover(dbenv, dbtp, lsnp, op, info) REC_PRINT(__qam_delext_print); REC_INTRO(__qam_delext_read, 1); - if ((ret = __qam_fget(file_dbp, - &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; + if ((ret = __qam_fget(file_dbp, &argp->pgno, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND && ret != ENOENT) + goto out; + /* + * If we are redoing a delete and the page is not there + * we are done. + */ + if (DB_REDO(op)) + goto done; + if ((ret = __qam_fget(file_dbp, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } modified = 0; if (pagep->pgno == PGNO_INVALID) { @@ -433,25 +401,25 @@ __qam_delext_recover(dbenv, dbtp, lsnp, op, info) metapg = ((QUEUE *)file_dbp->q_internal)->q_meta; if ((ret = __db_lget(dbc, LCK_ROLLBACK, metapg, DB_LOCK_WRITE, 0, &lock)) != 0) - return (ret); - if ((ret = memp_fget(file_dbp->mpf, &metapg, 0, &meta)) != 0) { + goto err; + if ((ret = __memp_fget(mpf, &metapg, 0, &meta)) != 0) { (void)__LPUT(dbc, lock); - goto done; + goto err; } if (meta->first_recno == RECNO_OOB || - (QAM_BEFORE_FIRST(meta, argp->recno) - && (meta->first_recno <= meta->cur_recno - || meta->first_recno - + (QAM_BEFORE_FIRST(meta, argp->recno) && + (meta->first_recno <= meta->cur_recno || + meta->first_recno - argp->recno < argp->recno - meta->cur_recno))) { meta->first_recno = argp->recno; - (void)memp_fput(file_dbp->mpf, meta, DB_MPOOL_DIRTY); + (void)__memp_fput(mpf, meta, DB_MPOOL_DIRTY); } else - (void)memp_fput(file_dbp->mpf, meta, 0); + (void)__memp_fput(mpf, meta, 0); (void)__LPUT(dbc, lock); if ((ret = __qam_pitem(dbc, pagep, argp->indx, argp->recno, &argp->data)) != 0) - goto done; + goto err; /* * Move the LSN back to this point; do not move it forward. @@ -461,10 +429,10 @@ __qam_delext_recover(dbenv, dbtp, lsnp, op, info) * is harmless in queue except when we're determining * what we need to roll forward during recovery. [#2588] */ - if (op == DB_TXN_BACKWARD_ROLL && cmp_n < 0) + if (op == DB_TXN_BACKWARD_ROLL && cmp_n <= 0) LSN(pagep) = argp->lsn; modified = 1; - } else if (cmp_n > 0 && DB_REDO(op)) { + } else if (op == DB_TXN_APPLY || (cmp_n > 0 && DB_REDO(op))) { /* Need to redo delete - clear the valid bit */ qp = QAM_GET_RECORD(file_dbp, pagep, argp->indx); F_CLR(qp, QAM_VALID); @@ -472,12 +440,15 @@ __qam_delext_recover(dbenv, dbtp, lsnp, op, info) modified = 1; } if ((ret = __qam_fput(file_dbp, - argp->pgno, pagep, modified ? DB_MPOOL_DIRTY : 0))) + argp->pgno, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; done: *lsnp = argp->prev_lsn; ret = 0; + if (0) { +err: (void)__qam_fput(file_dbp, argp->pgno, pagep, 0); + } out: REC_CLOSE; } @@ -485,7 +456,8 @@ out: REC_CLOSE; * __qam_add_recover -- * Recovery function for add. * - * PUBLIC: int __qam_add_recover __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + * PUBLIC: int __qam_add_recover + * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); */ int __qam_add_recover(dbenv, dbtp, lsnp, op, info) @@ -503,16 +475,26 @@ __qam_add_recover(dbenv, dbtp, lsnp, op, info) QMETA *meta; QPAGE *pagep; db_pgno_t metapg; - int cmp_n, modified, ret; + int cmp_n, meta_dirty, modified, ret; COMPQUIET(info, NULL); REC_PRINT(__qam_add_print); REC_INTRO(__qam_add_read, 1); modified = 0; - if ((ret = __qam_fget(file_dbp, - &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; + if ((ret = __qam_fget(file_dbp, &argp->pgno, 0, &pagep)) != 0) { + if (ret != DB_PAGE_NOTFOUND && ret != ENOENT) + goto out; + /* + * If we are undoing an append and the page is not there + * we are done. + */ + if (DB_UNDO(op)) + goto done; + if ((ret = __qam_fget(file_dbp, + &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0) + goto out; + } if (pagep->pgno == PGNO_INVALID) { pagep->pgno = argp->pgno; @@ -522,25 +504,34 @@ __qam_add_recover(dbenv, dbtp, lsnp, op, info) cmp_n = log_compare(lsnp, &LSN(pagep)); - if (cmp_n > 0 && DB_REDO(op)) { - /* Need to redo add - put the record on page */ - if ((ret = __qam_pitem(dbc, pagep, argp->indx, argp->recno, - &argp->data)) != 0) - goto err; - LSN(pagep) = *lsnp; - modified = 1; - /* Make sure first pointer includes this record. */ + if (DB_REDO(op)) { + /* Fix meta-data page. */ metapg = ((QUEUE *)file_dbp->q_internal)->q_meta; - if ((ret = memp_fget(mpf, &metapg, 0, &meta)) != 0) + if ((ret = __memp_fget(mpf, &metapg, 0, &meta)) != 0) goto err; + meta_dirty = 0; if (QAM_BEFORE_FIRST(meta, argp->recno)) { meta->first_recno = argp->recno; - if ((ret = memp_fput(mpf, meta, DB_MPOOL_DIRTY)) != 0) - goto err; - } else - if ((ret = memp_fput(mpf, meta, 0)) != 0) - goto err; + meta_dirty = 1; + } + if (argp->recno == meta->cur_recno || + QAM_AFTER_CURRENT(meta, argp->recno)) { + meta->cur_recno = argp->recno + 1; + meta_dirty = 1; + } + if ((ret = __memp_fput(mpf, + meta, meta_dirty? DB_MPOOL_DIRTY : 0)) != 0) + goto err; + /* Now update the actual page if necessary. */ + if (op == DB_TXN_APPLY || cmp_n > 0) { + /* Need to redo add - put the record on page */ + if ((ret = __qam_pitem(dbc, + pagep, argp->indx, argp->recno, &argp->data)) != 0) + goto err; + LSN(pagep) = *lsnp; + modified = 1; + } } else if (DB_UNDO(op)) { /* * Need to undo add @@ -572,161 +563,20 @@ __qam_add_recover(dbenv, dbtp, lsnp, op, info) * is harmless in queue except when we're determining * what we need to roll forward during recovery. [#2588] */ - if (op == DB_TXN_BACKWARD_ROLL && cmp_n < 0) + if (op == DB_TXN_BACKWARD_ROLL && cmp_n <= 0) LSN(pagep) = argp->lsn; } -err: if ((ret = __qam_fput(file_dbp, - argp->pgno, pagep, modified ? DB_MPOOL_DIRTY : 0))) + if ((ret = __qam_fput(file_dbp, + argp->pgno, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) goto out; done: *lsnp = argp->prev_lsn; ret = 0; -out: REC_CLOSE; -} -/* - * __qam_delete_recover -- - * Recovery function for delete of an extent. - * - * PUBLIC: int __qam_delete_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__qam_delete_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __qam_delete_args *argp; - int ret; - char *backup, *real_back, *real_name; - - COMPQUIET(info, NULL); - - REC_PRINT(__qam_delete_print); - - backup = real_back = real_name = NULL; - if ((ret = __qam_delete_read(dbenv, dbtp->data, &argp)) != 0) - goto out; - - if (DB_REDO(op)) { - /* - * On a recovery, as we recreate what was going on, we - * recreate the creation of the file. And so, even though - * it committed, we need to delete it. Try to delete it, - * but it is not an error if that delete fails. - */ - if ((ret = __db_appname(dbenv, DB_APP_DATA, - NULL, argp->name.data, 0, NULL, &real_name)) != 0) - goto out; - if (__os_exists(real_name, NULL) == 0) { - if ((ret = __os_unlink(dbenv, real_name)) != 0) - goto out; - } - } else if (DB_UNDO(op)) { - /* - * Trying to undo. File may or may not have been deleted. - * Try to move the backup to the original. If the backup - * exists, then this is right. If it doesn't exist, then - * nothing will happen and that's OK. - */ - if ((ret = __db_backup_name(dbenv, argp->name.data, - &backup, &argp->lsn)) != 0) - goto out; - if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, backup, 0, NULL, &real_back)) != 0) - goto out; - if ((ret = __db_appname(dbenv, DB_APP_DATA, - NULL, argp->name.data, 0, NULL, &real_name)) != 0) - goto out; - if (__os_exists(real_back, NULL) == 0) - if ((ret = - __os_rename(dbenv, real_back, real_name)) != 0) - goto out; - } - *lsnp = argp->prev_lsn; - ret = 0; - -out: if (argp != NULL) - __os_free(argp, 0); - if (backup != NULL) - __os_freestr(backup); - if (real_back != NULL) - __os_freestr(real_back); - if (real_name != NULL) - __os_freestr(real_name); - return (ret); -} -/* - * __qam_rename_recover -- - * Recovery function for rename. - * - * PUBLIC: int __qam_rename_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__qam_rename_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __qam_rename_args *argp; - char *new_name, *real_name; - int ret; - - COMPQUIET(info, NULL); - - REC_PRINT(__qam_rename_print); - - new_name = real_name = NULL; - - if ((ret = __qam_rename_read(dbenv, dbtp->data, &argp)) != 0) - goto out; - - if (DB_REDO(op)) { - if ((ret = __db_appname(dbenv, DB_APP_DATA, - NULL, argp->name.data, 0, NULL, &real_name)) != 0) - goto out; - if (__os_exists(real_name, NULL) == 0) { - if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, argp->newname.data, - 0, NULL, &new_name)) != 0) - goto out; - if ((ret = __os_rename(dbenv, - real_name, new_name)) != 0) - goto out; - } - } else { - if ((ret = __db_appname(dbenv, DB_APP_DATA, - NULL, argp->newname.data, 0, NULL, &new_name)) != 0) - goto out; - if (__os_exists(new_name, NULL) == 0) { - if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, argp->name.data, - 0, NULL, &real_name)) != 0) - goto out; - if ((ret = __os_rename(dbenv, - new_name, real_name)) != 0) - goto out; - } + if (0) { +err: (void)__qam_fput(file_dbp, argp->pgno, pagep, 0); } - *lsnp = argp->prev_lsn; - ret = 0; - -out: if (argp != NULL) - __os_free(argp, 0); - - if (new_name != NULL) - __os_free(new_name, 0); - - if (real_name != NULL) - __os_free(real_name, 0); - - return (ret); +out: REC_CLOSE; } diff --git a/db/qam/qam_stat.c b/db/qam/qam_stat.c index 865f477c1..bc6409e2f 100644 --- a/db/qam/qam_stat.c +++ b/db/qam/qam_stat.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: qam_stat.c,v 11.16 2001/01/10 04:50:54 ubell Exp $"; +static const char revid[] = "$Id: qam_stat.c,v 11.38 2003/09/04 18:06:48 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,87 +18,71 @@ static const char revid[] = "$Id: qam_stat.c,v 11.16 2001/01/10 04:50:54 ubell E #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "db_am.h" -#include "lock.h" -#include "qam.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" /* * __qam_stat -- * Gather/print the qam statistics * - * PUBLIC: int __qam_stat __P((DB *, void *, void *(*)(size_t), u_int32_t)); + * PUBLIC: int __qam_stat __P((DBC *, void *, u_int32_t)); */ int -__qam_stat(dbp, spp, db_malloc, flags) - DB *dbp; +__qam_stat(dbc, spp, flags) + DBC *dbc; void *spp; - void *(*db_malloc) __P((size_t)); u_int32_t flags; { - QUEUE *t; - DBC *dbc; + DB *dbp; DB_LOCK lock; + DB_MPOOLFILE *mpf; DB_QUEUE_STAT *sp; PAGE *h; QAMDATA *qp, *ep; QMETA *meta; + QUEUE *t; db_indx_t indx; db_pgno_t first, last, pgno, pg_ext, stop; u_int32_t re_len; - int ret, t_ret; + int ret; - PANIC_CHECK(dbp->dbenv); - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat"); + dbp = dbc->dbp; - t = dbp->q_internal; + LOCK_INIT(lock); + mpf = dbp->mpf; sp = NULL; - lock.off = LOCK_INVALID; - - /* Check for invalid flags. */ - if ((ret = __db_statchk(dbp, flags)) != 0) - return (ret); + t = dbp->q_internal; if (spp == NULL) return (0); - /* Acquire a cursor. */ - if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, NULL, "qam_stat", NULL, NULL, flags); - /* Allocate and clear the structure. */ - if ((ret = __os_malloc(dbp->dbenv, sizeof(*sp), db_malloc, &sp)) != 0) + if ((ret = __os_umalloc(dbp->dbenv, sizeof(*sp), &sp)) != 0) goto err; memset(sp, 0, sizeof(*sp)); re_len = ((QUEUE *)dbp->q_internal)->re_len; - if (flags == DB_CACHED_COUNTS) { - if ((ret = __db_lget(dbc, - 0, t->q_meta, DB_LOCK_READ, 0, &lock)) != 0) - goto err; - if ((ret = - memp_fget(dbp->mpf, &t->q_meta, 0, (PAGE **)&meta)) != 0) - goto err; - sp->qs_nkeys = meta->dbmeta.key_count; - sp->qs_ndata = meta->dbmeta.record_count; - - goto done; - } /* Determine the last page of the database. */ - if ((ret = __db_lget(dbc, - 0, t->q_meta, DB_LOCK_READ, 0, &lock)) != 0) + if ((ret = __db_lget(dbc, 0, t->q_meta, DB_LOCK_READ, 0, &lock)) != 0) goto err; - if ((ret = memp_fget(dbp->mpf, &t->q_meta, 0, (PAGE **)&meta)) != 0) + if ((ret = __memp_fget(mpf, &t->q_meta, 0, &meta)) != 0) goto err; + if (flags == DB_FAST_STAT || flags == DB_CACHED_COUNTS) { + sp->qs_nkeys = meta->dbmeta.key_count; + sp->qs_ndata = meta->dbmeta.record_count; + goto meta_only; + } + first = QAM_RECNO_PAGE(dbp, meta->first_recno); last = QAM_RECNO_PAGE(dbp, meta->cur_recno); - if ((ret = memp_fput(dbp->mpf, meta, 0)) != 0) + if ((ret = __memp_fput(mpf, meta, 0)) != 0) goto err; (void)__LPUT(dbc, lock); @@ -114,20 +98,23 @@ begin: /* Walk through the pages and count. */ for (; pgno <= stop; ++pgno) { if ((ret = - __db_lget(dbc, - 0, pgno, DB_LOCK_READ, 0, &lock)) != 0) + __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lock)) != 0) goto err; - ret = __qam_fget(dbp, &pgno, DB_MPOOL_EXTENT, &h); + ret = __qam_fget(dbp, &pgno, 0, &h); if (ret == ENOENT) { pgno += pg_ext - 1; continue; } - if (ret == EINVAL) { + if (ret == DB_PAGE_NOTFOUND) { + if (pg_ext == 0) { + if (pgno != stop && first != last) + goto err; + ret = 0; + break; + } pgno += pg_ext - ((pgno - 1) % pg_ext) - 1; continue; } - if (ret == EIO && first == last && pg_ext == 0) - break; if (ret != 0) goto err; @@ -147,6 +134,8 @@ begin: goto err; (void)__LPUT(dbc, lock); } + + (void)__LPUT(dbc, lock); if (first > last) { pgno = 1; stop = last; @@ -159,26 +148,28 @@ begin: 0, t->q_meta, F_ISSET(dbp, DB_AM_RDONLY) ? DB_LOCK_READ : DB_LOCK_WRITE, 0, &lock)) != 0) goto err; - if ((ret = memp_fget(dbp->mpf, &t->q_meta, 0, (PAGE **)&meta)) != 0) + if ((ret = __memp_fget(mpf, &t->q_meta, 0, &meta)) != 0) goto err; + if (!F_ISSET(dbp, DB_AM_RDONLY)) + meta->dbmeta.key_count = + meta->dbmeta.record_count = sp->qs_ndata; + sp->qs_nkeys = sp->qs_ndata; + +meta_only: /* Get the metadata fields. */ sp->qs_magic = meta->dbmeta.magic; sp->qs_version = meta->dbmeta.version; sp->qs_metaflags = meta->dbmeta.flags; sp->qs_pagesize = meta->dbmeta.pagesize; + sp->qs_extentsize = meta->page_ext; sp->qs_re_len = meta->re_len; sp->qs_re_pad = meta->re_pad; sp->qs_first_recno = meta->first_recno; sp->qs_cur_recno = meta->cur_recno; - sp->qs_nkeys = sp->qs_ndata; - if (!F_ISSET(dbp, DB_AM_RDONLY)) - meta->dbmeta.key_count = - meta->dbmeta.record_count = sp->qs_ndata; -done: /* Discard the meta-data page. */ - if ((ret = memp_fput(dbp->mpf, + if ((ret = __memp_fput(mpf, meta, F_ISSET(dbp, DB_AM_RDONLY) ? 0 : DB_MPOOL_DIRTY)) != 0) goto err; (void)__LPUT(dbc, lock); @@ -188,14 +179,10 @@ done: if (0) { err: if (sp != NULL) - __os_free(sp, sizeof(*sp)); + __os_ufree(dbp->dbenv, sp); } - if (lock.off != LOCK_INVALID) - (void)__LPUT(dbc, lock); - - if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) - ret = t_ret; + (void)__LPUT(dbc, lock); return (ret); } diff --git a/db/qam/qam_stub.c b/db/qam/qam_stub.c new file mode 100644 index 000000000..941aacfb4 --- /dev/null +++ b/db/qam/qam_stub.c @@ -0,0 +1,338 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996-2003 + * Sleepycat Software. All rights reserved. + */ +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: qam_stub.c,v 1.7 2003/10/28 18:52:34 bostic Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> +#endif + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/qam.h" + +/* + * If the library wasn't compiled with the Queue access method, various + * routines aren't available. Stub them here, returning an appropriate + * error. + */ + +/* + * __db_no_queue_am -- + * Error when a Berkeley DB build doesn't include the access method. + * + * PUBLIC: int __db_no_queue_am __P((DB_ENV *)); + */ +int +__db_no_queue_am(dbenv) + DB_ENV *dbenv; +{ + __db_err(dbenv, + "library build did not include support for the Queue access method"); + return (DB_OPNOTSUP); +} + +int +__db_prqueue(dbp, fp, flags) + DB *dbp; + FILE *fp; + u_int32_t flags; +{ + COMPQUIET(fp, NULL); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_31_qammeta(dbp, real_name, buf) + DB *dbp; + char *real_name; + u_int8_t *buf; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(buf, NULL); + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_32_qammeta(dbp, real_name, buf) + DB *dbp; + char *real_name; + u_int8_t *buf; +{ + COMPQUIET(real_name, NULL); + COMPQUIET(buf, NULL); + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_append(dbc, key, data) + DBC *dbc; + DBT *key, *data; +{ + COMPQUIET(key, NULL); + COMPQUIET(data, NULL); + return (__db_no_queue_am(dbc->dbp->dbenv)); +} + +int +__qam_c_dup(orig_dbc, new_dbc) + DBC *orig_dbc, *new_dbc; +{ + COMPQUIET(new_dbc, NULL); + return (__db_no_queue_am(orig_dbc->dbp->dbenv)); +} + +int +__qam_c_init(dbc) + DBC *dbc; +{ + return (__db_no_queue_am(dbc->dbp->dbenv)); +} + +int +__qam_db_close(dbp, flags) + DB *dbp; + u_int32_t flags; +{ + COMPQUIET(dbp, NULL); + COMPQUIET(flags, 0); + return (0); +} + +int +__qam_db_create(dbp) + DB *dbp; +{ + COMPQUIET(dbp, NULL); + return (0); +} + +int +__qam_extent_names(dbenv, name, namelistp) + DB_ENV *dbenv; + char *name; + char ***namelistp; +{ + COMPQUIET(name, NULL); + COMPQUIET(namelistp, NULL); + return (__db_no_queue_am(dbenv)); +} + +int +__qam_gen_filelist(dbp, filelistp) + DB *dbp; + QUEUE_FILELIST **filelistp; +{ + COMPQUIET(filelistp, NULL); + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_init_getpgnos(dbenv, dtabp, dtabsizep) + DB_ENV *dbenv; + int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + size_t *dtabsizep; +{ + COMPQUIET(dbenv, NULL); + COMPQUIET(dtabp, NULL); + COMPQUIET(dtabsizep, NULL); + return (0); +} + +int +__qam_init_print(dbenv, dtabp, dtabsizep) + DB_ENV *dbenv; + int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + size_t *dtabsizep; +{ + COMPQUIET(dbenv, NULL); + COMPQUIET(dtabp, NULL); + COMPQUIET(dtabsizep, NULL); + return (0); +} + +int +__qam_init_recover(dbenv, dtabp, dtabsizep) + DB_ENV *dbenv; + int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + size_t *dtabsizep; +{ + COMPQUIET(dbenv, NULL); + COMPQUIET(dtabp, NULL); + COMPQUIET(dtabsizep, NULL); + return (0); +} + +int +__qam_metachk(dbp, name, qmeta) + DB *dbp; + const char *name; + QMETA *qmeta; +{ + COMPQUIET(name, NULL); + COMPQUIET(qmeta, NULL); + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_new_file(dbp, txn, fhp, name) + DB *dbp; + DB_TXN *txn; + DB_FH *fhp; + const char *name; +{ + COMPQUIET(txn, NULL); + COMPQUIET(fhp, NULL); + COMPQUIET(name, NULL); + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_open(dbp, txn, name, base_pgno, mode, flags) + DB *dbp; + DB_TXN *txn; + const char *name; + db_pgno_t base_pgno; + int mode; + u_int32_t flags; +{ + COMPQUIET(txn, NULL); + COMPQUIET(name, NULL); + COMPQUIET(base_pgno, 0); + COMPQUIET(mode, 0); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_pgin_out(dbenv, pg, pp, cookie) + DB_ENV *dbenv; + db_pgno_t pg; + void *pp; + DBT *cookie; +{ + COMPQUIET(pg, 0); + COMPQUIET(pp, NULL); + COMPQUIET(cookie, NULL); + return (__db_no_queue_am(dbenv)); +} + +int +__qam_salvage(dbp, vdp, pgno, h, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(h, NULL); + COMPQUIET(handle, NULL); + COMPQUIET(callback, NULL); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_set_ext_data(dbp, name) + DB *dbp; + const char *name; +{ + COMPQUIET(name, NULL); + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_stat(dbc, spp, flags) + DBC *dbc; + void *spp; + u_int32_t flags; +{ + COMPQUIET(spp, NULL); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbc->dbp->dbenv)); +} + +int +__qam_sync(dbp) + DB *dbp; +{ + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_truncate(dbc, countp) + DBC *dbc; + u_int32_t *countp; +{ + COMPQUIET(dbc, NULL); + COMPQUIET(countp, NULL); + return (__db_no_queue_am(dbc->dbp->dbenv)); +} + +int +__qam_vrfy_data(dbp, vdp, h, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + QPAGE *h; + db_pgno_t pgno; + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(h, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_vrfy_meta(dbp, vdp, meta, pgno, flags) + DB *dbp; + VRFY_DBINFO *vdp; + QMETA *meta; + db_pgno_t pgno; + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(meta, NULL); + COMPQUIET(pgno, 0); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_vrfy_structure(dbp, vdp, flags) + DB *dbp; + VRFY_DBINFO *vdp; + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->dbenv)); +} + +int +__qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + COMPQUIET(vdp, NULL); + COMPQUIET(handle, NULL); + COMPQUIET(callback, NULL); + COMPQUIET(flags, 0); + return (__db_no_queue_am(dbp->dbenv)); +} diff --git a/db/qam/qam_upgrade.c b/db/qam/qam_upgrade.c index f49bfe88d..b10b4696a 100644 --- a/db/qam/qam_upgrade.c +++ b/db/qam/qam_upgrade.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: qam_upgrade.c,v 11.7 2000/11/30 00:58:44 ubell Exp $"; +static const char revid[] = "$Id: qam_upgrade.c,v 11.14 2003/01/08 05:37:44 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,10 +18,9 @@ static const char revid[] = "$Id: qam_upgrade.c,v 11.7 2000/11/30 00:58:44 ubell #endif #include "db_int.h" -#include "db_page.h" -#include "db_swap.h" -#include "db_am.h" -#include "db_upgrade.h" +#include "dbinc/db_upgrade.h" +#include "dbinc/db_page.h" +#include "dbinc/qam.h" /* * __qam_31_qammeta -- diff --git a/db/qam/qam_verify.c b/db/qam/qam_verify.c index a9a467d67..ddbc9525f 100644 --- a/db/qam/qam_verify.c +++ b/db/qam/qam_verify.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: qam_verify.c,v 1.17 2000/12/12 17:39:35 bostic Exp $"; +static const char revid[] = "$Id: qam_verify.c,v 1.45 2003/08/12 19:51:55 ubell Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -17,10 +17,14 @@ static const char revid[] = "$Id: qam_verify.c,v 1.17 2000/12/12 17:39:35 bostic #endif #include "db_int.h" -#include "db_page.h" -#include "db_verify.h" -#include "qam.h" -#include "db_ext.h" +#include "dbinc/db_page.h" +#include "dbinc/db_verify.h" +#include "dbinc/db_am.h" +#include "dbinc/db_shash.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +#include <stdlib.h> +#include <string.h> /* * __qam_vrfy_meta -- @@ -37,8 +41,19 @@ __qam_vrfy_meta(dbp, vdp, meta, pgno, flags) db_pgno_t pgno; u_int32_t flags; { + DB_ENV *dbenv; + QUEUE *qp; VRFY_PAGEINFO *pip; - int isbad, ret, t_ret; + db_pgno_t *extents, extid, first, last; + size_t len; + int count, i, isbad, nextents, ret, t_ret; + char *buf, **names; + + dbenv = dbp->dbenv; + first = last = 0; + buf = NULL; + names = NULL; + qp = (QUEUE *)dbp->q_internal; if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) return (ret); @@ -49,39 +64,121 @@ __qam_vrfy_meta(dbp, vdp, meta, pgno, flags) * something very odd is going on. */ if (!F_ISSET(pip, VRFY_INCOMPLETE)) - EPRINT((dbp->dbenv, "Queue databases must be one-per-file.")); + EPRINT((dbenv, "Page %lu: queue databases must be one-per-file", + (u_long)pgno)); /* - * cur_recno/rec_page - * Cur_recno may be one beyond the end of the page and - * we start numbering from 1. + * Because the metapage pointers are rolled forward by + * aborting transactions, the extent of the queue may + * extend beyond the allocated pages, so we do + * not check that meta_current is within the allocated + * pages. */ - if (vdp->last_pgno > 0 && meta->cur_recno > 0 && - meta->cur_recno - 1 > meta->rec_page * vdp->last_pgno) { - EPRINT((dbp->dbenv, - "Current recno %lu references record past last page number %lu", - meta->cur_recno, vdp->last_pgno)); - isbad = 1; - } /* * re_len: If this is bad, we can't safely verify queue data pages, so * return DB_VERIFY_FATAL */ if (ALIGN(meta->re_len + sizeof(QAMDATA) - 1, sizeof(u_int32_t)) * - meta->rec_page + sizeof(QPAGE) > dbp->pgsize) { - EPRINT((dbp->dbenv, - "Queue record length %lu impossibly high for page size and records per page", - meta->re_len)); + meta->rec_page + QPAGE_SZ(dbp) > dbp->pgsize) { + EPRINT((dbenv, + "Page %lu: queue record length %lu too high for page size and recs/page", + (u_long)pgno, (u_long)meta->re_len)); ret = DB_VERIFY_FATAL; goto err; } else { - vdp->re_len = meta->re_len; - vdp->rec_page = meta->rec_page; + /* + * We initialize the Queue internal pointer; we may need + * it when handling extents. It would get set up in open, + * if we called open normally, but we don't. + */ + qp->re_len = vdp->re_len = meta->re_len; + qp->rec_page = vdp->rec_page = meta->rec_page; + qp->page_ext = vdp->page_ext = meta->page_ext; } -err: if ((t_ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0) + /* + * There's no formal maximum extentsize, and a 0 value represents + * no extents, so there's nothing to verify. + * + * Note that since QUEUE databases can't have subdatabases, it's an + * error to see more than one QUEUE metadata page in a single + * verifier run. Theoretically, this should really be a structure + * rather than a per-page check, but since we're setting qp fields + * here (and have only one qp to set) we raise the alarm now if + * this assumption fails. (We need the qp info to be reasonable + * before we do per-page verification of queue extents.) + */ + if (F_ISSET(vdp, VRFY_QMETA_SET)) { + isbad = 1; + EPRINT((dbenv, + "Page %lu: database contains multiple Queue metadata pages", + (u_long)pgno)); + goto err; + } + F_SET(vdp, VRFY_QMETA_SET); + qp->page_ext = meta->page_ext; + dbp->pgsize = meta->dbmeta.pagesize; + qp->q_meta = pgno; + qp->q_root = pgno + 1; + vdp->first_recno = meta->first_recno; + vdp->last_recno = meta->cur_recno; + if (qp->page_ext != 0) { + first = QAM_RECNO_EXTENT(dbp, vdp->first_recno); + last = QAM_RECNO_EXTENT(dbp, vdp->last_recno); + } + + /* + * Look in the data directory to see if there are any extents + * around that are not in the range of the queue. If so, + * then report that and look there if we are salvaging. + */ + + if ((ret = __db_appname(dbenv, + DB_APP_DATA, qp->dir, 0, NULL, &buf)) != 0) + goto err; + if ((ret = __os_dirlist(dbenv, buf, &names, &count)) != 0) + goto err; + __os_free(dbenv, buf); + buf = NULL; + + len = strlen(QUEUE_EXTENT_HEAD) + strlen(qp->name) + 1; + if ((ret = __os_malloc(dbenv, len, &buf)) != 0) + goto err; + len = snprintf(buf, len, QUEUE_EXTENT_HEAD, qp->name); + nextents = 0; + extents = NULL; + for (i = 0; i < count; i++) { + if (strncmp(names[i], buf, len) == 0) { + /* Only save extents out of bounds. */ + extid = atoi(&names[i][len]); + if (qp->page_ext != 0 && + (last > first ? + (extid >= first && extid <= last) : + (extid >= first || extid <= last))) + continue; + if (extents == NULL && + (ret = __os_malloc(dbenv, + (count - i) * sizeof(extid), &extents)) != 0) + goto err; + extents[nextents] = extid; + nextents++; + } + } + if (nextents > 0) + __db_err(dbenv, + "Warning: %d extra extent files found", nextents); + vdp->nextents = nextents; + vdp->extents = extents; + +err: if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0) ret = t_ret; + if (names != NULL) + __os_dirfree(dbenv, names, count); + if (buf != NULL) + __os_free(dbenv, buf); + if (ret != 0 && extents != NULL) + __os_free(dbenv, extents); return (ret == 0 && isbad == 1 ? DB_VERIFY_BAD : ret); } @@ -104,7 +201,6 @@ __qam_vrfy_data(dbp, vdp, h, pgno, flags) struct __queue fakeq; QAMDATA *qp; db_recno_t i; - u_int8_t qflags; /* * Not much to do here, except make sure that flags are reasonable. @@ -114,23 +210,22 @@ __qam_vrfy_data(dbp, vdp, h, pgno, flags) * some gross games to fake it out. */ fakedb.q_internal = &fakeq; + fakedb.flags = dbp->flags; fakeq.re_len = vdp->re_len; for (i = 0; i < vdp->rec_page; i++) { qp = QAM_GET_RECORD(&fakedb, h, i); if ((u_int8_t *)qp >= (u_int8_t *)h + dbp->pgsize) { EPRINT((dbp->dbenv, - "Queue record %lu extends past end of page %lu", - i, pgno)); + "Page %lu: queue record %lu extends past end of page", + (u_long)pgno, (u_long)i)); return (DB_VERIFY_BAD); } - qflags = qp->flags; - qflags &= !(QAM_VALID | QAM_SET); - if (qflags != 0) { + if (qp->flags & ~(QAM_VALID | QAM_SET)) { EPRINT((dbp->dbenv, - "Queue record %lu on page %lu has bad flags", - i, pgno)); + "Page %lu: queue record %lu has bad flags (%#lx)", + (u_long)pgno, (u_long)i, (u_long)qp->flags)); return (DB_VERIFY_BAD); } } @@ -161,7 +256,8 @@ __qam_vrfy_structure(dbp, vdp, flags) if (pip->type != P_QAMMETA) { EPRINT((dbp->dbenv, - "Queue database has no meta page")); + "Page %lu: queue database has no meta page", + (u_long)PGNO_BASE_MD)); isbad = 1; goto err; } @@ -174,21 +270,251 @@ __qam_vrfy_structure(dbp, vdp, flags) if (!LF_ISSET(DB_SALVAGE)) __db_vrfy_struct_feedback(dbp, vdp); - if ((ret = __db_vrfy_putpageinfo(vdp, pip)) != 0 || + if ((ret = __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 || (ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0) return (ret); if (!F_ISSET(pip, VRFY_IS_ALLZEROES) && pip->type != P_QAMDATA) { EPRINT((dbp->dbenv, - "Queue database page %lu of incorrect type %lu", - i, pip->type)); + "Page %lu: queue database page of incorrect type %lu", + (u_long)i, (u_long)pip->type)); isbad = 1; goto err; } else if ((ret = __db_vrfy_pgset_inc(vdp->pgset, i)) != 0) goto err; } -err: if ((ret = __db_vrfy_putpageinfo(vdp, pip)) != 0) +err: if ((ret = __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0) return (ret); return (isbad == 1 ? DB_VERIFY_BAD : 0); } + +/* + * __qam_vrfy_walkqueue -- + * Do a "walkpages" per-page verification pass over the set of Queue + * extent pages. + * + * PUBLIC: int __qam_vrfy_walkqueue __P((DB *, VRFY_DBINFO *, void *, + * PUBLIC: int (*)(void *, const void *), u_int32_t)); + */ +int +__qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + DB_ENV *dbenv; + PAGE *h; + QUEUE *qp; + VRFY_PAGEINFO *pip; + db_pgno_t first, i, last, pg_ext, stop; + int isbad, nextents, ret, t_ret; + + isbad = ret = t_ret = 0; + + pip = NULL; + dbenv = dbp->dbenv; + qp = dbp->q_internal; + + pg_ext = qp->page_ext; + + /* If this database has no extents, we've seen all the pages already. */ + if (pg_ext == 0) + return (0); + + first = QAM_RECNO_PAGE(dbp, vdp->first_recno); + last = QAM_RECNO_PAGE(dbp, vdp->last_recno); + + i = first; + if (first > last) + stop = QAM_RECNO_PAGE(dbp, UINT32_T_MAX); + else + stop = last; + nextents = vdp->nextents; + + /* Verify/salvage each page. */ +begin: + for (; i <= stop; i++) { + /* + * If DB_SALVAGE is set, we inspect our database of completed + * pages, and skip any we've already printed in the subdb pass. + */ + if (LF_ISSET(DB_SALVAGE) && (__db_salvage_isdone(vdp, i) != 0)) + continue; + if ((t_ret = __qam_fget(dbp, &i, 0, &h)) != 0) { + if (t_ret == ENOENT || t_ret == DB_PAGE_NOTFOUND) { + i += pg_ext - ((i - 1) % pg_ext) - 1; + continue; + } + + /* + * If an individual page get fails, keep going iff + * we're salvaging. + */ + if (LF_ISSET(DB_SALVAGE)) { + if (ret == 0) + ret = t_ret; + continue; + } else + return (t_ret); + } + + if (LF_ISSET(DB_SALVAGE)) { + /* + * We pretty much don't want to quit unless a + * bomb hits. May as well return that something + * was screwy, however. + */ + if ((t_ret = __db_salvage(dbp, + vdp, i, h, handle, callback, flags)) != 0) { + if (ret == 0) + ret = t_ret; + isbad = 1; + } + } else { + /* + * If we are not salvaging, and we get any error + * other than DB_VERIFY_BAD, return immediately; + * it may not be safe to proceed. If we get + * DB_VERIFY_BAD, keep going; listing more errors + * may make it easier to diagnose problems and + * determine the magnitude of the corruption. + */ + + if ((ret = __db_vrfy_common(dbp, + vdp, h, i, flags)) == DB_VERIFY_BAD) + isbad = 1; + else if (ret != 0) + goto err; + + __db_vrfy_struct_feedback(dbp, vdp); + + if ((ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0) + return (ret); + if (F_ISSET(pip, VRFY_IS_ALLZEROES)) + goto put; + if (pip->type != P_QAMDATA) { + EPRINT((dbenv, + "Page %lu: queue database page of incorrect type %lu", + (u_long)i, (u_long)pip->type)); + isbad = 1; + goto err; + } + if ((ret = + __db_vrfy_pgset_inc(vdp->pgset, i)) != 0) + goto err; + if ((ret = __qam_vrfy_data(dbp, vdp, + (QPAGE *)h, i, flags)) == DB_VERIFY_BAD) + isbad = 1; + else if (ret != 0) + goto err; + +put: if ((ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0) + goto err; + pip = NULL; + } + + /* Again, keep going iff we're salvaging. */ + if ((t_ret = __qam_fput(dbp, i, h, 0)) != 0) { + if (LF_ISSET(DB_SALVAGE)) { + if (ret == 0) + ret = t_ret; + continue; + } else + return (t_ret); + } + } + + if (first > last) { + i = 1; + stop = last; + first = last; + goto begin; + } + + /* + * Now check to see if there were any lingering + * extents and dump their data. + */ + + if (LF_ISSET(DB_SALVAGE) && nextents != 0) { + nextents--; + i = 1 + + vdp->extents[nextents] * vdp->page_ext; + stop = i + vdp->page_ext; + goto begin; + } + + if (0) { +err: if ((t_ret = __qam_fput(dbp, i, h, 0)) != 0) + return (ret == 0 ? t_ret : ret); + if (pip != NULL && + (t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0) + return (ret == 0 ? t_ret : ret); + } + return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret); +} + +/* + * __qam_salvage -- + * Safely dump out all recnos and data on a queue page. + * + * PUBLIC: int __qam_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, + * PUBLIC: void *, int (*)(void *, const void *), u_int32_t)); + */ +int +__qam_salvage(dbp, vdp, pgno, h, handle, callback, flags) + DB *dbp; + VRFY_DBINFO *vdp; + db_pgno_t pgno; + PAGE *h; + void *handle; + int (*callback) __P((void *, const void *)); + u_int32_t flags; +{ + DBT dbt, key; + QAMDATA *qp, *qep; + db_recno_t recno; + int ret, err_ret, t_ret; + u_int32_t pagesize, qlen; + u_int32_t i; + + memset(&dbt, 0, sizeof(DBT)); + memset(&key, 0, sizeof(DBT)); + + err_ret = ret = 0; + + pagesize = (u_int32_t)dbp->mpf->mfp->stat.st_pagesize; + qlen = ((QUEUE *)dbp->q_internal)->re_len; + dbt.size = qlen; + key.data = &recno; + key.size = sizeof(recno); + recno = (pgno - 1) * QAM_RECNO_PER_PAGE(dbp) + 1; + i = 0; + qep = (QAMDATA *)((u_int8_t *)h + pagesize - qlen); + for (qp = QAM_GET_RECORD(dbp, h, i); qp < qep; + recno++, i++, qp = QAM_GET_RECORD(dbp, h, i)) { + if (F_ISSET(qp, ~(QAM_VALID|QAM_SET))) + continue; + if (!F_ISSET(qp, QAM_SET)) + continue; + + if (!LF_ISSET(DB_AGGRESSIVE) && !F_ISSET(qp, QAM_VALID)) + continue; + + dbt.data = qp->data; + if ((ret = __db_prdbt(&key, + 0, " ", handle, callback, 1, vdp)) != 0) + err_ret = ret; + + if ((ret = __db_prdbt(&dbt, + 0, " ", handle, callback, 0, vdp)) != 0) + err_ret = ret; + } + + if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) + return (t_ret); + return ((ret == 0 && err_ret != 0) ? err_ret : ret); +} |