diff options
Diffstat (limited to 'db/db_dispatch.c')
-rw-r--r-- | db/db_dispatch.c | 953 |
1 files changed, 953 insertions, 0 deletions
diff --git a/db/db_dispatch.c b/db/db_dispatch.c new file mode 100644 index 0000000..65dc260 --- /dev/null +++ b/db/db_dispatch.c @@ -0,0 +1,953 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996-2009 Oracle. All rights reserved. + */ +/* + * Copyright (c) 1995, 1996 + * The President and Fellows of Harvard University. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/db_page.h" +#include "dbinc/hash.h" +#include "dbinc/fop.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __db_txnlist_find_internal __P((ENV *, DB_TXNHEAD *, + db_txnlist_type, u_int32_t, DB_TXNLIST **, + int, u_int32_t *)); + +/* + * __db_dispatch -- + * + * This is the transaction dispatch function used by the db access methods. + * It is designed to handle the record format used by all the access + * methods (the one automatically generated by the db_{h,log,read}.sh + * scripts in the tools directory). An application using a different + * recovery paradigm will supply a different dispatch function to txn_open. + * + * PUBLIC: int __db_dispatch __P((ENV *, + * PUBLIC: DB_DISTAB *, DBT *, DB_LSN *, db_recops, DB_TXNHEAD *)); + */ +int +__db_dispatch(env, dtab, db, lsnp, redo, info) + ENV *env; /* The environment. */ + DB_DISTAB *dtab; + DBT *db; /* The log record upon which to dispatch. */ + DB_LSN *lsnp; /* The lsn of the record being dispatched. */ + db_recops redo; /* Redo this op (or undo it). */ + DB_TXNHEAD *info; /* Transaction list. */ +{ + DB_ENV *dbenv; + DB_LSN prev_lsn; + u_int32_t rectype, status, txnid, urectype; + int make_call, ret; + + dbenv = env->dbenv; + LOGCOPY_32(env, &rectype, db->data); + LOGCOPY_32(env, &txnid, (u_int8_t *)db->data + sizeof(rectype)); + + make_call = ret = 0; + + /* If we don't have a dispatch table, it's hard to dispatch. */ + DB_ASSERT(env, dtab != NULL); + + /* + * If we find a record that is in the user's number space and they + * have specified a recovery routine, let them handle it. If they + * didn't specify a recovery routine, then we expect that they've + * followed all our rules and registered new recovery functions. + */ + switch (redo) { + case DB_TXN_ABORT: + case DB_TXN_APPLY: + case DB_TXN_PRINT: + make_call = 1; + break; + case DB_TXN_OPENFILES: + /* + * We collect all the transactions that have + * "begin" records, those with no previous LSN, + * so that we do not abort partial transactions. + * These are known to be undone, otherwise the + * log would not have been freeable. + */ + LOGCOPY_TOLSN(env, &prev_lsn, (u_int8_t *)db->data + + sizeof(rectype) + sizeof(txnid)); + if (txnid != 0 && prev_lsn.file == 0 && (ret = + __db_txnlist_add(env, info, txnid, TXN_OK, NULL)) != 0) + return (ret); + + /* FALLTHROUGH */ + case DB_TXN_POPENFILES: + if (rectype == DB___dbreg_register || + rectype == DB___txn_child || + rectype == DB___txn_ckp || rectype == DB___txn_recycle) + return ((dtab->int_dispatch[rectype])(env, + db, lsnp, redo, info)); + break; + case DB_TXN_BACKWARD_ROLL: + /* + * Running full recovery in the backward pass. In general, + * we only process records during this pass that belong + * to aborted transactions. Unfortunately, there are several + * exceptions: + * 1. If this is a meta-record, one not associated with + * a transaction, then we must always process it. + * 2. If this is a transaction commit/abort, we must + * always process it, so that we know the status of + * every transaction. + * 3. If this is a child commit, we need to process it + * because the outcome of the child transaction depends + * on the outcome of the parent. + * 4. If this is a dbreg_register record, we must always + * process is because they contain non-transactional + * closes that must be properly handled. + * 5. If this is a noop, we must always undo it so that we + * properly handle any aborts before a file was closed. + * 6. If this a file remove, we need to process it to + * determine if the on-disk file is the same as the + * one being described. + */ + switch (rectype) { + /* + * These either do not belong to a transaction or (regop) + * must be processed regardless of the status of the + * transaction. + */ + case DB___txn_regop: + case DB___txn_recycle: + case DB___txn_ckp: + make_call = 1; + break; + /* + * These belong to a transaction whose status must be + * checked. + */ + case DB___txn_child: + case DB___db_noop: + case DB___fop_file_remove: + case DB___dbreg_register: + make_call = 1; + + /* FALLTHROUGH */ + default: + if (txnid == 0) + break; + + ret = __db_txnlist_find(env, info, txnid, &status); + + /* If not found, this is an incomplete abort. */ + if (ret == DB_NOTFOUND) + return (__db_txnlist_add(env, + info, txnid, TXN_IGNORE, lsnp)); + if (ret != 0) + return (ret); + + /* + * If we ignore the transaction, ignore the operation + * UNLESS this is a child commit in which case we need + * to make sure that the child also gets marked as + * ignore. + */ + if (status == TXN_IGNORE && rectype != DB___txn_child) { + make_call = 0; + break; + } + if (status == TXN_COMMIT) + break; + + /* Set make_call in case we came through default */ + make_call = 1; + if (status == TXN_OK && + (ret = __db_txnlist_update(env, + info, txnid, rectype == DB___txn_prepare ? + TXN_PREPARE : TXN_ABORT, NULL, &status, 0)) != 0) + return (ret); + } + break; + case DB_TXN_FORWARD_ROLL: + /* + * In the forward pass, if we haven't seen the transaction, + * do nothing, else recover it. + * + * We need to always redo DB___db_noop records, so that we + * properly handle any commits after the file was closed. + */ + switch (rectype) { + case DB___txn_recycle: + case DB___txn_ckp: + case DB___db_noop: + case DB___dbreg_register: + make_call = 1; + break; + + default: + if (txnid == 0) + status = 0; + else { + ret = __db_txnlist_find(env, + info, txnid, &status); + + if (ret == DB_NOTFOUND) + /* Break out out of if clause. */ + ; + else if (ret != 0) + return (ret); + else if (status == TXN_COMMIT) { + make_call = 1; + break; + } + } + + } + break; + default: + return (__db_unknown_flag( + env, "__db_dispatch", (u_int32_t)redo)); + } + + if (make_call) { + /* + * If the debug flag is set then we are logging + * records for a non-durable update so that they + * may be examined for diagnostic purposes. + * So only make the call if we are printing, + * otherwise we need to extract the previous + * lsn so undo will work properly. + */ + if (rectype & DB_debug_FLAG) { + if (redo == DB_TXN_PRINT) + rectype &= ~DB_debug_FLAG; + else { + LOGCOPY_TOLSN(env, lsnp, + (u_int8_t *)db->data + + sizeof(rectype) + + sizeof(txnid)); + return (0); + } + } + if (rectype >= DB_user_BEGIN) { + if (dbenv->app_dispatch != NULL) + return (dbenv->app_dispatch(dbenv, + db, lsnp, redo)); + + /* No application-specific dispatch */ + urectype = rectype - DB_user_BEGIN; + if (urectype > dtab->ext_size || + dtab->ext_dispatch[urectype] == NULL) { + __db_errx(env, + "Illegal application-specific record type %lu in log", + (u_long)rectype); + return (EINVAL); + } + return ((dtab->ext_dispatch[urectype])(dbenv, + db, lsnp, redo)); + } else { + if (rectype > dtab->int_size || + dtab->int_dispatch[rectype] == NULL) { + __db_errx(env, + "Illegal record type %lu in log", + (u_long)rectype); + return (EINVAL); + } + return ((dtab->int_dispatch[rectype])(env, + db, lsnp, redo, info)); + } + } + + return (0); +} + +/* + * __db_add_recovery -- Add recovery functions to the dispatch table. + * + * We have two versions of this, an external one and an internal one, + * because application-specific functions take different arguments + * for dispatch (ENV versus DB_ENV). + * + * This is the external version. + * + * PUBLIC: int __db_add_recovery __P((DB_ENV *, DB_DISTAB *, + * PUBLIC: int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops), u_int32_t)); + */ +int +__db_add_recovery(dbenv, dtab, func, ndx) + DB_ENV *dbenv; + DB_DISTAB *dtab; + int (*func) __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); + u_int32_t ndx; +{ + size_t i, nsize; + int ret; + + /* Make sure this is an application-specific record. */ + if (ndx < DB_user_BEGIN) { + __db_errx(dbenv->env, + "Attempting to add application-specific record with invalid type %lu", + (u_long)ndx); + return (EINVAL); + } + ndx -= DB_user_BEGIN; + + /* Check if we have to grow the table. */ + if (ndx >= dtab->ext_size) { + nsize = ndx + 40; + if ((ret = + __os_realloc(dbenv->env, nsize * + sizeof((dtab->ext_dispatch)[0]), &dtab->ext_dispatch)) + != 0) + return (ret); + for (i = dtab->ext_size; i < nsize; ++i) + (dtab->ext_dispatch)[i] = NULL; + dtab->ext_size = nsize; + } + + (dtab->ext_dispatch)[ndx] = func; + return (0); +} + +/* + * __db_add_recovery_int -- + * + * Internal version of dispatch addition function. + * + * + * PUBLIC: int __db_add_recovery_int __P((ENV *, DB_DISTAB *, + * PUBLIC: int (*)(ENV *, DBT *, DB_LSN *, db_recops, void *), u_int32_t)); + */ +int +__db_add_recovery_int(env, dtab, func, ndx) + ENV *env; + DB_DISTAB *dtab; + int (*func) __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + u_int32_t ndx; +{ + size_t i, nsize; + int ret; + + if (ndx >= DB_user_BEGIN) { + __db_errx(env, + "Attempting to add internal record with invalid type %lu", + (u_long)ndx); + return (EINVAL); + } + + /* Check if we have to grow the table. */ + if (ndx >= dtab->int_size) { + nsize = ndx + 40; + if ((ret = + __os_realloc(env, nsize * sizeof((dtab->int_dispatch)[0]), + &dtab->int_dispatch)) != 0) + return (ret); + for (i = dtab->int_size; i < nsize; ++i) + (dtab->int_dispatch)[i] = NULL; + dtab->int_size = nsize; + } + + (dtab->int_dispatch)[ndx] = func; + return (0); +} + +/* + * __db_txnlist_init -- + * Initialize transaction linked list. + * + * PUBLIC: int __db_txnlist_init __P((ENV *, DB_THREAD_INFO *, + * PUBLIC: u_int32_t, u_int32_t, DB_LSN *, DB_TXNHEAD **)); + */ +int +__db_txnlist_init(env, ip, low_txn, hi_txn, trunc_lsn, retp) + ENV *env; + DB_THREAD_INFO *ip; + u_int32_t low_txn, hi_txn; + DB_LSN *trunc_lsn; + DB_TXNHEAD **retp; +{ + DB_TXNHEAD *headp; + u_int32_t size, tmp; + int ret; + + /* + * Size a hash table. + * If low is zero then we are being called during rollback + * and we need only one slot. + * Hi maybe lower than low if we have recycled txnid's. + * The numbers here are guesses about txn density, we can afford + * to look at a few entries in each slot. + */ + if (low_txn == 0) + size = 1; + else { + if (hi_txn < low_txn) { + tmp = hi_txn; + hi_txn = low_txn; + low_txn = tmp; + } + tmp = hi_txn - low_txn; + /* See if we wrapped around. */ + if (tmp > (TXN_MAXIMUM - TXN_MINIMUM) / 2) + tmp = (low_txn - TXN_MINIMUM) + (TXN_MAXIMUM - hi_txn); + size = tmp / 5; + if (size < 100) + size = 100; + } + if ((ret = __os_malloc(env, + sizeof(DB_TXNHEAD) + size * sizeof(headp->head), &headp)) != 0) + return (ret); + + memset(headp, 0, sizeof(DB_TXNHEAD) + size * sizeof(headp->head)); + headp->maxid = hi_txn; + headp->generation = 0; + headp->nslots = size; + headp->gen_alloc = 8; + headp->thread_info = ip; + if ((ret = __os_malloc(env, headp->gen_alloc * + sizeof(headp->gen_array[0]), &headp->gen_array)) != 0) { + __os_free(env, headp); + return (ret); + } + headp->gen_array[0].generation = 0; + headp->gen_array[0].txn_min = TXN_MINIMUM; + headp->gen_array[0].txn_max = TXN_MAXIMUM; + if (trunc_lsn != NULL) { + headp->trunc_lsn = *trunc_lsn; + headp->maxlsn = *trunc_lsn; + } else { + ZERO_LSN(headp->trunc_lsn); + ZERO_LSN(headp->maxlsn); + } + ZERO_LSN(headp->ckplsn); + + *retp = headp; + return (0); +} + +#define FIND_GENERATION(hp, txnid, gen) do { \ + u_int32_t __i; \ + for (__i = 0; __i <= (hp)->generation; __i++) \ + /* The range may wrap around the end. */ \ + if ((hp)->gen_array[__i].txn_min < \ + (hp)->gen_array[__i].txn_max ? \ + ((txnid) >= (hp)->gen_array[__i].txn_min && \ + (txnid) <= (hp)->gen_array[__i].txn_max) : \ + ((txnid) >= (hp)->gen_array[__i].txn_min || \ + (txnid) <= (hp)->gen_array[__i].txn_max)) \ + break; \ + DB_ASSERT(env, __i <= (hp)->generation); \ + gen = (hp)->gen_array[__i].generation; \ +} while (0) + +/* + * __db_txnlist_add -- + * Add an element to our transaction linked list. + * + * PUBLIC: int __db_txnlist_add __P((ENV *, + * PUBLIC: DB_TXNHEAD *, u_int32_t, u_int32_t, DB_LSN *)); + */ +int +__db_txnlist_add(env, hp, txnid, status, lsn) + ENV *env; + DB_TXNHEAD *hp; + u_int32_t txnid, status; + DB_LSN *lsn; +{ + DB_TXNLIST *elp; + int ret; + + if ((ret = __os_malloc(env, sizeof(DB_TXNLIST), &elp)) != 0) + return (ret); + + LIST_INSERT_HEAD(&hp->head[DB_TXNLIST_MASK(hp, txnid)], elp, links); + + /* Find the most recent generation containing this ID */ + FIND_GENERATION(hp, txnid, elp->u.t.generation); + elp->type = TXNLIST_TXNID; + elp->u.t.txnid = txnid; + elp->u.t.status = status; + if (txnid > hp->maxid) + hp->maxid = txnid; + if (lsn != NULL && IS_ZERO_LSN(hp->maxlsn) && status == TXN_COMMIT) + hp->maxlsn = *lsn; + + DB_ASSERT(env, lsn == NULL || + status != TXN_COMMIT || LOG_COMPARE(&hp->maxlsn, lsn) >= 0); + + return (0); +} + +/* + * __db_txnlist_remove -- + * Remove an element from our transaction linked list. + * + * PUBLIC: int __db_txnlist_remove __P((ENV *, DB_TXNHEAD *, u_int32_t)); + */ +int +__db_txnlist_remove(env, hp, txnid) + ENV *env; + DB_TXNHEAD *hp; + u_int32_t txnid; +{ + DB_TXNLIST *entry; + u_int32_t status; + + return (__db_txnlist_find_internal(env, + hp, TXNLIST_TXNID, txnid, &entry, 1, &status)); +} + +/* + * __db_txnlist_ckp -- + * Used to record the maximum checkpoint that will be retained + * after recovery. Typically this is simply the max checkpoint, but + * if we are doing client replication recovery or timestamp-based + * recovery, we are going to virtually truncate the log and we need + * to retain the last checkpoint before the truncation point. + * + * PUBLIC: void __db_txnlist_ckp __P((ENV *, DB_TXNHEAD *, DB_LSN *)); + */ +void +__db_txnlist_ckp(env, hp, ckp_lsn) + ENV *env; + DB_TXNHEAD *hp; + DB_LSN *ckp_lsn; +{ + + COMPQUIET(env, NULL); + + if (IS_ZERO_LSN(hp->ckplsn) && !IS_ZERO_LSN(hp->maxlsn) && + LOG_COMPARE(&hp->maxlsn, ckp_lsn) >= 0) + hp->ckplsn = *ckp_lsn; +} + +/* + * __db_txnlist_end -- + * Discard transaction linked list. + * + * PUBLIC: void __db_txnlist_end __P((ENV *, DB_TXNHEAD *)); + */ +void +__db_txnlist_end(env, hp) + ENV *env; + DB_TXNHEAD *hp; +{ + u_int32_t i; + DB_TXNLIST *p; + + if (hp == NULL) + return; + + for (i = 0; i < hp->nslots; i++) + while (hp != NULL && (p = LIST_FIRST(&hp->head[i])) != NULL) { + switch (p->type) { + case TXNLIST_LSN: + __os_free(env, p->u.l.lsn_stack); + break; + case TXNLIST_DELETE: + case TXNLIST_TXNID: + default: + /* + * Possibly an incomplete DB_TXNLIST; just + * free it. + */ + break; + } + LIST_REMOVE(p, links); + __os_free(env, p); + } + + if (hp->gen_array != NULL) + __os_free(env, hp->gen_array); + __os_free(env, hp); +} + +/* + * __db_txnlist_find -- + * Checks to see if a txnid with the current generation is in the + * txnid list. This returns DB_NOTFOUND if the item isn't in the + * list otherwise it returns (like __db_txnlist_find_internal) + * the status of the transaction. A txnid of 0 means the record + * was generated while not in a transaction. + * + * PUBLIC: int __db_txnlist_find __P((ENV *, + * PUBLIC: DB_TXNHEAD *, u_int32_t, u_int32_t *)); + */ +int +__db_txnlist_find(env, hp, txnid, statusp) + ENV *env; + DB_TXNHEAD *hp; + u_int32_t txnid, *statusp; +{ + DB_TXNLIST *entry; + + if (txnid == 0) + return (DB_NOTFOUND); + + return (__db_txnlist_find_internal(env, hp, + TXNLIST_TXNID, txnid, &entry, 0, statusp)); +} + +/* + * __db_txnlist_update -- + * Change the status of an existing transaction entry. + * Returns DB_NOTFOUND if no such entry exists. + * + * PUBLIC: int __db_txnlist_update __P((ENV *, DB_TXNHEAD *, + * PUBLIC: u_int32_t, u_int32_t, DB_LSN *, u_int32_t *, int)); + */ +int +__db_txnlist_update(env, hp, txnid, status, lsn, ret_status, add_ok) + ENV *env; + DB_TXNHEAD *hp; + u_int32_t txnid, status; + DB_LSN *lsn; + u_int32_t *ret_status; + int add_ok; +{ + DB_TXNLIST *elp; + int ret; + + if (txnid == 0) + return (DB_NOTFOUND); + + ret = __db_txnlist_find_internal(env, + hp, TXNLIST_TXNID, txnid, &elp, 0, ret_status); + + if (ret == DB_NOTFOUND && add_ok) { + *ret_status = status; + return (__db_txnlist_add(env, hp, txnid, status, lsn)); + } + if (ret != 0) + return (ret); + + if (*ret_status == TXN_IGNORE) + return (0); + + elp->u.t.status = status; + + if (lsn != NULL && IS_ZERO_LSN(hp->maxlsn) && status == TXN_COMMIT) + hp->maxlsn = *lsn; + + return (ret); +} + +/* + * __db_txnlist_find_internal -- + * Find an entry on the transaction list. If the entry is not there or + * the list pointer is not initialized we return DB_NOTFOUND. If the + * item is found, we return the status. Currently we always call this + * with an initialized list pointer but checking for NULL keeps it general. + */ +static int +__db_txnlist_find_internal(env, + hp, type, txnid, txnlistp, delete, statusp) + ENV *env; + DB_TXNHEAD *hp; + db_txnlist_type type; + u_int32_t txnid; + DB_TXNLIST **txnlistp; + int delete; + u_int32_t *statusp; +{ + struct __db_headlink *head; + DB_TXNLIST *p; + u_int32_t generation, hash; + int ret; + + ret = 0; + + if (hp == NULL) + return (DB_NOTFOUND); + + switch (type) { + case TXNLIST_TXNID: + hash = txnid; + FIND_GENERATION(hp, txnid, generation); + break; + case TXNLIST_DELETE: + case TXNLIST_LSN: + default: + return (__env_panic(env, EINVAL)); + } + + head = &hp->head[DB_TXNLIST_MASK(hp, hash)]; + LIST_FOREACH(p, head, links) { + if (p->type != type) + continue; + switch (type) { + case TXNLIST_TXNID: + if (p->u.t.txnid != txnid || + generation != p->u.t.generation) + continue; + *statusp = p->u.t.status; + break; + + case TXNLIST_DELETE: + case TXNLIST_LSN: + default: + return (__env_panic(env, EINVAL)); + } + if (delete == 1) { + LIST_REMOVE(p, links); + __os_free(env, p); + *txnlistp = NULL; + } else if (p != LIST_FIRST(head)) { + /* Move it to head of list. */ + LIST_REMOVE(p, links); + LIST_INSERT_HEAD(head, p, links); + *txnlistp = p; + } else + *txnlistp = p; + return (ret); + } + + return (DB_NOTFOUND); +} + +/* + * __db_txnlist_gen -- + * Change the current generation number. + * + * PUBLIC: int __db_txnlist_gen __P((ENV *, + * PUBLIC: DB_TXNHEAD *, int, u_int32_t, u_int32_t)); + */ +int +__db_txnlist_gen(env, hp, incr, min, max) + ENV *env; + DB_TXNHEAD *hp; + int incr; + u_int32_t min, max; +{ + int ret; + + /* + * During recovery generation numbers keep track of "restart" + * checkpoints and recycle records. Restart checkpoints occur + * whenever we take a checkpoint and there are no outstanding + * transactions. When that happens, we can reset transaction IDs + * back to TXNID_MINIMUM. Currently we only do the reset + * at then end of recovery. Recycle records occur when txnids + * are exhausted during runtime. A free range of ids is identified + * and logged. This code maintains a stack of ranges. A txnid + * is given the generation number of the first range it falls into + * in the stack. + */ + if (incr < 0) { + --hp->generation; + memmove(hp->gen_array, &hp->gen_array[1], + (hp->generation + 1) * sizeof(hp->gen_array[0])); + } else { + ++hp->generation; + if (hp->generation >= hp->gen_alloc) { + hp->gen_alloc *= 2; + if ((ret = __os_realloc(env, hp->gen_alloc * + sizeof(hp->gen_array[0]), &hp->gen_array)) != 0) + return (ret); + } + memmove(&hp->gen_array[1], &hp->gen_array[0], + hp->generation * sizeof(hp->gen_array[0])); + hp->gen_array[0].generation = hp->generation; + hp->gen_array[0].txn_min = min; + hp->gen_array[0].txn_max = max; + } + return (0); +} + +/* + * __db_txnlist_lsnadd -- + * Save the prev_lsn from a txn_child record. + * + * PUBLIC: int __db_txnlist_lsnadd __P((ENV *, DB_TXNHEAD *, DB_LSN *)); + */ +int +__db_txnlist_lsnadd(env, hp, lsnp) + ENV *env; + DB_TXNHEAD *hp; + DB_LSN *lsnp; +{ + DB_TXNLIST *elp; + int ret; + + if (IS_ZERO_LSN(*lsnp)) + return (0); + + LIST_FOREACH(elp, &hp->head[0], links) + if (elp->type == TXNLIST_LSN) + break; + + if (elp == NULL) { + if ((ret = __db_txnlist_lsninit(env, hp, lsnp)) != 0) + return (ret); + return (DB_SURPRISE_KID); + } + + if (elp->u.l.stack_indx == elp->u.l.stack_size) { + elp->u.l.stack_size <<= 1; + if ((ret = __os_realloc(env, sizeof(DB_LSN) * + elp->u.l.stack_size, &elp->u.l.lsn_stack)) != 0) { + __db_txnlist_end(env, hp); + return (ret); + } + } + elp->u.l.lsn_stack[elp->u.l.stack_indx++] = *lsnp; + + return (0); +} + +/* + * __db_txnlist_lsnget -- + * + * PUBLIC: int __db_txnlist_lsnget __P((ENV *, + * PUBLIC: DB_TXNHEAD *, DB_LSN *, u_int32_t)); + * Get the lsn saved from a txn_child record. + */ +int +__db_txnlist_lsnget(env, hp, lsnp, flags) + ENV *env; + DB_TXNHEAD *hp; + DB_LSN *lsnp; + u_int32_t flags; +{ + DB_TXNLIST *elp; + + COMPQUIET(env, NULL); + COMPQUIET(flags, 0); + + LIST_FOREACH(elp, &hp->head[0], links) + if (elp->type == TXNLIST_LSN) + break; + + if (elp == NULL || elp->u.l.stack_indx == 0) { + ZERO_LSN(*lsnp); + return (0); + } + + *lsnp = elp->u.l.lsn_stack[--elp->u.l.stack_indx]; + + return (0); +} + +/* + * __db_txnlist_lsninit -- + * Initialize a transaction list with an lsn array entry. + * + * PUBLIC: int __db_txnlist_lsninit __P((ENV *, DB_TXNHEAD *, DB_LSN *)); + */ +int +__db_txnlist_lsninit(env, hp, lsnp) + ENV *env; + DB_TXNHEAD *hp; + DB_LSN *lsnp; +{ + DB_TXNLIST *elp; + int ret; + + elp = NULL; + + if ((ret = __os_malloc(env, sizeof(DB_TXNLIST), &elp)) != 0) + goto err; + LIST_INSERT_HEAD(&hp->head[0], elp, links); + + elp->type = TXNLIST_LSN; + if ((ret = __os_malloc(env, + sizeof(DB_LSN) * DB_LSN_STACK_SIZE, &elp->u.l.lsn_stack)) != 0) + goto err; + elp->u.l.stack_indx = 1; + elp->u.l.stack_size = DB_LSN_STACK_SIZE; + elp->u.l.lsn_stack[0] = *lsnp; + + return (0); + +err: __db_txnlist_end(env, hp); + return (ret); +} + +#ifdef DEBUG +/* + * __db_txnlist_print -- + * Print out the transaction list. + * + * PUBLIC: void __db_txnlist_print __P((DB_TXNHEAD *)); + */ +void +__db_txnlist_print(hp) + DB_TXNHEAD *hp; +{ + DB_TXNLIST *p; + u_int32_t i; + char *txntype; + + printf("Maxid: %lu Generation: %lu\n", + (u_long)hp->maxid, (u_long)hp->generation); + for (i = 0; i < hp->nslots; i++) + LIST_FOREACH(p, &hp->head[i], links) { + if (p->type != TXNLIST_TXNID) { + printf("Unrecognized type: %d\n", p->type); + continue; + } + switch (p->u.t.status) { + case TXN_OK: + txntype = "OK"; + break; + case TXN_COMMIT: + txntype = "commit"; + break; + case TXN_PREPARE: + txntype = "prepare"; + break; + case TXN_ABORT: + txntype = "abort"; + break; + case TXN_IGNORE: + txntype = "ignore"; + break; + case TXN_EXPECTED: + txntype = "expected"; + break; + case TXN_UNEXPECTED: + txntype = "unexpected"; + break; + default: + txntype = "UNKNOWN"; + break; + } + printf("TXNID: %lx(%lu): %s\n", + (u_long)p->u.t.txnid, + (u_long)p->u.t.generation, txntype); + } +} +#endif |