diff options
author | jbj <devnull@localhost> | 2003-12-15 21:42:09 +0000 |
---|---|---|
committer | jbj <devnull@localhost> | 2003-12-15 21:42:09 +0000 |
commit | 8960e3895f7af91126465368dff8fbb36ab4e853 (patch) | |
tree | 3c515e39dde0e88edeb806ea87d08524ba25c761 /db/lock/lock_deadlock.c | |
parent | 752cac72e220dcad4e6fce39508e714e59e3e0a1 (diff) | |
download | librpm-tizen-8960e3895f7af91126465368dff8fbb36ab4e853.tar.gz librpm-tizen-8960e3895f7af91126465368dff8fbb36ab4e853.tar.bz2 librpm-tizen-8960e3895f7af91126465368dff8fbb36ab4e853.zip |
- upgrade to db-4.2.52.
CVS patchset: 6972
CVS date: 2003/12/15 21:42:09
Diffstat (limited to 'db/lock/lock_deadlock.c')
-rw-r--r-- | db/lock/lock_deadlock.c | 663 |
1 files changed, 479 insertions, 184 deletions
diff --git a/db/lock/lock_deadlock.c b/db/lock/lock_deadlock.c index 1f37db389..d7cf5e0b7 100644 --- a/db/lock/lock_deadlock.c +++ b/db/lock/lock_deadlock.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: lock_deadlock.c,v 11.23 2000/12/08 20:15:31 ubell Exp $"; +static const char revid[] = "$Id: lock_deadlock.c,v 11.66 2003/11/19 19:59:02 ubell Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -17,19 +17,11 @@ static const char revid[] = "$Id: lock_deadlock.c,v 11.23 2000/12/08 20:15:31 ub #include <string.h> #endif -#ifdef HAVE_RPC -#include "db_server.h" -#endif - #include "db_int.h" -#include "db_shash.h" -#include "lock.h" -#include "txn.h" - -#ifdef HAVE_RPC -#include "gen_client_ext.h" -#include "rpc_client_ext.h" -#endif +#include "dbinc/db_shash.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" #define ISSET_MAP(M, N) ((M)[(N) / 32] & (1 << (N) % 32)) @@ -51,159 +43,273 @@ static const char revid[] = "$Id: lock_deadlock.c,v 11.23 2000/12/08 20:15:31 ub typedef struct { int valid; + int self_wait; + int in_abort; + u_int32_t count; u_int32_t id; u_int32_t last_lock; + ssize_t last_obj; u_int32_t last_locker_id; db_pgno_t pgno; } locker_info; -static int __dd_abort __P((DB_ENV *, locker_info *)); -static int __dd_build - __P((DB_ENV *, u_int32_t **, u_int32_t *, locker_info **)); -static int __dd_find - __P((DB_ENV *,u_int32_t *, locker_info *, u_int32_t, u_int32_t ***)); +static int __dd_abort __P((DB_ENV *, locker_info *)); +static int __dd_build __P((DB_ENV *, + u_int32_t, u_int32_t **, u_int32_t *, u_int32_t *, locker_info **)); +static int __dd_find __P((DB_ENV *, + u_int32_t *, locker_info *, u_int32_t, u_int32_t, u_int32_t ***)); +static int __dd_isolder __P((u_int32_t, u_int32_t, u_int32_t, 
u_int32_t)); +static int __dd_verify __P((locker_info *, u_int32_t *, u_int32_t *, + u_int32_t *, u_int32_t, u_int32_t, u_int32_t)); #ifdef DIAGNOSTIC -static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t)); +static void __dd_debug + __P((DB_ENV *, locker_info *, u_int32_t *, u_int32_t, u_int32_t)); #endif +/* + * __lock_detect_pp -- + * DB_ENV->lock_detect pre/post processing. + * + * PUBLIC: int __lock_detect_pp __P((DB_ENV *, u_int32_t, u_int32_t, int *)); + */ int -lock_detect(dbenv, flags, atype, abortp) +__lock_detect_pp(dbenv, flags, atype, abortp) DB_ENV *dbenv; u_int32_t flags, atype; int *abortp; { + int ret, rep_check; + + PANIC_CHECK(dbenv); + ENV_REQUIRES_CONFIG(dbenv, + dbenv->lk_handle, "DB_ENV->lock_detect", DB_INIT_LOCK); + + /* Validate arguments. */ + if ((ret = __db_fchk(dbenv, "DB_ENV->lock_detect", flags, 0)) != 0) + return (ret); + switch (atype) { + case DB_LOCK_DEFAULT: + case DB_LOCK_EXPIRE: + case DB_LOCK_MAXLOCKS: + case DB_LOCK_MINLOCKS: + case DB_LOCK_MINWRITE: + case DB_LOCK_OLDEST: + case DB_LOCK_RANDOM: + case DB_LOCK_YOUNGEST: + break; + default: + __db_err(dbenv, + "DB_ENV->lock_detect: unknown deadlock detection mode specified"); + return (EINVAL); + } + + rep_check = IS_ENV_REPLICATED(dbenv) ? 1 : 0; + if (rep_check) + __env_rep_enter(dbenv); + ret = __lock_detect(dbenv, atype, abortp); + if (rep_check) + __env_rep_exit(dbenv); + return (ret); +} + +/* + * __lock_detect -- + * DB_ENV->lock_detect. 
+ * + * PUBLIC: int __lock_detect __P((DB_ENV *, u_int32_t, int *)); + */ +int +__lock_detect(dbenv, atype, abortp) + DB_ENV *dbenv; + u_int32_t atype; + int *abortp; +{ DB_LOCKREGION *region; DB_LOCKTAB *lt; + DB_TXNMGR *tmgr; + db_timeval_t now; locker_info *idmap; - u_int32_t *bitmap, **deadp, **free_me, i, killid, nentries, nlockers; - int do_pass, ret; + u_int32_t *bitmap, *copymap, **deadp, **free_me, *tmpmap; + u_int32_t i, keeper, killid, limit, nalloc, nlockers; + u_int32_t lock_max, txn_max; + int ret; -#ifdef HAVE_RPC - if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) - return (__dbcl_lock_detect(dbenv, flags, atype, abortp)); -#endif + /* + * If this environment is a replication client, then we must use the + * MINWRITE detection discipline. + */ + if (__rep_is_client(dbenv)) + atype = DB_LOCK_MINWRITE; - PANIC_CHECK(dbenv); - ENV_REQUIRES_CONFIG(dbenv, dbenv->lk_handle, DB_INIT_LOCK); + free_me = NULL; lt = dbenv->lk_handle; if (abortp != NULL) *abortp = 0; - /* Validate arguments. */ - if ((ret = - __db_fchk(dbenv, "lock_detect", flags, DB_LOCK_CONFLICT)) != 0) - return (ret); - /* Check if a detector run is necessary. */ LOCKREGION(dbenv, lt); - if (LF_ISSET(DB_LOCK_CONFLICT)) { - /* Make a pass every time a lock waits. */ - region = lt->reginfo.primary; - do_pass = region->need_dd != 0; - if (!do_pass) { - UNLOCKREGION(dbenv, lt); - return (0); - } + /* Make a pass only if auto-detect would run. */ + region = lt->reginfo.primary; + + LOCK_SET_TIME_INVALID(&now); + if (region->need_dd == 0 && + (!LOCK_TIME_ISVALID(&region->next_timeout) || + !__lock_expired(dbenv, &now, &region->next_timeout))) { + UNLOCKREGION(dbenv, lt); + return (0); } + if (region->need_dd == 0) + atype = DB_LOCK_EXPIRE; + + /* Reset need_dd, so we know we've run the detector. */ + region->need_dd = 0; /* Build the waits-for bitmap. 
*/ - ret = __dd_build(dbenv, &bitmap, &nlockers, &idmap); + ret = __dd_build(dbenv, atype, &bitmap, &nlockers, &nalloc, &idmap); + lock_max = region->stat.st_cur_maxid; UNLOCKREGION(dbenv, lt); - if (ret != 0) + + /* + * We need the cur_maxid from the txn region as well. In order + * to avoid tricky synchronization between the lock and txn + * regions, we simply unlock the lock region and then lock the + * txn region. This introduces a small window during which the + * transaction system could then wrap. We're willing to return + * the wrong answer for "oldest" or "youngest" in those rare + * circumstances. + */ + tmgr = dbenv->tx_handle; + if (tmgr != NULL) { + R_LOCK(dbenv, &tmgr->reginfo); + txn_max = ((DB_TXNREGION *)tmgr->reginfo.primary)->cur_maxid; + R_UNLOCK(dbenv, &tmgr->reginfo); + } else + txn_max = TXN_MAXIMUM; + if (ret != 0 || atype == DB_LOCK_EXPIRE) return (ret); if (nlockers == 0) return (0); #ifdef DIAGNOSTIC if (FLD_ISSET(dbenv->verbose, DB_VERB_WAITSFOR)) - __dd_debug(dbenv, idmap, bitmap, nlockers); + __dd_debug(dbenv, idmap, bitmap, nlockers, nalloc); #endif + /* Now duplicate the bitmaps so we can verify deadlock participants. */ + if ((ret = __os_calloc(dbenv, (size_t)nlockers, + sizeof(u_int32_t) * nalloc, ©map)) != 0) + goto err; + memcpy(copymap, bitmap, nlockers * sizeof(u_int32_t) * nalloc); + + if ((ret = __os_calloc(dbenv, sizeof(u_int32_t), nalloc, &tmpmap)) != 0) + goto err1; + /* Find a deadlock. */ - if ((ret = __dd_find(dbenv, bitmap, idmap, nlockers, &deadp)) != 0) + if ((ret = + __dd_find(dbenv, bitmap, idmap, nlockers, nalloc, &deadp)) != 0) return (ret); - nentries = ALIGN(nlockers, 32) / 32; killid = BAD_KILLID; free_me = deadp; for (; *deadp != NULL; deadp++) { if (abortp != NULL) ++*abortp; - switch (atype) { /* Kill someone. */ - case DB_LOCK_OLDEST: - /* - * Find the first bit set in the current - * array and then look for a lower tid in - * the array. 
- */ - for (i = 0; i < nlockers; i++) - if (ISSET_MAP(*deadp, i)) { - killid = i; - break; + killid = (u_int32_t)((*deadp - bitmap) / nalloc); + limit = killid; + keeper = BAD_KILLID; - } - /* - * It's conceivable that under XA, the locker could - * have gone away. - */ - if (killid == BAD_KILLID) - break; - - /* - * The oldest transaction has the lowest - * transaction id. - */ - for (i = killid + 1; i < nlockers; i++) - if (ISSET_MAP(*deadp, i) && - idmap[i].id < idmap[killid].id) - killid = i; - break; - case DB_LOCK_DEFAULT: - case DB_LOCK_RANDOM: - /* - * We are trying to calculate the id of the - * locker whose entry is indicated by deadlock. - */ - killid = (*deadp - bitmap) / nentries; + if (atype == DB_LOCK_DEFAULT || atype == DB_LOCK_RANDOM) + goto dokill; + /* + * It's conceivable that under XA, the locker could + * have gone away. + */ + if (killid == BAD_KILLID) break; - case DB_LOCK_YOUNGEST: - /* - * Find the first bit set in the current - * array and then look for a lower tid in - * the array. - */ - for (i = 0; i < nlockers; i++) - if (ISSET_MAP(*deadp, i)) { - killid = i; - break; - } - /* - * It's conceivable that under XA, the locker could - * have gone away. - */ - if (killid == BAD_KILLID) - break; + /* + * Start with the id that we know is deadlocked + * and then examine all other set bits and see + * if any are a better candidate for abortion + * and that they are genuinely part of the + * deadlock. The definition of "best": + * OLDEST: smallest id + * YOUNGEST: largest id + * MAXLOCKS: maximum count + * MINLOCKS: minimum count + * MINWRITE: minimum count + */ - /* - * The youngest transaction has the highest - * transaction id. 
- */ - for (i = killid + 1; i < nlockers; i++) - if (ISSET_MAP(*deadp, i) && - idmap[i].id > idmap[killid].id) - killid = i; - break; - default: - killid = BAD_KILLID; - ret = EINVAL; + for (i = (killid + 1) % nlockers; + i != limit; + i = (i + 1) % nlockers) { + if (!ISSET_MAP(*deadp, i) || idmap[i].in_abort) + continue; + switch (atype) { + case DB_LOCK_OLDEST: + if (__dd_isolder(idmap[killid].id, + idmap[i].id, lock_max, txn_max)) + continue; + keeper = i; + break; + case DB_LOCK_YOUNGEST: + if (__dd_isolder(idmap[i].id, + idmap[killid].id, lock_max, txn_max)) + continue; + keeper = i; + break; + case DB_LOCK_MAXLOCKS: + if (idmap[i].count < idmap[killid].count) + continue; + keeper = i; + break; + case DB_LOCK_MINLOCKS: + case DB_LOCK_MINWRITE: + if (idmap[i].count > idmap[killid].count) + continue; + keeper = i; + break; + default: + killid = BAD_KILLID; + ret = EINVAL; + goto dokill; + } + if (__dd_verify(idmap, *deadp, + tmpmap, copymap, nlockers, nalloc, i)) + killid = i; } - if (killid == BAD_KILLID) +dokill: if (killid == BAD_KILLID) continue; + /* + * There are cases in which our general algorithm will + * fail. Returning 1 from verify indicates that the + * particular locker is not only involved in a deadlock, + * but that killing him will allow others to make forward + * progress. Unfortunately, there are cases where we need + * to abort someone, but killing them will not necessarily + * ensure forward progress (imagine N readers all trying to + * acquire a write lock). In such a scenario, we'll have + * gotten all the way through the loop, we will have found + * someone to keep (keeper will be valid), but killid will + * still be the initial deadlocker. In this case, if the + * initial killid satisfies __dd_verify, kill it, else abort + * keeper and indicate that we need to run deadlock detection + * again. 
+ */ + + if (keeper != BAD_KILLID && killid == limit && + __dd_verify(idmap, *deadp, + tmpmap, copymap, nlockers, nalloc, killid) == 0) { + LOCKREGION(dbenv, lt); + region->need_dd = 1; + UNLOCKREGION(dbenv, lt); + killid = keeper; + } + /* Kill the locker with lockid idmap[killid]. */ if ((ret = __dd_abort(dbenv, &idmap[killid])) != 0) { /* @@ -221,9 +327,13 @@ lock_detect(dbenv, flags, atype, abortp) __db_err(dbenv, "Aborting locker %lx", (u_long)idmap[killid].id); } - __os_free(free_me, 0); - __os_free(bitmap, 0); - __os_free(idmap, 0); + __os_free(dbenv, tmpmap); +err1: __os_free(dbenv, copymap); + +err: if (free_me != NULL) + __os_free(dbenv, free_me); + __os_free(dbenv, bitmap); + __os_free(dbenv, idmap); return (ret); } @@ -236,9 +346,9 @@ lock_detect(dbenv, flags, atype, abortp) # define DD_INVALID_ID ((u_int32_t) -1) static int -__dd_build(dbenv, bmp, nlockers, idmap) +__dd_build(dbenv, atype, bmp, nlockers, allocp, idmap) DB_ENV *dbenv; - u_int32_t **bmp, *nlockers; + u_int32_t atype, **bmp, *nlockers, *allocp; locker_info **idmap; { struct __db_lock *lp; @@ -247,12 +357,30 @@ __dd_build(dbenv, bmp, nlockers, idmap) DB_LOCKREGION *region; DB_LOCKTAB *lt; locker_info *id_array; - u_int32_t *bitmap, count, dd, *entryp, i, id, ndx, nentries, *tmpmap; + db_timeval_t now, min_timeout; + u_int32_t *bitmap, count, dd, *entryp, id, ndx, nentries, *tmpmap; u_int8_t *pptr; - int is_first, ret; + int expire_only, is_first, ret; lt = dbenv->lk_handle; region = lt->reginfo.primary; + LOCK_SET_TIME_INVALID(&now); + LOCK_SET_TIME_MAX(&min_timeout); + expire_only = atype == DB_LOCK_EXPIRE; + + /* + * While we always check for expired timeouts, if we are called + * with DB_LOCK_EXPIRE, then we are only checking for timeouts + * (i.e., not doing deadlock detection at all). If we aren't + * doing real deadlock detection, then we can skip a significant, + * amount of the processing. In particular we do not build + * the conflict array and our caller needs to expect this. 
+ */ + if (expire_only) { + count = 0; + nentries = 0; + goto obj_loop; + } /* * We'll check how many lockers there are, add a few more in for @@ -260,8 +388,7 @@ __dd_build(dbenv, bmp, nlockers, idmap) * verify that we have enough room when we go back in and get the * mutex the second time. */ -retry: count = region->nlockers; - region->need_dd = 0; +retry: count = region->stat.st_nlockers; if (count == 0) { *nlockers = 0; @@ -271,7 +398,7 @@ retry: count = region->nlockers; if (FLD_ISSET(dbenv->verbose, DB_VERB_DEADLOCK)) __db_err(dbenv, "%lu lockers", (u_long)count); - count += 40; + count += 20; nentries = ALIGN(count, 32) / 32; /* @@ -287,38 +414,46 @@ retry: count = region->nlockers; if ((ret = __os_calloc(dbenv, sizeof(u_int32_t), nentries, &tmpmap)) != 0) { - __os_free(bitmap, sizeof(u_int32_t) * nentries); + __os_free(dbenv, bitmap); return (ret); } if ((ret = __os_calloc(dbenv, (size_t)count, sizeof(locker_info), &id_array)) != 0) { - __os_free(bitmap, count * sizeof(u_int32_t) * nentries); - __os_free(tmpmap, sizeof(u_int32_t) * nentries); + __os_free(dbenv, bitmap); + __os_free(dbenv, tmpmap); return (ret); } /* * Now go back in and actually fill in the matrix. */ - if (region->nlockers > count) { - __os_free(bitmap, count * sizeof(u_int32_t) * nentries); - __os_free(tmpmap, sizeof(u_int32_t) * nentries); - __os_free(id_array, count * sizeof(locker_info)); + if (region->stat.st_nlockers > count) { + __os_free(dbenv, bitmap); + __os_free(dbenv, tmpmap); + __os_free(dbenv, id_array); goto retry; } /* * First we go through and assign each locker a deadlock detector id. 
*/ - for (id = 0, i = 0; i < region->locker_t_size; i++) { - for (lip = SH_TAILQ_FIRST(&lt->locker_tab[i], __db_locker); - lip != NULL; lip = SH_TAILQ_NEXT(lip, links, __db_locker)) - if (lip->master_locker == INVALID_ROFF) { - lip->dd_id = id++; - id_array[lip->dd_id].id = lip->id; - } else - lip->dd_id = DD_INVALID_ID; + for (id = 0, lip = SH_TAILQ_FIRST(&region->lockers, __db_locker); + lip != NULL; + lip = SH_TAILQ_NEXT(lip, ulinks, __db_locker)) { + if (lip->master_locker == INVALID_ROFF) { + lip->dd_id = id++; + id_array[lip->dd_id].id = lip->id; + if (atype == DB_LOCK_MINLOCKS || + atype == DB_LOCK_MAXLOCKS) + id_array[lip->dd_id].count = lip->nlocks; + if (atype == DB_LOCK_MINWRITE) + id_array[lip->dd_id].count = lip->nwrites; + if (F_ISSET(lip, DB_LOCKER_INABORT)) + id_array[lip->dd_id].in_abort = 1; + } else + lip->dd_id = DD_INVALID_ID; + } /* @@ -328,8 +463,11 @@ retry: count = region->nlockers; * list and add an entry in the waitsfor matrix for each waiter/holder * combination. */ +obj_loop: for (op = SH_TAILQ_FIRST(&region->dd_objs, __db_lockobj); op != NULL; op = SH_TAILQ_NEXT(op, dd_links, __db_lockobj)) { + if (expire_only) + goto look_waiters; CLEAR_MAP(tmpmap, nentries); /* @@ -343,11 +481,20 @@ retry: count = region->nlockers; if ((ret = __lock_getlocker(lt, lp->holder, ndx, 0, &lockerp)) != 0) continue; - if (lockerp->dd_id == DD_INVALID_ID) - dd = ((DB_LOCKER *) - R_ADDR(&lt->reginfo, - lockerp->master_locker))->dd_id; - else + + if (lockerp->dd_id == DD_INVALID_ID) { + dd = ((DB_LOCKER *)R_ADDR(&lt->reginfo, + lockerp->master_locker))->dd_id; + lockerp->dd_id = dd; + if (atype == DB_LOCK_MINLOCKS || + atype == DB_LOCK_MAXLOCKS) + id_array[dd].count += lockerp->nlocks; + if (atype == DB_LOCK_MINWRITE) + id_array[dd].count += lockerp->nwrites; + if (F_ISSET(lockerp, DB_LOCKER_INABORT)) + id_array[dd].in_abort = 1; + + } else dd = lockerp->dd_id; id_array[dd].valid = 1; @@ -363,6 +510,7 @@ retry: count = region->nlockers; * Next, for each waiter, we set its 
row in the matrix * equal to the map of holders we set up above. */ +look_waiters: for (is_first = 1, lp = SH_TAILQ_FIRST(&op->waiters, __db_lock); lp != NULL; @@ -372,11 +520,32 @@ retry: count = region->nlockers; if ((ret = __lock_getlocker(lt, lp->holder, ndx, 0, &lockerp)) != 0) continue; - if (lockerp->dd_id == DD_INVALID_ID) - dd = ((DB_LOCKER *) - R_ADDR(&lt->reginfo, - lockerp->master_locker))->dd_id; - else + if (lp->status == DB_LSTAT_WAITING) { + if (__lock_expired(dbenv, + &now, &lockerp->lk_expire)) { + lp->status = DB_LSTAT_EXPIRED; + MUTEX_UNLOCK(dbenv, &lp->mutex); + continue; + } + if (LOCK_TIME_GREATER( + &min_timeout, &lockerp->lk_expire)) + min_timeout = lockerp->lk_expire; + + } + + if (expire_only) + continue; + + if (lockerp->dd_id == DD_INVALID_ID) { + dd = ((DB_LOCKER *)R_ADDR(&lt->reginfo, + lockerp->master_locker))->dd_id; + lockerp->dd_id = dd; + if (atype == DB_LOCK_MINLOCKS || + atype == DB_LOCK_MAXLOCKS) + id_array[dd].count += lockerp->nlocks; + if (atype == DB_LOCK_MINWRITE) + id_array[dd].count += lockerp->nwrites; + } else dd = lockerp->dd_id; id_array[dd].valid = 1; @@ -396,11 +565,23 @@ retry: count = region->nlockers; * else on the queue, then we have to keep * it and we have an automatic deadlock. */ - if (is_first) + if (is_first) { + if (ISSET_MAP(entryp, dd)) + id_array[dd].self_wait = 1; CLR_MAP(entryp, dd); + } } } + if (LOCK_TIME_ISVALID(&region->next_timeout)) { + if (LOCK_TIME_ISMAX(&min_timeout)) + LOCK_SET_TIME_INVALID(&region->next_timeout); + else + region->next_timeout = min_timeout; + } + if (expire_only) + return (0); + /* Now for each locker; record its last lock. 
*/ for (id = 0; id < count; id++) { if (!id_array[id].valid) @@ -423,7 +604,7 @@ retry: count = region->nlockers; do { lp = SH_LIST_FIRST(&child->heldby, __db_lock); if (lp != NULL && - lp->status == DB_LSTAT_WAITING) { + lp->status == DB_LSTAT_WAITING) { id_array[id].last_locker_id = child->id; goto get_lock; } @@ -435,6 +616,7 @@ retry: count = region->nlockers; if (lp != NULL) { id_array[id].last_locker_id = lockerp->id; get_lock: id_array[id].last_lock = R_OFFSET(&lt->reginfo, lp); + id_array[id].last_obj = lp->obj; lo = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj); pptr = SH_DBT_PTR(&lo->lockobj); if (lo->lockobj.size >= sizeof(db_pgno_t)) @@ -445,7 +627,9 @@ retry: count = region->nlockers; } } - /* Pass complete, reset the deadlock detector bit. */ + /* + * Pass complete, reset the deadlock detector bit. + */ region->need_dd = 0; /* @@ -455,18 +639,19 @@ retry: count = region->nlockers; *nlockers = id; *idmap = id_array; *bmp = bitmap; - __os_free(tmpmap, sizeof(u_int32_t) * nentries); + *allocp = nentries; + __os_free(dbenv, tmpmap); return (0); } static int -__dd_find(dbenv, bmp, idmap, nlockers, deadp) +__dd_find(dbenv, bmp, idmap, nlockers, nalloc, deadp) DB_ENV *dbenv; - u_int32_t *bmp, nlockers; + u_int32_t *bmp, nlockers, nalloc; locker_info *idmap; u_int32_t ***deadp; { - u_int32_t i, j, k, nentries, *mymap, *tmpmap; + u_int32_t i, j, k, *mymap, *tmpmap; u_int32_t **retp; int ndead, ndeadalloc, ret; @@ -476,24 +661,23 @@ __dd_find(dbenv, bmp, idmap, nlockers, deadp) ndeadalloc = INITIAL_DEAD_ALLOC; ndead = 0; if ((ret = __os_malloc(dbenv, - ndeadalloc * sizeof(u_int32_t *), NULL, &retp)) != 0) + ndeadalloc * sizeof(u_int32_t *), &retp)) != 0) return (ret); /* * For each locker, OR in the bits from the lockers on which that * locker is waiting. 
*/ - nentries = ALIGN(nlockers, 32) / 32; - for (mymap = bmp, i = 0; i < nlockers; i++, mymap += nentries) { - if (!idmap[i].valid) + for (mymap = bmp, i = 0; i < nlockers; i++, mymap += nalloc) { + if (!idmap[i].valid || idmap[i].in_abort) continue; for (j = 0; j < nlockers; j++) { if (!ISSET_MAP(mymap, j)) continue; /* Find the map for this bit. */ - tmpmap = bmp + (nentries * j); - OR_MAP(mymap, tmpmap, nentries); + tmpmap = bmp + (nalloc * j); + OR_MAP(mymap, tmpmap, nalloc); if (!ISSET_MAP(mymap, i)) continue; @@ -506,7 +690,7 @@ __dd_find(dbenv, bmp, idmap, nlockers, deadp) */ if (__os_realloc(dbenv, ndeadalloc * sizeof(u_int32_t), - NULL, &retp) != 0) { + &retp) != 0) { retp[ndead] = NULL; *deadp = retp; return (0); @@ -543,34 +727,37 @@ __dd_abort(dbenv, info) region = lt->reginfo.primary; LOCKREGION(dbenv, lt); - /* Find the locker's last lock. */ + + /* + * Get the locker. If its gone or was aborted while + * we were detecting return that. + */ LOCKER_LOCK(lt, region, info->last_locker_id, ndx); if ((ret = __lock_getlocker(lt, - info->last_locker_id, ndx, 0, &lockerp)) != 0 || lockerp == NULL) { + info->last_locker_id, ndx, 0, &lockerp)) != 0 || + lockerp == NULL || F_ISSET(lockerp, DB_LOCKER_INABORT)) { if (ret == 0) ret = DB_ALREADY_ABORTED; goto out; } - lockp = SH_LIST_FIRST(&lockerp->heldby, __db_lock); - /* - * It's possible that this locker was already aborted. If that's - * the case, make sure that we remove its locker from the hash table. + * Find the locker's last lock. + * It is possible for this lock to have been freed, + * either though a timeout or another detector run. 
*/ - if (lockp == NULL) { - if (LOCKER_FREEABLE(lockerp)) { - __lock_freelocker(lt, region, lockerp, ndx); - goto out; - } - } else if (R_OFFSET(&lt->reginfo, lockp) != info->last_lock || - lockp->status != DB_LSTAT_WAITING) { + if ((lockp = SH_LIST_FIRST(&lockerp->heldby, __db_lock)) == NULL) { + ret = DB_ALREADY_ABORTED; + goto out; + } + if (R_OFFSET(&lt->reginfo, lockp) != info->last_lock || + lockp->holder != lockerp->id || + lockp->obj != info->last_obj || lockp->status != DB_LSTAT_WAITING) { ret = DB_ALREADY_ABORTED; goto out; } sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj); - SH_LIST_REMOVE(lockp, locker_links, __db_lock); /* Abort lock, take it off list, and wake up this lock. */ SHOBJECT_LOCK(lt, region, sh_obj, ndx); @@ -589,7 +776,7 @@ __dd_abort(dbenv, info) ret = __lock_promote(lt, sh_obj, 0); MUTEX_UNLOCK(dbenv, &lockp->mutex); - region->ndeadlocks++; + region->stat.st_ndeadlocks++; UNLOCKREGION(dbenv, lt); return (0); @@ -600,13 +787,12 @@ out: UNLOCKREGION(dbenv, lt); #ifdef DIAGNOSTIC static void -__dd_debug(dbenv, idmap, bitmap, nlockers) +__dd_debug(dbenv, idmap, bitmap, nlockers, nalloc) DB_ENV *dbenv; locker_info *idmap; - u_int32_t *bitmap, nlockers; + u_int32_t *bitmap, nlockers, nalloc; { - u_int32_t i, j, *mymap, nentries; - int ret; + u_int32_t i, j, *mymap; char *msgbuf; __db_err(dbenv, "Waitsfor array\nWaiter:\tWaiting on:"); @@ -614,11 +800,10 @@ __dd_debug(dbenv, idmap, bitmap, nlockers) /* Allocate space to print 10 bytes per item waited on. */ #undef MSGBUF_LEN #define MSGBUF_LEN ((nlockers + 1) * 10 + 64) - if ((ret = __os_malloc(dbenv, MSGBUF_LEN, NULL, &msgbuf)) != 0) + if (__os_malloc(dbenv, MSGBUF_LEN, &msgbuf) != 0) return; - nentries = ALIGN(nlockers, 32) / 32; - for (mymap = bitmap, i = 0; i < nlockers; i++, mymap += nentries) { + for (mymap = bitmap, i = 0; i < nlockers; i++, mymap += nalloc) { if (!idmap[i].valid) continue; sprintf(msgbuf, /* Waiter. 
*/ @@ -632,6 +817,116 @@ __dd_debug(dbenv, idmap, bitmap, nlockers) __db_err(dbenv, msgbuf); } - __os_free(msgbuf, MSGBUF_LEN); + __os_free(dbenv, msgbuf); } #endif + +/* + * Given a bitmap that contains a deadlock, verify that the bit + * specified in the which parameter indicates a transaction that + * is actually deadlocked. Return 1 if really deadlocked, 0 otherwise. + * deadmap is the array that identified the deadlock. + * tmpmap is a copy of the initial bitmaps from the dd_build phase + * origmap is a temporary bit map into which we can OR things + * nlockers is the number of actual lockers under consideration + * nalloc is the number of words allocated for the bitmap + * which is the locker in question + */ +static int +__dd_verify(idmap, deadmap, tmpmap, origmap, nlockers, nalloc, which) + locker_info *idmap; + u_int32_t *deadmap, *tmpmap, *origmap; + u_int32_t nlockers, nalloc, which; +{ + u_int32_t *tmap; + u_int32_t j; + int count; + + memset(tmpmap, 0, sizeof(u_int32_t) * nalloc); + + /* + * In order for "which" to be actively involved in + * the deadlock, removing him from the evaluation + * must remove the deadlock. So, we OR together everyone + * except which; if all the participants still have their + * bits set, then the deadlock persists and which does + * not participate. If the deadlock does not persist + * then "which" does participate. + */ + count = 0; + for (j = 0; j < nlockers; j++) { + if (!ISSET_MAP(deadmap, j) || j == which) + continue; + + /* Find the map for this bit. */ + tmap = origmap + (nalloc * j); + + /* + * We special case the first waiter who is also a holder, so + * we don't automatically call that a deadlock. However, if + * it really is a deadlock, we need the bit set now so that + * we treat the first waiter like other waiters. 
+ */ + if (idmap[j].self_wait) + SET_MAP(tmap, j); + OR_MAP(tmpmap, tmap, nalloc); + count++; + } + + if (count == 1) + return (1); + + /* + * Now check the resulting map and see whether + * all participants still have their bit set. + */ + for (j = 0; j < nlockers; j++) { + if (!ISSET_MAP(deadmap, j) || j == which) + continue; + if (!ISSET_MAP(tmpmap, j)) + return (1); + } + return (0); +} + +/* + * __dd_isolder -- + * + * Figure out the relative age of two lockers. We make all lockers + * older than all transactions, because that's how it's worked + * historically (because lockers are lower ids). + */ +static int +__dd_isolder(a, b, lock_max, txn_max) + u_int32_t a, b; + u_int32_t lock_max, txn_max; +{ + u_int32_t max; + + /* Check for comparing lock-id and txnid. */ + if (a <= DB_LOCK_MAXID && b > DB_LOCK_MAXID) + return (1); + if (b <= DB_LOCK_MAXID && a > DB_LOCK_MAXID) + return (0); + + /* In the same space; figure out which one. */ + max = txn_max; + if (a <= DB_LOCK_MAXID) + max = lock_max; + + /* + * We can't get a 100% correct ordering, because we don't know + * where the current interval started and if there were older + * lockers outside the interval. We do the best we can. + */ + + /* + * Check for a wrapped case with ids above max. + */ + if (a > max && b < max) + return (1); + if (b > max && a < max) + return (0); + + return (a < b); +} |