diff options
author | Panu Matilainen <pmatilai@redhat.com> | 2007-07-16 16:48:14 +0300 |
---|---|---|
committer | Panu Matilainen <pmatilai@redhat.com> | 2007-07-16 16:48:14 +0300 |
commit | 2cfd3012bfcb5c5c61bbaf662ef084e0ab789d79 (patch) | |
tree | e12ee52087506ac8c7a5eee83b17497d98df2d40 /db/env | |
parent | b754fe19fd387ca5fe8e7c00ddaa25c898fa192f (diff) | |
download | librpm-tizen-2cfd3012bfcb5c5c61bbaf662ef084e0ab789d79.tar.gz librpm-tizen-2cfd3012bfcb5c5c61bbaf662ef084e0ab789d79.tar.bz2 librpm-tizen-2cfd3012bfcb5c5c61bbaf662ef084e0ab789d79.zip |
Update internal BDB to version 4.5.20
Diffstat (limited to 'db/env')
-rw-r--r-- | db/env/db_salloc.c | 41 | ||||
-rw-r--r-- | db/env/db_shash.c | 10 | ||||
-rw-r--r-- | db/env/env_config.c | 370 | ||||
-rw-r--r-- | db/env/env_failchk.c | 335 | ||||
-rw-r--r-- | db/env/env_file.c | 131 | ||||
-rw-r--r-- | db/env/env_method.c | 717 | ||||
-rw-r--r-- | db/env/env_open.c | 1092 | ||||
-rw-r--r-- | db/env/env_recover.c | 324 | ||||
-rw-r--r-- | db/env/env_region.c | 711 | ||||
-rw-r--r-- | db/env/env_register.c | 422 | ||||
-rw-r--r-- | db/env/env_stat.c | 341 |
11 files changed, 2812 insertions, 1682 deletions
diff --git a/db/env/db_salloc.c b/db/env/db_salloc.c index f2b1ed386..edd4fcb8e 100644 --- a/db/env/db_salloc.c +++ b/db/env/db_salloc.c @@ -1,21 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 1996-2006 + * Oracle Corporation. All rights reserved. * - * $Id: db_salloc.c,v 11.28 2004/09/17 22:00:27 mjc Exp $ + * $Id: db_salloc.c,v 12.10 2006/08/24 14:45:38 bostic Exp $ */ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <stdlib.h> -#include <string.h> -#endif - #include "db_int.h" /* @@ -58,7 +51,7 @@ __db_shalloc_init(infop, size) SH_LIST_INIT(hp); elp = (struct __data *)(hp + 1); - elp->len = size - sizeof(struct __head) - sizeof(elp->len); + elp->len = (size - sizeof(struct __head)) - sizeof(elp->len); SH_LIST_INSERT_HEAD(hp, elp, links, __data); } @@ -155,9 +148,15 @@ __db_shalloc(infop, len, align, retp) p = infop->addr; /* Walk the list, looking for a slot. */ - for (elp = SH_LIST_FIRST((struct __head *)p, __data); - elp != NULL; - elp = SH_LIST_NEXT(elp, links, __data)) { + SH_LIST_FOREACH(elp, (struct __head *)p, links, __data) { + /* + * Skip chunks that are too small to work. This avoids address + * wrap-around in the subsequent calculations (if len were too + * large). + */ + if (elp->len < len) + continue; + /* * Calculate the value of the returned pointer if we were to * use this chunk. @@ -167,7 +166,6 @@ __db_shalloc(infop, len, align, retp) */ rp = (u_int8_t *)elp + sizeof(size_t) + elp->len; rp = (u_int8_t *)rp - len; - rp = (u_int8_t *)((uintptr_t)rp & ~(align - 1)); rp = ALIGNP_DEC(rp, align); /* @@ -252,7 +250,7 @@ __db_shalloc_free(infop, ptr) /* In a private region, we call free. 
*/ if (F_ISSET(dbenv, DB_ENV_PRIVATE)) { - DB_ASSERT(infop->allocated >= free_size); + DB_ASSERT(dbenv, infop->allocated >= free_size); infop->allocated -= free_size; __os_free(dbenv, newp); @@ -266,16 +264,7 @@ __db_shalloc_free(infop, ptr) * * Check it to make sure it hasn't been stomped. */ - if (*((u_int8_t *)ptr + free_size - 1) != GUARD_BYTE) { - /* - * Eventually, once we push a DB_ENV handle down to these - * routines, we should use the standard output channels. - */ - fprintf(stderr, - "Guard byte incorrect during shared memory free.\n"); - abort(); - /* NOTREACHED */ - } + DB_ASSERT(dbenv, *((u_int8_t *)ptr + free_size - 1) == GUARD_BYTE); /* Trash the returned memory (including guard byte). */ memset(ptr, CLEAR_BYTE, free_size); diff --git a/db/env/db_shash.c b/db/env/db_shash.c index ac3b31622..15dccf139 100644 --- a/db/env/db_shash.c +++ b/db/env/db_shash.c @@ -1,18 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 1996-2006 + * Oracle Corporation. All rights reserved. * - * $Id: db_shash.c,v 11.9 2004/03/20 16:18:51 bostic Exp $ + * $Id: db_shash.c,v 12.4 2006/08/24 14:45:38 bostic Exp $ */ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> -#endif - #include "db_int.h" /* diff --git a/db/env/env_config.c b/db/env/env_config.c new file mode 100644 index 000000000..1a098e277 --- /dev/null +++ b/db/env/env_config.c @@ -0,0 +1,370 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996-2006 + * Oracle Corporation. All rights reserved. 
+ * + * $Id: env_config.c,v 12.67 2006/09/19 14:14:07 mjc Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +#undef CONFIG_SLOTS +#define CONFIG_SLOTS 10 + +static int __config_parse __P((DB_ENV *, char *, int)); +static int __config_split __P((char *, char *[CONFIG_SLOTS])); + +/* + * __env_read_db_config -- + * Read the DB_CONFIG file. + * + * PUBLIC: int __env_read_db_config __P((DB_ENV *)); + */ +int +__env_read_db_config(dbenv) + DB_ENV *dbenv; +{ + FILE *fp; + int lc, ret; + char *p, buf[256]; + + /* Parse the config file. */ + p = NULL; + if ((ret = + __db_appname(dbenv, DB_APP_NONE, "DB_CONFIG", 0, NULL, &p)) != 0) + return (ret); + if (p == NULL) + fp = NULL; + else { + fp = fopen(p, "r"); + __os_free(dbenv, p); + } + + if (fp == NULL) + return (0); + + for (lc = 1; fgets(buf, sizeof(buf), fp) != NULL; ++lc) { + if ((p = strchr(buf, '\n')) != NULL) + *p = '\0'; + else if (strlen(buf) + 1 == sizeof(buf)) { + __db_errx(dbenv, "DB_CONFIG: line too long"); + ret = EINVAL; + break; + } + for (p = buf; *p != '\0' || isspace((int)*p); ++p) + ; + if (buf[0] == '\0' || buf[0] == '#') + continue; + + if ((ret = __config_parse(dbenv, buf, lc)) != 0) + break; + } + (void)fclose(fp); + + return (ret); +} + +#undef CONFIG_GET_INT +#define CONFIG_GET_INT(s, vp) do { \ + int __ret; \ + if ((__ret = \ + __db_getlong(dbenv, NULL, s, 0, INT_MAX, vp)) != 0) \ + return (__ret); \ +} while (0) +#undef CONFIG_GET_LONG +#define CONFIG_GET_LONG(s, vp) do { \ + int __ret; \ + if ((__ret = \ + __db_getlong(dbenv, NULL, s, 0, LONG_MAX, vp)) != 0) \ + return (__ret); \ +} while (0) +#undef CONFIG_INT +#define CONFIG_INT(s, f) do { \ + if (strcasecmp(s, argv[0]) == 0) { \ + long __v; \ + if (nf != 2) \ + goto format; \ + CONFIG_GET_INT(argv[1], &__v); \ + return (f(dbenv, (int)__v)); \ + } \ +} while (0) +#undef CONFIG_GET_UINT32 +#define CONFIG_GET_UINT32(s, vp) do { \ + if 
(__db_getulong(dbenv, NULL, s, 0, UINT32_MAX, vp) != 0) \ + return (EINVAL); \ +} while (0) +#undef CONFIG_UINT32 +#define CONFIG_UINT32(s, f) do { \ + if (strcasecmp(s, argv[0]) == 0) { \ + u_long __v; \ + if (nf != 2) \ + goto format; \ + CONFIG_GET_UINT32(argv[1], &__v); \ + return (f(dbenv, (u_int32_t)__v)); \ + } \ +} while (0) + +/* + * __config_parse -- + * Parse a single NAME VALUE pair. + */ +static int +__config_parse(dbenv, s, lc) + DB_ENV *dbenv; + char *s; + int lc; +{ + u_long uv1, uv2; + u_int32_t flags; + long lv1, lv2; + int nf; + char *argv[CONFIG_SLOTS]; + /* Split the line by white-space. */ + if ((nf = __config_split(s, argv)) < 2) { +format: __db_errx(dbenv, + "line %d: %s: incorrect name-value pair", lc, argv[0]); + return (EINVAL); + } + + CONFIG_UINT32("mutex_set_align", __mutex_set_align); + CONFIG_UINT32("mutex_set_increment", __mutex_set_increment); + CONFIG_UINT32("mutex_set_max", __mutex_set_max); + CONFIG_UINT32("mutex_set_tas_spins", __mutex_set_tas_spins); + + if (strcasecmp(argv[0], "rep_set_config") == 0) { + if (nf != 2) + goto format; + if (strcasecmp(argv[1], "rep_bulk") == 0) + return (__rep_set_config(dbenv, + DB_REP_CONF_BULK, 1)); + if (strcasecmp(argv[1], "rep_delayclient") == 0) + return (__rep_set_config(dbenv, + DB_REP_CONF_DELAYCLIENT, 1)); + if (strcasecmp(argv[1], "rep_noautoinit") == 0) + return (__rep_set_config(dbenv, + DB_REP_CONF_NOAUTOINIT, 1)); + if (strcasecmp(argv[1], "rep_nowait") == 0) + return (__rep_set_config(dbenv, DB_REP_CONF_NOWAIT, 1)); + goto format; + } + + if (strcasecmp(argv[0], "set_cachesize") == 0) { + if (nf != 4) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + CONFIG_GET_UINT32(argv[2], &uv2); + CONFIG_GET_INT(argv[3], &lv1); + return (__memp_set_cachesize( + dbenv, (u_int32_t)uv1, (u_int32_t)uv2, (int)lv1)); + } + + if (strcasecmp(argv[0], "set_data_dir") == 0 || + strcasecmp(argv[0], "db_data_dir") == 0) { /* Compatibility. 
*/ + if (nf != 2) + goto format; + return (__env_set_data_dir(dbenv, argv[1])); + } + /* Undocumented. */ + if (strcasecmp(argv[0], "set_intermediate_dir") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_INT(argv[1], &lv1); + return (__env_set_intermediate_dir(dbenv, (int)lv1, 0)); + } + + if (strcasecmp(argv[0], "set_flags") == 0) { + if (nf != 2) + goto format; + if (strcasecmp(argv[1], "db_auto_commit") == 0) + return (__env_set_flags(dbenv, DB_AUTO_COMMIT, 1)); + if (strcasecmp(argv[1], "db_cdb_alldb") == 0) + return (__env_set_flags(dbenv, DB_CDB_ALLDB, 1)); + if (strcasecmp(argv[1], "db_direct_db") == 0) + return (__env_set_flags(dbenv, DB_DIRECT_DB, 1)); + if (strcasecmp(argv[1], "db_direct_log") == 0) + return (__env_set_flags(dbenv, DB_DIRECT_LOG, 1)); + if (strcasecmp(argv[1], "db_dsync_db") == 0) + return (__env_set_flags(dbenv, DB_DSYNC_DB, 1)); + if (strcasecmp(argv[1], "db_dsync_log") == 0) + return (__env_set_flags(dbenv, DB_DSYNC_LOG, 1)); + if (strcasecmp(argv[1], "db_log_autoremove") == 0) + return (__env_set_flags(dbenv, DB_LOG_AUTOREMOVE, 1)); + if (strcasecmp(argv[1], "db_log_inmemory") == 0) + return (__env_set_flags(dbenv, DB_LOG_INMEMORY, 1)); + if (strcasecmp(argv[1], "db_multiversion") == 0) + return (__env_set_flags(dbenv, DB_MULTIVERSION, 1)); + if (strcasecmp(argv[1], "db_nolocking") == 0) + return (__env_set_flags(dbenv, DB_NOLOCKING, 1)); + if (strcasecmp(argv[1], "db_nommap") == 0) + return (__env_set_flags(dbenv, DB_NOMMAP, 1)); + if (strcasecmp(argv[1], "db_nopanic") == 0) + return (__env_set_flags(dbenv, DB_NOPANIC, 1)); + if (strcasecmp(argv[1], "db_overwrite") == 0) + return (__env_set_flags(dbenv, DB_OVERWRITE, 1)); + if (strcasecmp(argv[1], "db_region_init") == 0) + return (__env_set_flags(dbenv, DB_REGION_INIT, 1)); + if (strcasecmp(argv[1], "db_txn_nosync") == 0) + return (__env_set_flags(dbenv, DB_TXN_NOSYNC, 1)); + if (strcasecmp(argv[1], "db_txn_snapshot") == 0) + return (__env_set_flags(dbenv, DB_TXN_SNAPSHOT, 1)); + 
if (strcasecmp(argv[1], "db_txn_write_nosync") == 0) + return ( + __env_set_flags(dbenv, DB_TXN_WRITE_NOSYNC, 1)); + if (strcasecmp(argv[1], "db_yieldcpu") == 0) + return (__env_set_flags(dbenv, DB_YIELDCPU, 1)); + goto format; + } + + CONFIG_UINT32("set_lg_bsize", __log_set_lg_bsize); + CONFIG_INT("set_lg_filemode", __log_set_lg_filemode); + CONFIG_UINT32("set_lg_max", __log_set_lg_max); + CONFIG_UINT32("set_lg_regionmax", __log_set_lg_regionmax); + + if (strcasecmp(argv[0], "set_lg_dir") == 0 || + strcasecmp(argv[0], "db_log_dir") == 0) { /* Compatibility. */ + if (nf != 2) + goto format; + return (__log_set_lg_dir(dbenv, argv[1])); + } + + if (strcasecmp(argv[0], "set_lk_detect") == 0) { + if (nf != 2) + goto format; + if (strcasecmp(argv[1], "db_lock_default") == 0) + flags = DB_LOCK_DEFAULT; + else if (strcasecmp(argv[1], "db_lock_expire") == 0) + flags = DB_LOCK_EXPIRE; + else if (strcasecmp(argv[1], "db_lock_maxlocks") == 0) + flags = DB_LOCK_MAXLOCKS; + else if (strcasecmp(argv[1], "db_lock_maxwrite") == 0) + flags = DB_LOCK_MAXWRITE; + else if (strcasecmp(argv[1], "db_lock_minlocks") == 0) + flags = DB_LOCK_MINLOCKS; + else if (strcasecmp(argv[1], "db_lock_minwrite") == 0) + flags = DB_LOCK_MINWRITE; + else if (strcasecmp(argv[1], "db_lock_oldest") == 0) + flags = DB_LOCK_OLDEST; + else if (strcasecmp(argv[1], "db_lock_random") == 0) + flags = DB_LOCK_RANDOM; + else if (strcasecmp(argv[1], "db_lock_youngest") == 0) + flags = DB_LOCK_YOUNGEST; + else + goto format; + return (__lock_set_lk_detect(dbenv, flags)); + } + + CONFIG_UINT32("set_lk_max_locks", __lock_set_lk_max_locks); + CONFIG_UINT32("set_lk_max_lockers", __lock_set_lk_max_lockers); + CONFIG_UINT32("set_lk_max_objects", __lock_set_lk_max_objects); + + if (strcasecmp(argv[0], "set_lock_timeout") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + return (__lock_set_env_timeout( + dbenv, (u_int32_t)uv1, DB_SET_LOCK_TIMEOUT)); + } + + CONFIG_INT("set_mp_max_openfd", 
__memp_set_mp_max_openfd); + + if (strcasecmp(argv[0], "set_mp_max_write") == 0) { + if (nf != 3) + goto format; + CONFIG_GET_INT(argv[1], &lv1); + CONFIG_GET_INT(argv[2], &lv2); + return (__memp_set_mp_max_write(dbenv, (int)lv1, (int)lv2)); + } + + CONFIG_UINT32("set_mp_mmapsize", __memp_set_mp_mmapsize); + + if (strcasecmp(argv[0], "set_region_init") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_INT(argv[1], &lv1); + if (lv1 != 0 && lv1 != 1) + goto format; + return (__env_set_flags( + dbenv, DB_REGION_INIT, lv1 == 0 ? 0 : 1)); + } + + if (strcasecmp(argv[0], "set_shm_key") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_LONG(argv[1], &lv1); + return (__env_set_shm_key(dbenv, lv1)); + } + + /* + * The set_tas_spins method has been replaced by mutex_set_tas_spins. + * The set_tas_spins argv[0] remains for DB_CONFIG compatibility. + */ + CONFIG_UINT32("set_tas_spins", __mutex_set_tas_spins); + + if (strcasecmp(argv[0], "set_tmp_dir") == 0 || + strcasecmp(argv[0], "db_tmp_dir") == 0) { /* Compatibility.*/ + if (nf != 2) + goto format; + return (__env_set_tmp_dir(dbenv, argv[1])); + } + + CONFIG_UINT32("set_tx_max", __txn_set_tx_max); + + if (strcasecmp(argv[0], "set_txn_timeout") == 0) { + if (nf != 2) + goto format; + CONFIG_GET_UINT32(argv[1], &uv1); + return (__lock_set_env_timeout( + dbenv, (u_int32_t)uv1, DB_SET_TXN_TIMEOUT)); + } + + if (strcasecmp(argv[0], "set_verbose") == 0) { + if (nf != 2) + goto format; + if (strcasecmp(argv[1], "db_verb_deadlock") == 0) + flags = DB_VERB_DEADLOCK; + else if (strcasecmp(argv[1], "db_verb_recovery") == 0) + flags = DB_VERB_RECOVERY; + else if (strcasecmp(argv[1], "db_verb_register") == 0) + flags = DB_VERB_REGISTER; + else if (strcasecmp(argv[1], "db_verb_replication") == 0) + flags = DB_VERB_REPLICATION; + else if (strcasecmp(argv[1], "db_verb_waitsfor") == 0) + flags = DB_VERB_WAITSFOR; + else + goto format; + return (__env_set_verbose(dbenv, flags, 1)); + } + + __db_errx(dbenv, "unrecognized name-value pair: 
%s", s); + return (EINVAL); +} + +/* + * __config_split -- + * Split lines into white-space separated fields, returning the count of + * fields. + */ +static int +__config_split(input, argv) + char *input, *argv[CONFIG_SLOTS]; +{ + int count; + char **ap; + + for (count = 0, ap = argv; (*ap = strsep(&input, " \t\n")) != NULL;) + if (**ap != '\0') { + ++count; + if (++ap == &argv[CONFIG_SLOTS - 1]) { + *ap = NULL; + break; + } + } + return (count); +} diff --git a/db/env/env_failchk.c b/db/env/env_failchk.c new file mode 100644 index 000000000..96348d44c --- /dev/null +++ b/db/env/env_failchk.c @@ -0,0 +1,335 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2005-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: env_failchk.c,v 12.28 2006/08/24 14:45:39 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" +#include "dbinc/mutex_int.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/hash.h" /* Needed for call to __ham_func5. */ +#include "dbinc/lock.h" +#include "dbinc/txn.h" + +static int __env_in_api __P((DB_ENV *)); + +/* + * __env_failchk_pp -- + * DB_ENV->failchk pre/post processing. + * + * PUBLIC: int __env_failchk_pp __P((DB_ENV *, u_int32_t)); + */ +int +__env_failchk_pp(dbenv, flags) + DB_ENV *dbenv; + u_int32_t flags; +{ + DB_THREAD_INFO *ip; + int ret; + + PANIC_CHECK(dbenv); + ENV_ILLEGAL_BEFORE_OPEN(dbenv, "DB_ENV->failchk"); + + /* + * DB_ENV->failchk requires self and is-alive functions. We + * have a default self function, but no is-alive function. + */ + if (!ALIVE_ON(dbenv)) { + __db_errx(dbenv, + "DB_ENV->failchk requires DB_ENV->is_alive be configured"); + return (EINVAL); + } + + if (flags != 0) + return (__db_ferr(dbenv, "DB_ENV->failchk", 0)); + + ENV_ENTER(dbenv, ip); + + /* + * We check for dead threads in the API first as this would be likely + * to hang other things we try later, like locks and transactions. 
+ */ + if ((ret = __env_in_api(dbenv)) != 0) + goto err; + + if (LOCKING_ON(dbenv) && (ret = __lock_failchk(dbenv)) != 0) + goto err; + + if (TXN_ON(dbenv) && (ret = __txn_failchk(dbenv)) != 0) + goto err; + + ret = __mut_failchk(dbenv); + +err: ENV_LEAVE(dbenv, ip); + return (ret); +} + +/* + * __env_thread_init -- + * Initialize the thread control block table. + * + * PUBLIC: int __env_thread_init __P((DB_ENV *, int)); + */ +int +__env_thread_init(dbenv, created) + DB_ENV *dbenv; + int created; +{ + DB_HASHTAB *htab; + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + REGINFO *infop; + THREAD_INFO *thread; + int ret; + + mtxmgr = dbenv->mutex_handle; + mtxregion = mtxmgr->reginfo.primary; + infop = &mtxmgr->reginfo; + + if (mtxregion->thread_off == INVALID_ROFF) { + if (dbenv->thr_nbucket == 0) { + dbenv->thr_hashtab = NULL; + if (ALIVE_ON(dbenv)) { + __db_errx(dbenv, + "is_alive method specified but no thread region allocated"); + return (EINVAL); + } + return (0); + } + + if (!created) { + __db_errx(dbenv, + "thread table must be allocated at environment create time"); + return (EINVAL); + } + + if ((ret = __db_shalloc(infop, + sizeof(THREAD_INFO), 0, &thread)) != 0) { + __db_errx(dbenv, + "cannot allocate a thread status block"); + return (ret); + } + memset(thread, 0, sizeof(*thread)); + mtxregion->thread_off = R_OFFSET(infop, thread); + thread->thr_nbucket = __db_tablesize(dbenv->thr_nbucket); + if ((ret = __db_shalloc(infop, + thread->thr_nbucket * sizeof(DB_HASHTAB), 0, &htab)) != 0) + return (ret); + thread->thr_hashoff = R_OFFSET(infop, htab); + __db_hashinit(htab, thread->thr_nbucket); + thread->thr_max = dbenv->thr_max; + } else { + thread = R_ADDR(infop, mtxregion->thread_off); + htab = R_ADDR(infop, thread->thr_hashoff); + } + + dbenv->thr_hashtab = htab; + dbenv->thr_nbucket = thread->thr_nbucket; + dbenv->thr_max = thread->thr_max; + return (0); +} + +/* + * __env_in_api -- + * Look for threads which died in the api and complain. 
+ */ +static int +__env_in_api(dbenv) + DB_ENV *dbenv; +{ + DB_HASHTAB *htab; + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + DB_THREAD_INFO *ip; + REGINFO *infop; + THREAD_INFO *thread; + u_int32_t i; + + if ((htab = dbenv->thr_hashtab) == NULL) + return (EINVAL); + + mtxmgr = dbenv->mutex_handle; + mtxregion = mtxmgr->reginfo.primary; + infop = &mtxmgr->reginfo; + thread = R_ADDR(infop, mtxregion->thread_off); + + for (i = 0; i < dbenv->thr_nbucket; i++) + SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) { + if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE || + (ip->dbth_state == THREAD_OUT && + thread->thr_count < thread->thr_max)) + continue; + if (dbenv->is_alive( + dbenv, ip->dbth_pid, ip->dbth_tid, 0)) + continue; + if (ip->dbth_state == THREAD_OUT) { + ip->dbth_state = THREAD_SLOT_NOT_IN_USE; + continue; + } + return (__db_failed(dbenv, + "Thread died in Berkeley DB library", + ip->dbth_pid, ip->dbth_tid)); + } + + return (0); +} + +struct __db_threadid { + pid_t pid; + db_threadid_t tid; +}; + +/* + * PUBLIC: int __env_set_state __P((DB_ENV *, + * PUBLIC: DB_THREAD_INFO **, DB_THREAD_STATE)); + */ +int +__env_set_state(dbenv, ipp, state) + DB_ENV *dbenv; + DB_THREAD_INFO **ipp; + DB_THREAD_STATE state; +{ + DB_HASHTAB *htab; + DB_MUTEXMGR *mtxmgr; + DB_MUTEXREGION *mtxregion; + DB_THREAD_INFO *ip; + struct __db_threadid id; + REGINFO *infop; + THREAD_INFO *thread; + int ret; + u_int32_t indx; + + htab = (DB_HASHTAB *)dbenv->thr_hashtab; + + dbenv->thread_id(dbenv, &id.pid, &id.tid); + + /* + * Hashing of thread ids. This is simple but could be replaced with + * something more expensive if needed. + */ +#ifdef HAVE_SIMPLE_THREAD_TYPE + /* + * A thread ID may be a pointer, so explicitly cast to a pointer of + * the appropriate size before doing the bitwise XOR. 
+ */ + indx = (u_int32_t)((uintptr_t)id.pid ^ (uintptr_t)id.tid); +#else + indx = __ham_func5(NULL, &id.tid, sizeof(id.tid)); +#endif + indx %= dbenv->thr_nbucket; + SH_TAILQ_FOREACH(ip, &htab[indx], dbth_links, __db_thread_info) { +#ifdef HAVE_SIMPLE_THREAD_TYPE + if (id.pid == ip->dbth_pid && id.tid == ip->dbth_tid) + break; +#else + if (memcmp(&id.pid, &ip->dbth_pid, sizeof(id.pid)) != 0) + continue; + if (memcmp(&id.tid, &ip->dbth_tid, sizeof(id.tid)) != 0) + continue; + break; +#endif + } + +#ifdef DIAGNOSTIC + if (state == THREAD_DIAGNOSTIC) { + *ipp = ip; + return (0); + } +#endif + + ret = 0; + if (ip == NULL) { + mtxmgr = dbenv->mutex_handle; + mtxregion = mtxmgr->reginfo.primary; + infop = &mtxmgr->reginfo; + thread = R_ADDR(infop, mtxregion->thread_off); + MUTEX_SYSTEM_LOCK(dbenv); + + /* + * If we are passed the specified max, try to reclaim one from + * our queue. If failcheck has marked the slot not in use, we + * can take it, otherwise we must call is_alive before freeing + * it. + */ + if (thread->thr_count >= thread->thr_max) { + SH_TAILQ_FOREACH( + ip, &htab[indx], dbth_links, __db_thread_info) + if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE || + (ip->dbth_state == THREAD_OUT && + ALIVE_ON(dbenv) && !dbenv->is_alive(dbenv, + ip->dbth_pid, ip->dbth_tid, 0))) + break; + + if (ip != NULL) + goto init; + } + + thread->thr_count++; + if ((ret = __db_shalloc(infop, + sizeof(DB_THREAD_INFO), 0, &ip)) == 0) { + memset(ip, 0, sizeof(*ip)); + /* + * This assumes we can link atomically since we do + * no locking here. We never use the backpointer + * so we only need to be able to write an offset + * atomically. + */ + SH_TAILQ_INSERT_HEAD( + &htab[indx], ip, dbth_links, __db_thread_info); +init: ip->dbth_pid = id.pid; + ip->dbth_tid = id.tid; + ip->dbth_state = state; + } + MUTEX_SYSTEM_UNLOCK(dbenv); + } else + ip->dbth_state = state; + *ipp = ip; + + return (ret); +} + +/* + * __env_thread_id_string -- + * Convert a thread id to a string. 
+ * + * PUBLIC: char *__env_thread_id_string + * PUBLIC: __P((DB_ENV *, pid_t, db_threadid_t, char *)); + */ +char * +__env_thread_id_string(dbenv, pid, tid, buf) + DB_ENV *dbenv; + pid_t pid; + db_threadid_t tid; + char *buf; +{ +#ifdef HAVE_SIMPLE_THREAD_TYPE +#ifdef UINT64_FMT + char fmt[20]; + + snprintf(fmt, sizeof(fmt), "%s/%s", UINT64_FMT, UINT64_FMT); + snprintf(buf, + DB_THREADID_STRLEN, fmt, (u_int64_t)pid, (u_int64_t)(uintptr_t)tid); +#else + snprintf(buf, DB_THREADID_STRLEN, "%lu/%lu", (u_long)pid, (u_long)tid); +#endif +#else +#ifdef UINT64_FMT + char fmt[20]; + + snprintf(fmt, sizeof(fmt), "%s/TID", UINT64_FMT); + snprintf(buf, DB_THREADID_STRLEN, fmt, (u_int64_t)pid); +#else + snprintf(buf, DB_THREADID_STRLEN, "%lu/TID", (u_long)pid); +#endif +#endif + COMPQUIET(dbenv, NULL); + COMPQUIET(*(u_int8_t *)&tid, 0); + + return (buf); +} diff --git a/db/env/env_file.c b/db/env/env_file.c index 53f93cc53..09ff5c6fd 100644 --- a/db/env/env_file.c +++ b/db/env/env_file.c @@ -1,98 +1,66 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2002-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 2002-2006 + * Oracle Corporation. All rights reserved. * - * $Id: env_file.c,v 1.11 2004/03/24 20:51:38 bostic Exp $ + * $Id: env_file.c,v 12.12 2006/08/24 14:45:39 bostic Exp $ */ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <string.h> -#endif - #include "db_int.h" -static int __db_overwrite_pass __P((DB_ENV *, - const char *, DB_FH *, u_int32_t, u_int32_t, int)); - /* - * __db_fileinit -- - * Initialize a regular file, optionally zero-filling it as well. + * __db_file_extend -- + * Initialize a regular file by writing the last page of the file. 
* - * PUBLIC: int __db_fileinit __P((DB_ENV *, DB_FH *, size_t, int)); + * PUBLIC: int __db_file_extend __P((DB_ENV *, DB_FH *, size_t)); */ int -__db_fileinit(dbenv, fhp, size, zerofill) +__db_file_extend(dbenv, fhp, size) DB_ENV *dbenv; DB_FH *fhp; size_t size; - int zerofill; { db_pgno_t pages; - size_t i; size_t nw; u_int32_t relative; int ret; - char buf[OS_VMPAGESIZE]; - - /* Write nuls to the new bytes. */ - memset(buf, 0, sizeof(buf)); + char *buf; /* - * Extend the region by writing the last page. If the region is >4Gb, + * Extend the file by writing the last page. If the region is >4Gb, * increment may be larger than the maximum possible seek "relative" * argument, as it's an unsigned 32-bit value. Break the offset into - * pages of 1MB each so that we don't overflow (2^20 + 2^32 is bigger + * pages of 1MB each so we don't overflow -- (2^20 + 2^32 is bigger * than any memory I expect to see for awhile). */ - if ((ret = __os_seek(dbenv, fhp, 0, 0, 0, 0, DB_OS_SEEK_END)) != 0) - return (ret); - pages = (db_pgno_t)((size - OS_VMPAGESIZE) / MEGABYTE); - relative = (u_int32_t)((size - OS_VMPAGESIZE) % MEGABYTE); - if ((ret = __os_seek(dbenv, - fhp, MEGABYTE, pages, relative, 0, DB_OS_SEEK_CUR)) != 0) - return (ret); - if ((ret = __os_write(dbenv, fhp, buf, sizeof(buf), &nw)) != 0) +#undef FILE_EXTEND_IO_SIZE +#define FILE_EXTEND_IO_SIZE (8 * 1024) + if ((ret = __os_calloc(dbenv, FILE_EXTEND_IO_SIZE, 1, &buf)) != 0) return (ret); - /* - * We may want to guarantee that there is enough disk space for the - * file, so we also write a byte to each page. We write the byte - * because reading it is insufficient on systems smart enough not to - * instantiate disk pages to satisfy a read (e.g., Solaris). 
- */ - if (zerofill) { - pages = (db_pgno_t)(size / MEGABYTE); - relative = (u_int32_t)(size % MEGABYTE); - if ((ret = __os_seek(dbenv, fhp, - MEGABYTE, pages, relative, 1, DB_OS_SEEK_END)) != 0) - return (ret); + pages = (db_pgno_t)((size - FILE_EXTEND_IO_SIZE) / MEGABYTE); + relative = (u_int32_t)((size - FILE_EXTEND_IO_SIZE) % MEGABYTE); + if ((ret = __os_seek(dbenv, fhp, pages, MEGABYTE, relative)) != 0) + goto err; + if ((ret = __os_write(dbenv, fhp, buf, FILE_EXTEND_IO_SIZE, &nw)) != 0) + goto err; + +err: __os_free(dbenv, buf); - /* Write a byte to each page. */ - for (i = 0; i < size; i += OS_VMPAGESIZE) { - if ((ret = __os_write(dbenv, fhp, buf, 1, &nw)) != 0) - return (ret); - if ((ret = __os_seek(dbenv, fhp, - 0, 0, OS_VMPAGESIZE - 1, 0, DB_OS_SEEK_CUR)) != 0) - return (ret); - } - } return (0); } /* - * __db_overwrite -- - * Overwrite a file. + * __db_file_multi_write -- + * Overwrite a file with multiple passes to corrupt the data. * - * PUBLIC: int __db_overwrite __P((DB_ENV *, const char *)); + * PUBLIC: int __db_file_multi_write __P((DB_ENV *, const char *)); */ int -__db_overwrite(dbenv, path) +__db_file_multi_write(dbenv, path) DB_ENV *dbenv; const char *path; { @@ -108,17 +76,17 @@ __db_overwrite(dbenv, path) * byte patterns. Implies a fixed-block filesystem, journaling * or logging filesystems will require operating system support. 
*/ - if ((ret = __db_overwrite_pass( - dbenv, path, fhp, mbytes, bytes, 255)) != 0) + if ((ret = + __db_file_write(dbenv, fhp, mbytes, bytes, 255)) != 0) goto err; - if ((ret = __db_overwrite_pass( - dbenv, path, fhp, mbytes, bytes, 0)) != 0) + if ((ret = + __db_file_write(dbenv, fhp, mbytes, bytes, 0)) != 0) goto err; - if ((ret = __db_overwrite_pass( - dbenv, path, fhp, mbytes, bytes, 255)) != 0) + if ((ret = + __db_file_write(dbenv, fhp, mbytes, bytes, 255)) != 0) goto err; } else - __db_err(dbenv, "%s: %s", path, db_strerror(ret)); + __db_err(dbenv, ret, "%s", path); err: if (fhp != NULL) (void)__os_closehandle(dbenv, fhp); @@ -126,39 +94,44 @@ err: if (fhp != NULL) } /* - * __db_overwrite_pass -- + * __db_file_write -- * A single pass over the file, writing the specified byte pattern. + * + * PUBLIC: int __db_file_write __P((DB_ENV *, + * PUBLIC: DB_FH *, u_int32_t, u_int32_t, int)); */ -static int -__db_overwrite_pass(dbenv, path, fhp, mbytes, bytes, pattern) +int +__db_file_write(dbenv, fhp, mbytes, bytes, pattern) DB_ENV *dbenv; - const char *path; DB_FH *fhp; int pattern; u_int32_t mbytes, bytes; { size_t len, nw; int i, ret; - char buf[8 * 1024]; + char *buf; - if ((ret = __os_seek(dbenv, fhp, 0, 0, 0, 0, DB_OS_SEEK_SET)) != 0) - goto err; - - memset(buf, pattern, sizeof(buf)); +#undef FILE_WRITE_IO_SIZE +#define FILE_WRITE_IO_SIZE (64 * 1024) + if ((ret = __os_malloc(dbenv, FILE_WRITE_IO_SIZE, &buf)) != 0) + return (ret); + memset(buf, pattern, FILE_WRITE_IO_SIZE); + if ((ret = __os_seek(dbenv, fhp, 0, 0, 0)) != 0) + goto err; for (; mbytes > 0; --mbytes) - for (i = MEGABYTE / sizeof(buf); i > 0; --i) - if ((ret = - __os_write(dbenv, fhp, buf, sizeof(buf), &nw)) != 0) + for (i = MEGABYTE / FILE_WRITE_IO_SIZE; i > 0; --i) + if ((ret = __os_write( + dbenv, fhp, buf, FILE_WRITE_IO_SIZE, &nw)) != 0) goto err; for (; bytes > 0; bytes -= (u_int32_t)len) { - len = bytes < sizeof(buf) ? bytes : sizeof(buf); + len = bytes < FILE_WRITE_IO_SIZE ? 
bytes : FILE_WRITE_IO_SIZE; if ((ret = __os_write(dbenv, fhp, buf, len, &nw)) != 0) goto err; } - if ((ret = __os_fsync(dbenv, fhp)) != 0) -err: __db_err(dbenv, "%s: %s", path, db_strerror(ret)); + ret = __os_fsync(dbenv, fhp); +err: __os_free(dbenv, buf); return (ret); } diff --git a/db/env/env_method.c b/db/env/env_method.c index 4f865061b..a2c2acbec 100644 --- a/db/env/env_method.c +++ b/db/env/env_method.c @@ -1,28 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 1999-2006 + * Oracle Corporation. All rights reserved. * - * $Id: env_method.c,v 11.136 2004/10/11 18:47:50 bostic Exp $ + * $Id: env_method.c,v 12.47 2006/09/11 15:40:20 bostic Exp $ */ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#ifdef HAVE_RPC -#include <rpc/rpc.h> -#endif - -#include <string.h> -#endif - -#ifdef HAVE_RPC -#include "db_server.h" -#endif - /* * This is the file that initializes the global array. Do it this way because * people keep changing one without changing the other. 
Having declaration and @@ -33,7 +19,6 @@ #include "db_int.h" #include "dbinc/crypto.h" #include "dbinc/hmac.h" -#include "dbinc/db_shash.h" #include "dbinc/db_page.h" #include "dbinc/db_am.h" #include "dbinc/lock.h" @@ -42,26 +27,42 @@ #include "dbinc/txn.h" #ifdef HAVE_RPC +#ifndef NO_SYSTEM_INCLUDES +#include <rpc/rpc.h> +#endif +#include "db_server.h" #include "dbinc_auto/rpc_client_ext.h" #endif -static void __dbenv_err __P((const DB_ENV *, int, const char *, ...)); -static void __dbenv_errx __P((const DB_ENV *, const char *, ...)); -static int __dbenv_get_data_dirs __P((DB_ENV *, const char ***)); -static int __dbenv_get_flags __P((DB_ENV *, u_int32_t *)); -static int __dbenv_get_home __P((DB_ENV *, const char **)); -static int __dbenv_get_shm_key __P((DB_ENV *, long *)); -static int __dbenv_get_tas_spins __P((DB_ENV *, u_int32_t *)); -static int __dbenv_get_tmp_dir __P((DB_ENV *, const char **)); -static int __dbenv_get_verbose __P((DB_ENV *, u_int32_t, int *)); -static int __dbenv_init __P((DB_ENV *)); -static void __dbenv_map_flags __P((DB_ENV *, u_int32_t *, u_int32_t *)); -static int __dbenv_set_app_dispatch +static void __env_err __P((const DB_ENV *, int, const char *, ...)); +static void __env_errx __P((const DB_ENV *, const char *, ...)); +static int __env_get_data_dirs __P((DB_ENV *, const char ***)); +static int __env_get_flags __P((DB_ENV *, u_int32_t *)); +static int __env_get_home __P((DB_ENV *, const char **)); +static int __env_get_shm_key __P((DB_ENV *, long *)); +static int __env_get_tmp_dir __P((DB_ENV *, const char **)); +static int __env_get_verbose __P((DB_ENV *, u_int32_t, int *)); +static int __env_init __P((DB_ENV *)); +static void __env_map_flags __P((DB_ENV *, u_int32_t *, u_int32_t *)); +static int __env_set_app_dispatch __P((DB_ENV *, int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops))); -static int __dbenv_set_feedback __P((DB_ENV *, void (*)(DB_ENV *, int, int))); -static int __dbenv_set_rpc_server_noclnt +static int 
__env_set_event_notify + __P((DB_ENV *, void (*)(DB_ENV *, u_int32_t, void *))); +static int __env_set_feedback __P((DB_ENV *, void (*)(DB_ENV *, int, int))); +static int __env_set_isalive __P((DB_ENV *, + int (*)(DB_ENV *, pid_t, db_threadid_t, u_int32_t))); +static int __env_set_thread_id __P((DB_ENV *, void (*)(DB_ENV *, + pid_t *, db_threadid_t *))); +static int __env_set_thread_id_string __P((DB_ENV *, + char * (*)(DB_ENV *, pid_t, db_threadid_t, char *))); +static int __env_set_thread_count __P((DB_ENV *, u_int32_t)); +static int __env_set_rpc_server __P((DB_ENV *, void *, const char *, long, long, u_int32_t)); +#ifndef HAVE_REPLICATION_THREADS +static int __db_norepmgr __P((DB_ENV *)); +#endif + /* * db_env_create -- * DB_ENV constructor. @@ -95,21 +96,68 @@ db_env_create(dbenvpp, flags) if (LF_ISSET(DB_RPCCLIENT)) F_SET(dbenv, DB_ENV_RPCCLIENT); #endif - if ((ret = __dbenv_init(dbenv)) != 0) { - __os_free(NULL, dbenv); - return (ret); + if ((ret = __env_init(dbenv)) != 0 || + (ret = __lock_dbenv_create(dbenv)) != 0 || + (ret = __log_dbenv_create(dbenv)) != 0 || + (ret = __memp_dbenv_create(dbenv)) != 0 || +#ifdef HAVE_REPLICATION + (ret = __rep_dbenv_create(dbenv)) != 0 || +#endif + (ret = __txn_dbenv_create(dbenv))) + goto err; + +#ifdef HAVE_RPC + /* + * RPC specific: must be last, as we replace methods set by the + * access methods. + */ + if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) { + __dbcl_dbenv_init(dbenv); + /* + * !!! + * We wrap the DB_ENV->open and close methods for RPC, and + * the rpc.src file can't handle that. + */ + dbenv->open = __dbcl_env_open_wrap; + dbenv->close = __dbcl_env_close_wrap; } +#endif *dbenvpp = dbenv; return (0); + +err: __db_env_destroy(dbenv); + return (ret); } /* - * __dbenv_init -- + * __db_env_destroy -- + * DB_ENV destructor. 
+ * + * PUBLIC: void __db_env_destroy __P((DB_ENV *)); + */ +void +__db_env_destroy(dbenv) + DB_ENV *dbenv; +{ + __lock_dbenv_destroy(dbenv); + __log_dbenv_destroy(dbenv); + __memp_dbenv_destroy(dbenv); +#ifdef HAVE_REPLICATION + __rep_dbenv_destroy(dbenv); +#endif + __txn_dbenv_destroy(dbenv); + + memset(dbenv, CLEAR_BYTE, sizeof(DB_ENV)); + __os_free(NULL, dbenv); +} + +/* + * __env_init -- * Initialize a DB_ENV structure. */ static int -__dbenv_init(dbenv) +__env_init(dbenv) DB_ENV *dbenv; { /* @@ -118,146 +166,219 @@ __dbenv_init(dbenv) * state or turn off mutex locking, and so we can neither check * the panic state or acquire a mutex in the DB_ENV create path. * - * Set up methods that are the same in both normal and RPC + * Initialize the method handles. */ - dbenv->err = __dbenv_err; - dbenv->errx = __dbenv_errx; - dbenv->set_errcall = __dbenv_set_errcall; - dbenv->get_errfile = __dbenv_get_errfile; - dbenv->set_errfile = __dbenv_set_errfile; - dbenv->get_errpfx = __dbenv_get_errpfx; - dbenv->set_errpfx = __dbenv_set_errpfx; - dbenv->set_msgcall = __dbenv_set_msgcall; - dbenv->get_msgfile = __dbenv_get_msgfile; - dbenv->set_msgfile = __dbenv_set_msgfile; - -#ifdef HAVE_RPC - if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) { - dbenv->close = __dbcl_env_close_wrap; - dbenv->dbremove = __dbcl_env_dbremove; - dbenv->dbrename = __dbcl_env_dbrename; - dbenv->get_home = __dbcl_env_get_home; - dbenv->get_open_flags = __dbcl_env_get_open_flags; - dbenv->open = __dbcl_env_open_wrap; - dbenv->remove = __dbcl_env_remove; - dbenv->stat_print = NULL; - - dbenv->fileid_reset = NULL; - dbenv->is_bigendian = NULL; - dbenv->lsn_reset = NULL; - dbenv->prdbt = NULL; - - dbenv->set_alloc = __dbcl_env_alloc; - dbenv->set_app_dispatch = __dbcl_set_app_dispatch; - dbenv->get_data_dirs = __dbcl_get_data_dirs; - dbenv->set_data_dir = __dbcl_set_data_dir; - dbenv->get_encrypt_flags = __dbcl_env_get_encrypt_flags; - dbenv->set_encrypt = __dbcl_env_encrypt; - dbenv->set_feedback = 
__dbcl_env_set_feedback; - dbenv->get_flags = __dbcl_env_get_flags; - dbenv->set_flags = __dbcl_env_flags; - dbenv->set_paniccall = __dbcl_env_paniccall; - dbenv->set_rpc_server = __dbcl_envrpcserver; - dbenv->get_shm_key = __dbcl_get_shm_key; - dbenv->set_shm_key = __dbcl_set_shm_key; - dbenv->get_tas_spins = __dbcl_get_tas_spins; - dbenv->set_tas_spins = __dbcl_set_tas_spins; - dbenv->get_timeout = __dbcl_get_timeout; - dbenv->set_timeout = __dbcl_set_timeout; - dbenv->get_tmp_dir = __dbcl_get_tmp_dir; - dbenv->set_tmp_dir = __dbcl_set_tmp_dir; - dbenv->get_verbose = __dbcl_get_verbose; - dbenv->set_verbose = __dbcl_set_verbose; - } else { -#endif - dbenv->close = __dbenv_close_pp; - dbenv->dbremove = __dbenv_dbremove_pp; - dbenv->dbrename = __dbenv_dbrename_pp; - dbenv->open = __dbenv_open; - dbenv->remove = __dbenv_remove; - dbenv->stat_print = __dbenv_stat_print_pp; - - dbenv->fileid_reset = __db_fileid_reset; - dbenv->is_bigendian = __db_isbigendian; - dbenv->lsn_reset = __db_lsn_reset; - dbenv->prdbt = __db_prdbt; - - dbenv->get_home = __dbenv_get_home; - dbenv->get_open_flags = __dbenv_get_open_flags; - dbenv->set_alloc = __dbenv_set_alloc; - dbenv->set_app_dispatch = __dbenv_set_app_dispatch; - dbenv->get_data_dirs = __dbenv_get_data_dirs; - dbenv->set_data_dir = __dbenv_set_data_dir; - dbenv->get_encrypt_flags = __dbenv_get_encrypt_flags; - dbenv->set_encrypt = __dbenv_set_encrypt; - dbenv->set_feedback = __dbenv_set_feedback; - dbenv->get_flags = __dbenv_get_flags; - dbenv->set_flags = __dbenv_set_flags; - dbenv->set_intermediate_dir = __dbenv_set_intermediate_dir; - dbenv->set_paniccall = __dbenv_set_paniccall; - dbenv->set_rpc_server = __dbenv_set_rpc_server_noclnt; - dbenv->get_shm_key = __dbenv_get_shm_key; - dbenv->set_shm_key = __dbenv_set_shm_key; - dbenv->get_tas_spins = __dbenv_get_tas_spins; - dbenv->set_tas_spins = __dbenv_set_tas_spins; - dbenv->get_tmp_dir = __dbenv_get_tmp_dir; - dbenv->set_tmp_dir = __dbenv_set_tmp_dir; - 
dbenv->get_verbose = __dbenv_get_verbose; - dbenv->set_verbose = __dbenv_set_verbose; -#ifdef HAVE_RPC - } -#endif - dbenv->shm_key = INVALID_REGION_SEGID; + /* DB_ENV PUBLIC HANDLE LIST BEGIN */ + dbenv->cdsgroup_begin = __cdsgroup_begin; + dbenv->close = __env_close_pp; + dbenv->dbremove = __env_dbremove_pp; + dbenv->dbrename = __env_dbrename_pp; + dbenv->err = __env_err; + dbenv->errx = __env_errx; + dbenv->failchk = __env_failchk_pp; + dbenv->fileid_reset = __env_fileid_reset_pp; + dbenv->get_cachesize = __memp_get_cachesize; + dbenv->get_data_dirs = __env_get_data_dirs; + dbenv->get_encrypt_flags = __env_get_encrypt_flags; + dbenv->get_errfile = __env_get_errfile; + dbenv->get_errpfx = __env_get_errpfx; + dbenv->get_flags = __env_get_flags; + dbenv->get_home = __env_get_home; + dbenv->get_lg_bsize = __log_get_lg_bsize; + dbenv->get_lg_dir = __log_get_lg_dir; + dbenv->get_lg_filemode = __log_get_lg_filemode; + dbenv->get_lg_max = __log_get_lg_max; + dbenv->get_lg_regionmax = __log_get_lg_regionmax; + dbenv->get_lk_conflicts = __lock_get_lk_conflicts; + dbenv->get_lk_detect = __lock_get_lk_detect; + dbenv->get_lk_max_lockers = __lock_get_lk_max_lockers; + dbenv->get_lk_max_locks = __lock_get_lk_max_locks; + dbenv->get_lk_max_objects = __lock_get_lk_max_objects; + dbenv->get_mp_max_openfd = __memp_get_mp_max_openfd; + dbenv->get_mp_max_write = __memp_get_mp_max_write; + dbenv->get_mp_mmapsize = __memp_get_mp_mmapsize; + dbenv->get_msgfile = __env_get_msgfile; + dbenv->get_open_flags = __env_get_open_flags; + dbenv->get_shm_key = __env_get_shm_key; + dbenv->get_timeout = __lock_get_env_timeout; + dbenv->get_tmp_dir = __env_get_tmp_dir; + dbenv->get_tx_max = __txn_get_tx_max; + dbenv->get_tx_timestamp = __txn_get_tx_timestamp; + dbenv->get_verbose = __env_get_verbose; + dbenv->is_bigendian = __db_isbigendian; + dbenv->lock_detect = __lock_detect_pp; + dbenv->lock_get = __lock_get_pp; + dbenv->lock_id = __lock_id_pp; + dbenv->lock_id_free = __lock_id_free_pp; + 
dbenv->lock_put = __lock_put_pp; + dbenv->lock_stat = __lock_stat_pp; + dbenv->lock_stat_print = __lock_stat_print_pp; + dbenv->lock_vec = __lock_vec_pp; + dbenv->log_archive = __log_archive_pp; + dbenv->log_cursor = __log_cursor_pp; + dbenv->log_file = __log_file_pp; + dbenv->log_flush = __log_flush_pp; + dbenv->log_printf = __log_printf_capi; + dbenv->log_put = __log_put_pp; + dbenv->log_stat = __log_stat_pp; + dbenv->log_stat_print = __log_stat_print_pp; + dbenv->lsn_reset = __env_lsn_reset_pp; + dbenv->memp_fcreate = __memp_fcreate_pp; + dbenv->memp_register = __memp_register_pp; + dbenv->memp_stat = __memp_stat_pp; + dbenv->memp_stat_print = __memp_stat_print_pp; + dbenv->memp_sync = __memp_sync_pp; + dbenv->memp_trickle = __memp_trickle_pp; + dbenv->mutex_alloc = __mutex_alloc_pp; + dbenv->mutex_free = __mutex_free_pp; + dbenv->mutex_get_align = __mutex_get_align; + dbenv->mutex_get_increment = __mutex_get_increment; + dbenv->mutex_get_max = __mutex_get_max; + dbenv->mutex_get_tas_spins = __mutex_get_tas_spins; + dbenv->mutex_lock = __mutex_lock_pp; + dbenv->mutex_set_align = __mutex_set_align; + dbenv->mutex_set_increment = __mutex_set_increment; + dbenv->mutex_set_max = __mutex_set_max; + dbenv->mutex_set_tas_spins = __mutex_set_tas_spins; + dbenv->mutex_stat = __mutex_stat; + dbenv->mutex_stat_print = __mutex_stat_print; + dbenv->mutex_unlock = __mutex_unlock_pp; + dbenv->open = __env_open_pp; + dbenv->remove = __env_remove; + dbenv->rep_elect = __rep_elect; + dbenv->rep_flush = __rep_flush; + dbenv->rep_get_config = __rep_get_config; + dbenv->rep_get_limit = __rep_get_limit; + dbenv->rep_get_nsites = __rep_get_nsites; + dbenv->rep_get_priority = __rep_get_priority; + dbenv->rep_get_timeout = __rep_get_timeout; + dbenv->rep_process_message = __rep_process_message; + dbenv->rep_set_config = __rep_set_config; + dbenv->rep_set_limit = __rep_set_limit; + dbenv->rep_set_nsites = __rep_set_nsites; + dbenv->rep_set_priority = __rep_set_priority; + 
dbenv->rep_set_timeout = __rep_set_timeout; + dbenv->rep_set_transport = __rep_set_transport; + dbenv->rep_start = __rep_start; + dbenv->rep_stat = __rep_stat_pp; + dbenv->rep_stat_print = __rep_stat_print_pp; + dbenv->rep_sync = __rep_sync; +#ifdef HAVE_REPLICATION_THREADS + dbenv->repmgr_add_remote_site = __repmgr_add_remote_site; + dbenv->repmgr_get_ack_policy = __repmgr_get_ack_policy; + dbenv->repmgr_set_ack_policy = __repmgr_set_ack_policy; + dbenv->repmgr_set_local_site = __repmgr_set_local_site; + dbenv->repmgr_site_list = __repmgr_site_list; + dbenv->repmgr_start = __repmgr_start; +#else /* !HAVE_REPLICATION_THREADS */ + dbenv->repmgr_add_remote_site = (int (*)(DB_ENV *, + const char *, u_int, int *, u_int32_t))__db_norepmgr; + dbenv->repmgr_get_ack_policy = (int (*)(DB_ENV *, int *))__db_norepmgr; + dbenv->repmgr_set_ack_policy = (int (*)(DB_ENV *, int))__db_norepmgr; + dbenv->repmgr_set_local_site = + (int (*)(DB_ENV *, const char *, u_int, u_int32_t))__db_norepmgr; + dbenv->repmgr_site_list = + (int (*)(DB_ENV *, u_int *, DB_REPMGR_SITE **))__db_norepmgr; + dbenv->repmgr_start = (int (*)(DB_ENV *, int, u_int32_t))__db_norepmgr; +#endif /* HAVE_REPLICATION_THREADS */ + dbenv->set_alloc = __env_set_alloc; + dbenv->set_app_dispatch = __env_set_app_dispatch; + dbenv->set_cachesize = __memp_set_cachesize; + dbenv->set_data_dir = __env_set_data_dir; + dbenv->set_encrypt = __env_set_encrypt; + dbenv->set_errcall = __env_set_errcall; + dbenv->set_errfile = __env_set_errfile; + dbenv->set_errpfx = __env_set_errpfx; + dbenv->set_event_notify = __env_set_event_notify; + dbenv->set_feedback = __env_set_feedback; + dbenv->set_flags = __env_set_flags; + dbenv->set_intermediate_dir = __env_set_intermediate_dir; + dbenv->set_isalive = __env_set_isalive; + dbenv->set_lg_bsize = __log_set_lg_bsize; + dbenv->set_lg_dir = __log_set_lg_dir; + dbenv->set_lg_filemode = __log_set_lg_filemode; + dbenv->set_lg_max = __log_set_lg_max; + dbenv->set_lg_regionmax = 
__log_set_lg_regionmax; + dbenv->set_lk_conflicts = __lock_set_lk_conflicts; + dbenv->set_lk_detect = __lock_set_lk_detect; + dbenv->set_lk_max_lockers = __lock_set_lk_max_lockers; + dbenv->set_lk_max_locks = __lock_set_lk_max_locks; + dbenv->set_lk_max_objects = __lock_set_lk_max_objects; + dbenv->set_mp_max_openfd = __memp_set_mp_max_openfd; + dbenv->set_mp_max_write = __memp_set_mp_max_write; + dbenv->set_mp_mmapsize = __memp_set_mp_mmapsize; + dbenv->set_msgcall = __env_set_msgcall; + dbenv->set_msgfile = __env_set_msgfile; + dbenv->set_paniccall = __env_set_paniccall; + dbenv->set_rep_request = __rep_set_request; + dbenv->set_rpc_server = __env_set_rpc_server; + dbenv->set_shm_key = __env_set_shm_key; + dbenv->set_thread_count = __env_set_thread_count; + dbenv->set_thread_id = __env_set_thread_id; + dbenv->set_thread_id_string = __env_set_thread_id_string; + dbenv->set_timeout = __lock_set_env_timeout; + dbenv->set_tmp_dir = __env_set_tmp_dir; + dbenv->set_tx_max = __txn_set_tx_max; + dbenv->set_tx_timestamp = __txn_set_tx_timestamp; + dbenv->set_verbose = __env_set_verbose; + dbenv->stat_print = __env_stat_print_pp; + dbenv->txn_begin = __txn_begin_pp; + dbenv->txn_checkpoint = __txn_checkpoint_pp; + dbenv->txn_recover = __txn_recover_pp; + dbenv->txn_stat = __txn_stat_pp; + dbenv->txn_stat_print = __txn_stat_print_pp; + /* DB_ENV PUBLIC HANDLE LIST END */ + + /* DB_ENV PRIVATE HANDLE LIST BEGIN */ + dbenv->prdbt = __db_prdbt; + /* DB_ENV PRIVATE HANDLE LIST END */ + + __os_id(NULL, &dbenv->pid_cache, NULL); + dbenv->thread_id = __os_id; + dbenv->thread_id_string = __env_thread_id_string; dbenv->db_ref = 0; - - __os_spin(dbenv); - - __log_dbenv_create(dbenv); /* Subsystem specific. */ - __lock_dbenv_create(dbenv); - __memp_dbenv_create(dbenv); - __rep_dbenv_create(dbenv); - __txn_dbenv_create(dbenv); + dbenv->shm_key = INVALID_REGION_SEGID; return (0); } /* - * __dbenv_err -- + * __env_err -- * Error message, including the standard error string. 
*/ static void #ifdef STDC_HEADERS -__dbenv_err(const DB_ENV *dbenv, int error, const char *fmt, ...) +__env_err(const DB_ENV *dbenv, int error, const char *fmt, ...) #else -__dbenv_err(dbenv, error, fmt, va_alist) +__env_err(dbenv, error, fmt, va_alist) const DB_ENV *dbenv; int error; const char *fmt; va_dcl #endif { - DB_REAL_ERR(dbenv, error, 1, 1, fmt); + DB_REAL_ERR(dbenv, error, DB_ERROR_SET, 1, fmt); } /* - * __dbenv_errx -- + * __env_errx -- * Error message. */ static void #ifdef STDC_HEADERS -__dbenv_errx(const DB_ENV *dbenv, const char *fmt, ...) +__env_errx(const DB_ENV *dbenv, const char *fmt, ...) #else -__dbenv_errx(dbenv, fmt, va_alist) +__env_errx(dbenv, fmt, va_alist) const DB_ENV *dbenv; const char *fmt; va_dcl #endif { - DB_REAL_ERR(dbenv, 0, 0, 1, fmt); + DB_REAL_ERR(dbenv, 0, DB_ERROR_NOT_SET, 1, fmt); } static int -__dbenv_get_home(dbenv, homep) +__env_get_home(dbenv, homep) DB_ENV *dbenv; const char **homep; { @@ -267,14 +388,14 @@ __dbenv_get_home(dbenv, homep) } /* - * __dbenv_set_alloc -- + * __env_set_alloc -- * {DB_ENV,DB}->set_alloc. * - * PUBLIC: int __dbenv_set_alloc __P((DB_ENV *, void *(*)(size_t), + * PUBLIC: int __env_set_alloc __P((DB_ENV *, void *(*)(size_t), * PUBLIC: void *(*)(void *, size_t), void (*)(void *))); */ int -__dbenv_set_alloc(dbenv, mal_func, real_func, free_func) +__env_set_alloc(dbenv, mal_func, real_func, free_func) DB_ENV *dbenv; void *(*mal_func) __P((size_t)); void *(*real_func) __P((void *, size_t)); @@ -289,11 +410,11 @@ __dbenv_set_alloc(dbenv, mal_func, real_func, free_func) } /* - * __dbenv_set_app_dispatch -- + * __env_set_app_dispatch -- * Set the transaction abort recover function. 
*/ static int -__dbenv_set_app_dispatch(dbenv, app_dispatch) +__env_set_app_dispatch(dbenv, app_dispatch) DB_ENV *dbenv; int (*app_dispatch) __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); { @@ -304,13 +425,13 @@ __dbenv_set_app_dispatch(dbenv, app_dispatch) } /* - * __dbenv_get_encrypt_flags -- + * __env_get_encrypt_flags -- * {DB_ENV,DB}->get_encrypt_flags. * - * PUBLIC: int __dbenv_get_encrypt_flags __P((DB_ENV *, u_int32_t *)); + * PUBLIC: int __env_get_encrypt_flags __P((DB_ENV *, u_int32_t *)); */ int -__dbenv_get_encrypt_flags(dbenv, flagsp) +__env_get_encrypt_flags(dbenv, flagsp) DB_ENV *dbenv; u_int32_t *flagsp; { @@ -325,20 +446,20 @@ __dbenv_get_encrypt_flags(dbenv, flagsp) return (0); #else COMPQUIET(flagsp, 0); - __db_err(dbenv, + __db_errx(dbenv, "library build did not include support for cryptography"); return (DB_OPNOTSUP); #endif } /* - * __dbenv_set_encrypt -- + * __env_set_encrypt -- * DB_ENV->set_encrypt. * - * PUBLIC: int __dbenv_set_encrypt __P((DB_ENV *, const char *, u_int32_t)); + * PUBLIC: int __env_set_encrypt __P((DB_ENV *, const char *, u_int32_t)); */ int -__dbenv_set_encrypt(dbenv, passwd, flags) +__env_set_encrypt(dbenv, passwd, flags) DB_ENV *dbenv; const char *passwd; u_int32_t flags; @@ -354,7 +475,7 @@ __dbenv_set_encrypt(dbenv, passwd, flags) return (__db_ferr(dbenv, "DB_ENV->set_encrypt", 0)); if (passwd == NULL || strlen(passwd) == 0) { - __db_err(dbenv, "Empty password specified to set_encrypt"); + __db_errx(dbenv, "Empty password specified to set_encrypt"); return (EINVAL); } if (!CRYPTO_ON(dbenv)) { @@ -406,14 +527,14 @@ err: COMPQUIET(passwd, NULL); COMPQUIET(flags, 0); - __db_err(dbenv, + __db_errx(dbenv, "library build did not include support for cryptography"); return (DB_OPNOTSUP); #endif } static void -__dbenv_map_flags(dbenv, inflagsp, outflagsp) +__env_map_flags(dbenv, inflagsp, outflagsp) DB_ENV *dbenv; u_int32_t *inflagsp, *outflagsp; { @@ -435,6 +556,10 @@ __dbenv_map_flags(dbenv, inflagsp, outflagsp) 
FLD_SET(*outflagsp, DB_ENV_DIRECT_LOG); FLD_CLR(*inflagsp, DB_DIRECT_LOG); } + if (FLD_ISSET(*inflagsp, DB_DSYNC_DB)) { + FLD_SET(*outflagsp, DB_ENV_DSYNC_DB); + FLD_CLR(*inflagsp, DB_DSYNC_DB); + } if (FLD_ISSET(*inflagsp, DB_DSYNC_LOG)) { FLD_SET(*outflagsp, DB_ENV_DSYNC_LOG); FLD_CLR(*inflagsp, DB_DSYNC_LOG); @@ -447,6 +572,10 @@ __dbenv_map_flags(dbenv, inflagsp, outflagsp) FLD_SET(*outflagsp, DB_ENV_LOG_INMEMORY); FLD_CLR(*inflagsp, DB_LOG_INMEMORY); } + if (FLD_ISSET(*inflagsp, DB_MULTIVERSION)) { + FLD_SET(*outflagsp, DB_ENV_MULTIVERSION); + FLD_CLR(*inflagsp, DB_MULTIVERSION); + } if (FLD_ISSET(*inflagsp, DB_NOLOCKING)) { FLD_SET(*outflagsp, DB_ENV_NOLOCKING); FLD_CLR(*inflagsp, DB_NOLOCKING); @@ -475,6 +604,10 @@ __dbenv_map_flags(dbenv, inflagsp, outflagsp) FLD_SET(*outflagsp, DB_ENV_TXN_NOSYNC); FLD_CLR(*inflagsp, DB_TXN_NOSYNC); } + if (FLD_ISSET(*inflagsp, DB_TXN_SNAPSHOT)) { + FLD_SET(*outflagsp, DB_ENV_TXN_SNAPSHOT); + FLD_CLR(*inflagsp, DB_TXN_SNAPSHOT); + } if (FLD_ISSET(*inflagsp, DB_TXN_WRITE_NOSYNC)) { FLD_SET(*outflagsp, DB_ENV_TXN_WRITE_NOSYNC); FLD_CLR(*inflagsp, DB_TXN_WRITE_NOSYNC); @@ -486,7 +619,7 @@ __dbenv_map_flags(dbenv, inflagsp, outflagsp) } static int -__dbenv_get_flags(dbenv, flagsp) +__env_get_flags(dbenv, flagsp) DB_ENV *dbenv; u_int32_t *flagsp; { @@ -495,9 +628,11 @@ __dbenv_get_flags(dbenv, flagsp) DB_CDB_ALLDB, DB_DIRECT_DB, DB_DIRECT_LOG, + DB_DSYNC_DB, DB_DSYNC_LOG, DB_LOG_AUTOREMOVE, DB_LOG_INMEMORY, + DB_MULTIVERSION, DB_NOLOCKING, DB_NOMMAP, DB_NOPANIC, @@ -505,6 +640,7 @@ __dbenv_get_flags(dbenv, flagsp) DB_REGION_INIT, DB_TIME_NOTGRANTED, DB_TXN_NOSYNC, + DB_TXN_SNAPSHOT, DB_TXN_WRITE_NOSYNC, DB_YIELDCPU, 0 @@ -515,15 +651,15 @@ __dbenv_get_flags(dbenv, flagsp) flags = 0; for (i = 0; (f = env_flags[i]) != 0; i++) { mapped_flag = 0; - __dbenv_map_flags(dbenv, &f, &mapped_flag); - DB_ASSERT(f == 0); + __env_map_flags(dbenv, &f, &mapped_flag); + DB_ASSERT(dbenv, f == 0); if (F_ISSET(dbenv, mapped_flag) == mapped_flag) 
LF_SET(env_flags[i]); } /* Some flags are persisted in the regions. */ if (dbenv->reginfo != NULL && - ((REGENV *)((REGINFO *)dbenv->reginfo)->primary)->envpanic != 0) { + ((REGENV *)((REGINFO *)dbenv->reginfo)->primary)->panic != 0) { LF_SET(DB_PANIC_ENVIRONMENT); } __log_get_flags(dbenv, &flags); @@ -533,13 +669,13 @@ __dbenv_get_flags(dbenv, flagsp) } /* - * __dbenv_set_flags -- + * __env_set_flags -- * DB_ENV->set_flags. * - * PUBLIC: int __dbenv_set_flags __P((DB_ENV *, u_int32_t, int)); + * PUBLIC: int __env_set_flags __P((DB_ENV *, u_int32_t, int)); */ int -__dbenv_set_flags(dbenv, flags, on) +__env_set_flags(dbenv, flags, on) DB_ENV *dbenv; u_int32_t flags; int on; @@ -549,10 +685,12 @@ __dbenv_set_flags(dbenv, flags, on) #define OK_FLAGS \ (DB_AUTO_COMMIT | DB_CDB_ALLDB | DB_DIRECT_DB | DB_DIRECT_LOG | \ - DB_DSYNC_LOG | DB_LOG_AUTOREMOVE | DB_LOG_INMEMORY | \ - DB_NOLOCKING | DB_NOMMAP | DB_NOPANIC | DB_OVERWRITE | \ - DB_PANIC_ENVIRONMENT | DB_REGION_INIT | DB_TIME_NOTGRANTED | \ - DB_TXN_NOSYNC | DB_TXN_WRITE_NOSYNC | DB_YIELDCPU) + DB_DSYNC_DB | DB_DSYNC_LOG | DB_LOG_AUTOREMOVE | \ + DB_LOG_INMEMORY | DB_MULTIVERSION | DB_NOLOCKING | \ + DB_NOMMAP | DB_NOPANIC | DB_OVERWRITE | \ + DB_PANIC_ENVIRONMENT | DB_REGION_INIT | \ + DB_TIME_NOTGRANTED | DB_TXN_NOSYNC | DB_TXN_SNAPSHOT | \ + DB_TXN_WRITE_NOSYNC | DB_YIELDCPU) if (LF_ISSET(~OK_FLAGS)) return (__db_ferr(dbenv, "DB_ENV->set_flags", 0)); @@ -567,8 +705,8 @@ __dbenv_set_flags(dbenv, flags, on) flags, DB_TXN_NOSYNC, DB_TXN_WRITE_NOSYNC)) != 0) return (ret); if (LF_ISSET(DB_DIRECT_DB | - DB_DIRECT_LOG) && __os_have_direct() == 0) { - __db_err(dbenv, + DB_DIRECT_LOG) && __os_support_direct_io() == 0) { + __db_errx(dbenv, "DB_ENV->set_flags: direct I/O either not configured or not supported"); return (EINVAL); } @@ -580,11 +718,18 @@ __dbenv_set_flags(dbenv, flags, on) if (LF_ISSET(DB_PANIC_ENVIRONMENT)) { ENV_ILLEGAL_BEFORE_OPEN(dbenv, "DB_ENV->set_flags: DB_PANIC_ENVIRONMENT"); - PANIC_SET(dbenv, on); 
+ if (on) { + __db_errx(dbenv, "Environment panic set"); + (void)__db_panic(dbenv, DB_RUNRECOVERY); + } else + __db_panic_set(dbenv, 0); } if (LF_ISSET(DB_REGION_INIT)) ENV_ILLEGAL_AFTER_OPEN(dbenv, "DB_ENV->set_flags: DB_REGION_INIT"); + if (LF_ISSET(DB_LOG_INMEMORY)) + ENV_ILLEGAL_AFTER_OPEN(dbenv, + "DB_ENV->set_flags: DB_LOG_INMEMORY"); /* * DB_LOG_INMEMORY, DB_TXN_NOSYNC and DB_TXN_WRITE_NOSYNC are @@ -601,7 +746,7 @@ __dbenv_set_flags(dbenv, flags, on) __log_set_flags(dbenv, flags, on); mapped_flags = 0; - __dbenv_map_flags(dbenv, &flags, &mapped_flags); + __env_map_flags(dbenv, &flags, &mapped_flags); if (on) F_SET(dbenv, mapped_flags); else @@ -611,7 +756,7 @@ __dbenv_set_flags(dbenv, flags, on) } static int -__dbenv_get_data_dirs(dbenv, dirpp) +__env_get_data_dirs(dbenv, dirpp) DB_ENV *dbenv; const char ***dirpp; { @@ -620,13 +765,13 @@ __dbenv_get_data_dirs(dbenv, dirpp) } /* - * __dbenv_set_data_dir -- + * __env_set_data_dir -- * DB_ENV->set_data_dir. * - * PUBLIC: int __dbenv_set_data_dir __P((DB_ENV *, const char *)); + * PUBLIC: int __env_set_data_dir __P((DB_ENV *, const char *)); */ int -__dbenv_set_data_dir(dbenv, dir) +__env_set_data_dir(dbenv, dir) DB_ENV *dbenv; const char *dir; { @@ -658,17 +803,17 @@ __dbenv_set_data_dir(dbenv, dir) } /* - * __dbenv_set_intermediate_dir -- + * __env_set_intermediate_dir -- * DB_ENV->set_intermediate_dir. * * !!! * Undocumented routine allowing applications to configure Berkeley DB to * create intermediate directories. 
* - * PUBLIC: int __dbenv_set_intermediate_dir __P((DB_ENV *, int, u_int32_t)); + * PUBLIC: int __env_set_intermediate_dir __P((DB_ENV *, int, u_int32_t)); */ int -__dbenv_set_intermediate_dir(dbenv, mode, flags) +__env_set_intermediate_dir(dbenv, mode, flags) DB_ENV *dbenv; int mode; u_int32_t flags; @@ -676,7 +821,7 @@ __dbenv_set_intermediate_dir(dbenv, mode, flags) if (flags != 0) return (__db_ferr(dbenv, "DB_ENV->set_intermediate_dir", 0)); if (mode == 0) { - __db_err(dbenv, + __db_errx(dbenv, "DB_ENV->set_intermediate_dir: mode may not be set to 0"); return (EINVAL); } @@ -686,14 +831,14 @@ __dbenv_set_intermediate_dir(dbenv, mode, flags) } /* - * __dbenv_set_errcall -- + * __env_set_errcall -- * {DB_ENV,DB}->set_errcall. * - * PUBLIC: void __dbenv_set_errcall __P((DB_ENV *, + * PUBLIC: void __env_set_errcall __P((DB_ENV *, * PUBLIC: void (*)(const DB_ENV *, const char *, const char *))); */ void -__dbenv_set_errcall(dbenv, errcall) +__env_set_errcall(dbenv, errcall) DB_ENV *dbenv; void (*errcall) __P((const DB_ENV *, const char *, const char *)); { @@ -701,13 +846,13 @@ __dbenv_set_errcall(dbenv, errcall) } /* - * __dbenv_get_errfile -- + * __env_get_errfile -- * {DB_ENV,DB}->get_errfile. * - * PUBLIC: void __dbenv_get_errfile __P((DB_ENV *, FILE **)); + * PUBLIC: void __env_get_errfile __P((DB_ENV *, FILE **)); */ void -__dbenv_get_errfile(dbenv, errfilep) +__env_get_errfile(dbenv, errfilep) DB_ENV *dbenv; FILE **errfilep; { @@ -715,13 +860,13 @@ __dbenv_get_errfile(dbenv, errfilep) } /* - * __dbenv_set_errfile -- + * __env_set_errfile -- * {DB_ENV,DB}->set_errfile. * - * PUBLIC: void __dbenv_set_errfile __P((DB_ENV *, FILE *)); + * PUBLIC: void __env_set_errfile __P((DB_ENV *, FILE *)); */ void -__dbenv_set_errfile(dbenv, errfile) +__env_set_errfile(dbenv, errfile) DB_ENV *dbenv; FILE *errfile; { @@ -729,13 +874,13 @@ __dbenv_set_errfile(dbenv, errfile) } /* - * __dbenv_get_errpfx -- + * __env_get_errpfx -- * {DB_ENV,DB}->get_errpfx. 
* - * PUBLIC: void __dbenv_get_errpfx __P((DB_ENV *, const char **)); + * PUBLIC: void __env_get_errpfx __P((DB_ENV *, const char **)); */ void -__dbenv_get_errpfx(dbenv, errpfxp) +__env_get_errpfx(dbenv, errpfxp) DB_ENV *dbenv; const char **errpfxp; { @@ -743,13 +888,13 @@ __dbenv_get_errpfx(dbenv, errpfxp) } /* - * __dbenv_set_errpfx -- + * __env_set_errpfx -- * {DB_ENV,DB}->set_errpfx. * - * PUBLIC: void __dbenv_set_errpfx __P((DB_ENV *, const char *)); + * PUBLIC: void __env_set_errpfx __P((DB_ENV *, const char *)); */ void -__dbenv_set_errpfx(dbenv, errpfx) +__env_set_errpfx(dbenv, errpfx) DB_ENV *dbenv; const char *errpfx; { @@ -757,7 +902,7 @@ __dbenv_set_errpfx(dbenv, errpfx) } static int -__dbenv_set_feedback(dbenv, feedback) +__env_set_feedback(dbenv, feedback) DB_ENV *dbenv; void (*feedback) __P((DB_ENV *, int, int)); { @@ -766,14 +911,79 @@ __dbenv_set_feedback(dbenv, feedback) } /* - * __dbenv_set_msgcall -- + * __env_set_thread_id -- + * DB_ENV->set_thread_id + */ +static int +__env_set_thread_id(dbenv, id) + DB_ENV *dbenv; + void (*id) __P((DB_ENV *, pid_t *, db_threadid_t *)); +{ + dbenv->thread_id = id; + return (0); +} + +/* + * __env_set_threadid_string -- + * DB_ENV->set_threadid_string + */ +static int +__env_set_thread_id_string(dbenv, thread_id_string) + DB_ENV *dbenv; + char *(*thread_id_string) __P((DB_ENV *, pid_t, db_threadid_t, char *)); +{ + dbenv->thread_id_string = thread_id_string; + return (0); +} + +/* + * __env_set_isalive -- + * DB_ENV->set_isalive + */ +static int +__env_set_isalive(dbenv, is_alive) + DB_ENV *dbenv; + int (*is_alive) __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t)); +{ + if (F_ISSET(dbenv, DB_ENV_OPEN_CALLED) && dbenv->thr_nbucket == 0) { + __db_errx(dbenv, + "is_alive method specified but no thread region allocated"); + return (EINVAL); + } + dbenv->is_alive = is_alive; + return (0); +} + +/* + * __env_set_thread_count -- + * DB_ENV->set_thread_count + */ +static int +__env_set_thread_count(dbenv, count) + 
DB_ENV *dbenv; + u_int32_t count; +{ + ENV_ILLEGAL_AFTER_OPEN(dbenv, "DB_ENV->set_thread_count"); + dbenv->thr_max = count; + + /* + * Set the number of buckets to be 1/8th the number of + * proposed threads control blocks. This is rather + * arbitrary. + */ + dbenv->thr_nbucket = count / 8; + return (0); +} + +/* + * __env_set_msgcall -- * {DB_ENV,DB}->set_msgcall. * - * PUBLIC: void __dbenv_set_msgcall + * PUBLIC: void __env_set_msgcall * PUBLIC: __P((DB_ENV *, void (*)(const DB_ENV *, const char *))); */ void -__dbenv_set_msgcall(dbenv, msgcall) +__env_set_msgcall(dbenv, msgcall) DB_ENV *dbenv; void (*msgcall) __P((const DB_ENV *, const char *)); { @@ -781,13 +991,13 @@ __dbenv_set_msgcall(dbenv, msgcall) } /* - * __dbenv_get_msgfile -- + * __env_get_msgfile -- * {DB_ENV,DB}->get_msgfile. * - * PUBLIC: void __dbenv_get_msgfile __P((DB_ENV *, FILE **)); + * PUBLIC: void __env_get_msgfile __P((DB_ENV *, FILE **)); */ void -__dbenv_get_msgfile(dbenv, msgfilep) +__env_get_msgfile(dbenv, msgfilep) DB_ENV *dbenv; FILE **msgfilep; { @@ -795,13 +1005,13 @@ __dbenv_get_msgfile(dbenv, msgfilep) } /* - * __dbenv_set_msgfile -- + * __env_set_msgfile -- * {DB_ENV,DB}->set_msgfile. * - * PUBLIC: void __dbenv_set_msgfile __P((DB_ENV *, FILE *)); + * PUBLIC: void __env_set_msgfile __P((DB_ENV *, FILE *)); */ void -__dbenv_set_msgfile(dbenv, msgfile) +__env_set_msgfile(dbenv, msgfile) DB_ENV *dbenv; FILE *msgfile; { @@ -809,13 +1019,13 @@ __dbenv_set_msgfile(dbenv, msgfile) } /* - * __dbenv_set_paniccall -- + * __env_set_paniccall -- * {DB_ENV,DB}->set_paniccall. 
* - * PUBLIC: int __dbenv_set_paniccall __P((DB_ENV *, void (*)(DB_ENV *, int))); + * PUBLIC: int __env_set_paniccall __P((DB_ENV *, void (*)(DB_ENV *, int))); */ int -__dbenv_set_paniccall(dbenv, paniccall) +__env_set_paniccall(dbenv, paniccall) DB_ENV *dbenv; void (*paniccall) __P((DB_ENV *, int)); { @@ -823,58 +1033,47 @@ __dbenv_set_paniccall(dbenv, paniccall) return (0); } -static int -__dbenv_get_shm_key(dbenv, shm_keyp) - DB_ENV *dbenv; - long *shm_keyp; /* !!!: really a key_t *. */ -{ - *shm_keyp = dbenv->shm_key; - return (0); -} - /* - * __dbenv_set_shm_key -- - * DB_ENV->set_shm_key. - * - * PUBLIC: int __dbenv_set_shm_key __P((DB_ENV *, long)); + * __env_set_event_notify -- + * DB_ENV->set_event_notify. */ -int -__dbenv_set_shm_key(dbenv, shm_key) +static int +__env_set_event_notify(dbenv, event_func) DB_ENV *dbenv; - long shm_key; /* !!!: really a key_t. */ + void (*event_func) __P((DB_ENV *, u_int32_t, void *)); { - ENV_ILLEGAL_AFTER_OPEN(dbenv, "DB_ENV->set_shm_key"); - - dbenv->shm_key = shm_key; + dbenv->db_event_func = event_func; return (0); } static int -__dbenv_get_tas_spins(dbenv, tas_spinsp) +__env_get_shm_key(dbenv, shm_keyp) DB_ENV *dbenv; - u_int32_t *tas_spinsp; + long *shm_keyp; /* !!!: really a key_t *. */ { - *tas_spinsp = dbenv->tas_spins; + *shm_keyp = dbenv->shm_key; return (0); } /* - * __dbenv_set_tas_spins -- - * DB_ENV->set_tas_spins. + * __env_set_shm_key -- + * DB_ENV->set_shm_key. * - * PUBLIC: int __dbenv_set_tas_spins __P((DB_ENV *, u_int32_t)); + * PUBLIC: int __env_set_shm_key __P((DB_ENV *, long)); */ int -__dbenv_set_tas_spins(dbenv, tas_spins) +__env_set_shm_key(dbenv, shm_key) DB_ENV *dbenv; - u_int32_t tas_spins; + long shm_key; /* !!!: really a key_t. 
*/ { - dbenv->tas_spins = tas_spins; + ENV_ILLEGAL_AFTER_OPEN(dbenv, "DB_ENV->set_shm_key"); + + dbenv->shm_key = shm_key; return (0); } static int -__dbenv_get_tmp_dir(dbenv, dirp) +__env_get_tmp_dir(dbenv, dirp) DB_ENV *dbenv; const char **dirp; { @@ -883,13 +1082,13 @@ __dbenv_get_tmp_dir(dbenv, dirp) } /* - * __dbenv_set_tmp_dir -- + * __env_set_tmp_dir -- * DB_ENV->set_tmp_dir. * - * PUBLIC: int __dbenv_set_tmp_dir __P((DB_ENV *, const char *)); + * PUBLIC: int __env_set_tmp_dir __P((DB_ENV *, const char *)); */ int -__dbenv_set_tmp_dir(dbenv, dir) +__env_set_tmp_dir(dbenv, dir) DB_ENV *dbenv; const char *dir; { @@ -899,7 +1098,7 @@ __dbenv_set_tmp_dir(dbenv, dir) } static int -__dbenv_get_verbose(dbenv, which, onoffp) +__env_get_verbose(dbenv, which, onoffp) DB_ENV *dbenv; u_int32_t which; int *onoffp; @@ -907,6 +1106,7 @@ __dbenv_get_verbose(dbenv, which, onoffp) switch (which) { case DB_VERB_DEADLOCK: case DB_VERB_RECOVERY: + case DB_VERB_REGISTER: case DB_VERB_REPLICATION: case DB_VERB_WAITSFOR: *onoffp = FLD_ISSET(dbenv->verbose, which) ? 1 : 0; @@ -918,13 +1118,13 @@ __dbenv_get_verbose(dbenv, which, onoffp) } /* - * __dbenv_set_verbose -- + * __env_set_verbose -- * DB_ENV->set_verbose. 
* - * PUBLIC: int __dbenv_set_verbose __P((DB_ENV *, u_int32_t, int)); + * PUBLIC: int __env_set_verbose __P((DB_ENV *, u_int32_t, int)); */ int -__dbenv_set_verbose(dbenv, which, on) +__env_set_verbose(dbenv, which, on) DB_ENV *dbenv; u_int32_t which; int on; @@ -932,6 +1132,7 @@ __dbenv_set_verbose(dbenv, which, on) switch (which) { case DB_VERB_DEADLOCK: case DB_VERB_RECOVERY: + case DB_VERB_REGISTER: case DB_VERB_REPLICATION: case DB_VERB_WAITSFOR: if (on) @@ -956,7 +1157,7 @@ __db_mi_env(dbenv, name) DB_ENV *dbenv; const char *name; { - __db_err(dbenv, "%s: method not permitted when environment specified", + __db_errx(dbenv, "%s: method not permitted when environment specified", name); return (EINVAL); } @@ -973,7 +1174,7 @@ __db_mi_open(dbenv, name, after) const char *name; int after; { - __db_err(dbenv, "%s: method not permitted %s handle's open method", + __db_errx(dbenv, "%s: method not permitted %s handle's open method", name, after ? "after" : "before"); return (EINVAL); } @@ -1012,14 +1213,14 @@ __db_env_config(dbenv, i, flags) sub = "<unspecified>"; break; } - __db_err(dbenv, + __db_errx(dbenv, "%s interface requires an environment configured for the %s subsystem", i, sub); return (EINVAL); } static int -__dbenv_set_rpc_server_noclnt(dbenv, cl, host, tsec, ssec, flags) +__env_set_rpc_server(dbenv, cl, host, tsec, ssec, flags) DB_ENV *dbenv; void *cl; const char *host; @@ -1032,7 +1233,21 @@ __dbenv_set_rpc_server_noclnt(dbenv, cl, host, tsec, ssec, flags) COMPQUIET(ssec, 0); COMPQUIET(flags, 0); - __db_err(dbenv, - "set_rpc_server method not permitted in non-RPC environment"); + __db_errx(dbenv, "Berkeley DB was not configured for RPC support"); return (DB_OPNOTSUP); } + +#ifndef HAVE_REPLICATION_THREADS +/* + * __db_norepmgr -- + * Error when a Berkeley DB build doesn't include the replication manager. 
+ */ +static int +__db_norepmgr(dbenv) + DB_ENV *dbenv; +{ + __db_errx(dbenv, + "Berkeley DB library build did not include replication manager support"); + return (DB_OPNOTSUP); +} +#endif diff --git a/db/env/env_open.c b/db/env/env_open.c index 0fbce72d0..61e284e5e 100644 --- a/db/env/env_open.c +++ b/db/env/env_open.c @@ -1,41 +1,25 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 1996-2006 + * Oracle Corporation. All rights reserved. * - * $Id: env_open.c,v 11.177 2004/07/17 18:55:08 ubell Exp $ + * $Id: env_open.c,v 12.71 2006/08/24 14:45:39 bostic Exp $ */ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <ctype.h> -#include <limits.h> -#include <stdlib.h> -#include <string.h> -#endif - #include "db_int.h" #include "dbinc/crypto.h" #include "dbinc/db_page.h" -#include "dbinc/db_shash.h" #include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/fop.h" #include "dbinc/lock.h" #include "dbinc/log.h" #include "dbinc/mp.h" -#include "dbinc/qam.h" #include "dbinc/txn.h" -static int __db_parse __P((DB_ENV *, char *)); static int __db_tmp_open __P((DB_ENV *, u_int32_t, char *, DB_FH **)); -static int __dbenv_config __P((DB_ENV *, const char *, u_int32_t)); -static int __dbenv_refresh __P((DB_ENV *, u_int32_t, int)); -static int __dbenv_remove_int __P((DB_ENV *, const char *, u_int32_t)); +static int __env_refresh __P((DB_ENV *, u_int32_t, int)); /* * db_version -- @@ -57,117 +41,195 @@ db_version(majverp, minverp, patchp) } /* - * __dbenv_open -- - * DB_ENV->open. + * __env_open_pp -- + * DB_ENV->open pre/post processing. 
* - * PUBLIC: int __dbenv_open __P((DB_ENV *, const char *, u_int32_t, int)); + * PUBLIC: int __env_open_pp __P((DB_ENV *, const char *, u_int32_t, int)); */ int -__dbenv_open(dbenv, db_home, flags, mode) +__env_open_pp(dbenv, db_home, flags, mode) DB_ENV *dbenv; const char *db_home; u_int32_t flags; int mode; { - DB_MPOOL *dbmp; - u_int32_t init_flags, orig_flags; - int rep_check, ret; - - orig_flags = dbenv->flags; - rep_check = 0; + int ret; #undef OKFLAGS #define OKFLAGS \ (DB_CREATE | DB_INIT_CDB | DB_INIT_LOCK | DB_INIT_LOG | \ - DB_INIT_MPOOL | DB_INIT_REP | DB_INIT_TXN | DB_JOINENV | \ - DB_LOCKDOWN | DB_PRIVATE | DB_RECOVER | DB_RECOVER_FATAL | \ - DB_SYSTEM_MEM | DB_THREAD | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT) + DB_INIT_MPOOL | DB_INIT_REP | DB_INIT_TXN | DB_LOCKDOWN | \ + DB_PRIVATE | DB_RECOVER | DB_RECOVER_FATAL | DB_REGISTER | \ + DB_SYSTEM_MEM | DB_THREAD | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT) #undef OKFLAGS_CDB #define OKFLAGS_CDB \ (DB_CREATE | DB_INIT_CDB | DB_INIT_MPOOL | DB_LOCKDOWN | \ DB_PRIVATE | DB_SYSTEM_MEM | DB_THREAD | \ DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT) - /* - * Flags saved in the init_flags field of the environment, representing - * flags to DB_ENV->set_flags and DB_ENV->open that need to be set. 
- */ -#define DB_INITENV_CDB 0x0001 /* DB_INIT_CDB */ -#define DB_INITENV_CDB_ALLDB 0x0002 /* DB_INIT_CDB_ALLDB */ -#define DB_INITENV_LOCK 0x0004 /* DB_INIT_LOCK */ -#define DB_INITENV_LOG 0x0008 /* DB_INIT_LOG */ -#define DB_INITENV_MPOOL 0x0010 /* DB_INIT_MPOOL */ -#define DB_INITENV_REP 0x0020 /* DB_INIT_REP */ -#define DB_INITENV_TXN 0x0040 /* DB_INIT_TXN */ - if ((ret = __db_fchk(dbenv, "DB_ENV->open", flags, OKFLAGS)) != 0) return (ret); - if (LF_ISSET(DB_INIT_CDB) && - (ret = __db_fchk(dbenv, "DB_ENV->open", flags, OKFLAGS_CDB)) != 0) + if ((ret = __db_fcchk( + dbenv, "DB_ENV->open", flags, DB_INIT_CDB, ~OKFLAGS_CDB)) != 0) return (ret); - if ((ret = __db_fcchk(dbenv, - "DB_ENV->open", flags, DB_PRIVATE, DB_SYSTEM_MEM)) != 0) - return (ret); - if ((ret = __db_fcchk(dbenv, - "DB_ENV->open", flags, DB_RECOVER, DB_RECOVER_FATAL)) != 0) - return (ret); - if ((ret = __db_fcchk(dbenv, "DB_ENV->open", flags, DB_JOINENV, - DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | - DB_INIT_REP | DB_INIT_TXN | - DB_PRIVATE | DB_RECOVER | DB_RECOVER_FATAL)) != 0) - return (ret); - if (LF_ISSET(DB_INIT_REP) && !LF_ISSET(DB_INIT_TXN)) { - __db_err(dbenv, "Replication requires transaction support"); - return (EINVAL); + if (LF_ISSET(DB_REGISTER)) { + if (!__os_support_db_register()) { + __db_errx(dbenv, + "Berkeley DB library does not support DB_REGISTER on this system"); + return (EINVAL); + } + if ((ret = __db_fcchk(dbenv, "DB_ENV->open", flags, + DB_PRIVATE, DB_REGISTER | DB_SYSTEM_MEM)) != 0) + return (ret); + if (!LF_ISSET(DB_INIT_TXN)) { + __db_errx( + dbenv, "registration requires transaction support"); + return (EINVAL); + } } - if (LF_ISSET(DB_INIT_REP) && !LF_ISSET(DB_INIT_LOCK)) { - __db_err(dbenv, "Replication requires locking support"); - return (EINVAL); + if (LF_ISSET(DB_INIT_REP)) { + if (!__os_support_replication()) { + __db_errx(dbenv, + "Berkeley DB library does not support replication on this system"); + return (EINVAL); + } + if 
(!LF_ISSET(DB_INIT_LOCK)) { + __db_errx(dbenv, + "replication requires locking support"); + return (EINVAL); + } + if (!LF_ISSET(DB_INIT_TXN)) { + __db_errx( + dbenv, "replication requires transaction support"); + return (EINVAL); + } } - if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && !LF_ISSET(DB_INIT_TXN)) { - __db_err(dbenv, "Recovery requires transaction support"); - return (EINVAL); + if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) { + if ((ret = __db_fcchk(dbenv, + "DB_ENV->open", flags, DB_RECOVER, DB_RECOVER_FATAL)) != 0) + return (ret); + if ((ret = __db_fcchk(dbenv, + "DB_ENV->open", flags, DB_REGISTER, DB_RECOVER_FATAL)) != 0) + return (ret); + if (!LF_ISSET(DB_CREATE)) { + __db_errx(dbenv, "recovery requires the create flag"); + return (EINVAL); + } + if (!LF_ISSET(DB_INIT_TXN)) { + __db_errx( + dbenv, "recovery requires transaction support"); + return (EINVAL); + } } +#ifdef HAVE_MUTEX_THREAD_ONLY /* * Currently we support one kind of mutex that is intra-process only, * POSIX 1003.1 pthreads, because a variety of systems don't support * the full pthreads API, and our only alternative is test-and-set. */ -#ifdef HAVE_MUTEX_THREAD_ONLY if (!LF_ISSET(DB_PRIVATE)) { - __db_err(dbenv, - "Berkeley DB library configured to support only private environments"); + __db_errx(dbenv, + "Berkeley DB library configured to support only private environments"); return (EINVAL); } #endif +#ifdef HAVE_MUTEX_FCNTL /* - * If we're doing recovery, destroy the environment so that we create - * all the regions from scratch. I'd like to reuse already created - * regions, but that's hard. We would have to create the environment - * region from scratch, at least, as we have no way of knowing if its - * linked lists are corrupted. + * !!! + * We need a file descriptor for fcntl(2) locking. We use the file + * handle from the REGENV file for this purpose. * - * I suppose we could set flags while modifying those links, but that - * is going to be difficult to get right. 
The major concern I have - * is if the application stomps the environment with a rogue pointer. - * We have no way of detecting that, and we could be forced into a - * situation where we start up and then crash, repeatedly. + * Since we may be using shared memory regions, e.g., shmget(2), and + * not a mapped-in regular file, the backing file may be only a few + * bytes in length. So, this depends on the ability to call fcntl to + * lock file offsets much larger than the actual physical file. I + * think that's safe -- besides, very few systems actually need this + * kind of support, SunOS is the only one still in wide use of which + * I'm aware. * - * Note that we do not check any flags like DB_PRIVATE before calling - * remove. We don't care if the current environment was private or - * not, we just want to nail any files that are left-over for whatever - * reason, from whatever session. + * The error case is if an application lacks spinlocks and wants to be + * threaded. That doesn't work because fcntl will lock the underlying + * process, including all its threads. */ - if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) - if ((ret = __dbenv_remove_int(dbenv, db_home, DB_FORCE)) != 0 || - (ret = __dbenv_refresh(dbenv, orig_flags, 0)) != 0) - return (ret); + if (F_ISSET(dbenv, DB_ENV_THREAD)) { + __db_errx(dbenv, + "architecture lacks fast mutexes: applications cannot be threaded"); + return (EINVAL); + } +#endif - /* Initialize the DB_ENV structure. */ - if ((ret = __dbenv_config(dbenv, db_home, flags)) != 0) - goto err; + return (__env_open(dbenv, db_home, flags, mode)); +} + +/* + * __env_open -- + * DB_ENV->open. 
+ * + * PUBLIC: int __env_open __P((DB_ENV *, const char *, u_int32_t, int)); + */ +int +__env_open(dbenv, db_home, flags, mode) + DB_ENV *dbenv; + const char *db_home; + u_int32_t flags; + int mode; +{ + DB_THREAD_INFO *ip; + REGINFO *infop; + u_int32_t init_flags, orig_flags; + int register_recovery, rep_check, ret, t_ret; + + ip = NULL; + register_recovery = rep_check = 0; + + /* Initial configuration. */ + if ((ret = __env_config(dbenv, db_home, flags, mode)) != 0) + return (ret); + + /* + * Save the DB_ENV handle's configuration flags as set by user-called + * configuration methods and the environment directory's DB_CONFIG + * file. If we use this DB_ENV structure to recover the existing + * environment or to remove an environment we created after failure, + * we'll restore the DB_ENV flags to these values. + */ + orig_flags = dbenv->flags; + + /* + * If we're going to register with the environment, that's the first + * thing we do. + */ + if (LF_ISSET(DB_REGISTER)) { + if ((ret = __envreg_register(dbenv, &register_recovery)) != 0) + goto err; + if (register_recovery) { + if (!LF_ISSET(DB_RECOVER)) { + __db_errx(dbenv, + "The DB_RECOVER flag was not specified, and recovery is needed"); + ret = DB_RUNRECOVERY; + goto err; + } + } else + LF_CLR(DB_RECOVER); + } + + /* + * If we're doing recovery, destroy the environment so that we create + * all the regions from scratch. The major concern I have is if the + * application stomps the environment with a rogue pointer. We have + * no way of detecting that, and we could be forced into a situation + * where we start up and then crash, repeatedly. + * + * We do not check any flags like DB_PRIVATE before calling remove. + * We don't care if the current environment was private or not, we + * want to remove files left over for any reason, from any session. 
+ */ + if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) + if ((ret = __db_e_remove(dbenv, DB_FORCE)) != 0 || + (ret = __env_refresh(dbenv, orig_flags, 0)) != 0) + goto err; /* Convert the DB_ENV->open flags to internal flags. */ if (LF_ISSET(DB_CREATE)) @@ -183,17 +245,29 @@ __dbenv_open(dbenv, db_home, flags, mode) if (LF_ISSET(DB_THREAD)) F_SET(dbenv, DB_ENV_THREAD); - /* Default permissions are read-write for both owner and group. */ - dbenv->db_mode = mode == 0 ? __db_omode("rwrw--") : mode; + /* + * Flags saved in the init_flags field of the environment, representing + * flags to DB_ENV->set_flags and DB_ENV->open that need to be set. + */ +#define DB_INITENV_CDB 0x0001 /* DB_INIT_CDB */ +#define DB_INITENV_CDB_ALLDB 0x0002 /* DB_INIT_CDB_ALLDB */ +#define DB_INITENV_LOCK 0x0004 /* DB_INIT_LOCK */ +#define DB_INITENV_LOG 0x0008 /* DB_INIT_LOG */ +#define DB_INITENV_MPOOL 0x0010 /* DB_INIT_MPOOL */ +#define DB_INITENV_REP 0x0020 /* DB_INIT_REP */ +#define DB_INITENV_TXN 0x0040 /* DB_INIT_TXN */ /* - * Create/join the environment. We pass in the flags that will be of - * interest to an environment joining later; if we're not the ones to - * do the create, we pull out whatever has been stored. + * Create/join the environment. We pass in the flags of interest to + * a thread subsequently joining an environment we create. If we're + * not the ones to create the environment, our flags will be updated + * to match the existing environment. 
*/ init_flags = 0; if (LF_ISSET(DB_INIT_CDB)) FLD_SET(init_flags, DB_INITENV_CDB); + if (F_ISSET(dbenv, DB_ENV_CDB_ALLDB)) + FLD_SET(init_flags, DB_INITENV_CDB_ALLDB); if (LF_ISSET(DB_INIT_LOCK)) FLD_SET(init_flags, DB_INITENV_LOCK); if (LF_ISSET(DB_INIT_LOG)) @@ -204,8 +278,6 @@ __dbenv_open(dbenv, db_home, flags, mode) FLD_SET(init_flags, DB_INITENV_REP); if (LF_ISSET(DB_INIT_TXN)) FLD_SET(init_flags, DB_INITENV_TXN); - if (F_ISSET(dbenv, DB_ENV_CDB_ALLDB)) - FLD_SET(init_flags, DB_INITENV_CDB_ALLDB); if ((ret = __db_e_attach(dbenv, &init_flags)) != 0) goto err; @@ -213,45 +285,29 @@ __dbenv_open(dbenv, db_home, flags, mode) * __db_e_attach will return the saved init_flags field, which contains * the DB_INIT_* flags used when the environment was created. * - * Check if existing environment flags conflict with our flags. + * We may be joining an environment -- reset our flags to match the + * ones in the environment. */ - if (LF_ISSET(DB_INIT_CDB) && FLD_ISSET(init_flags, DB_INITENV_TXN)) { - __db_err(dbenv, - "Concurrent Data Store incompatible with environment"); - ret = EINVAL; - goto err; - } - if (LF_ISSET(DB_INIT_TXN) && FLD_ISSET(init_flags, DB_INITENV_CDB)) { - __db_err(dbenv, - "Transactional Data Store incompatible with environment"); - ret = EINVAL; + if (FLD_ISSET(init_flags, DB_INITENV_CDB)) + LF_SET(DB_INIT_CDB); + if (FLD_ISSET(init_flags, DB_INITENV_LOCK)) + LF_SET(DB_INIT_LOCK); + if (FLD_ISSET(init_flags, DB_INITENV_LOG)) + LF_SET(DB_INIT_LOG); + if (FLD_ISSET(init_flags, DB_INITENV_MPOOL)) + LF_SET(DB_INIT_MPOOL); + if (FLD_ISSET(init_flags, DB_INITENV_REP)) + LF_SET(DB_INIT_REP); + if (FLD_ISSET(init_flags, DB_INITENV_TXN)) + LF_SET(DB_INIT_TXN); + if (FLD_ISSET(init_flags, DB_INITENV_CDB_ALLDB) && + (ret = __env_set_flags(dbenv, DB_CDB_ALLDB, 1)) != 0) goto err; - } - - /* If we're joining the environment, find out what we're joining. 
*/ - if (LF_ISSET(DB_JOINENV)) { - LF_CLR(DB_JOINENV); - if (FLD_ISSET(init_flags, DB_INITENV_CDB)) - LF_SET(DB_INIT_CDB); - if (FLD_ISSET(init_flags, DB_INITENV_LOCK)) - LF_SET(DB_INIT_LOCK); - if (FLD_ISSET(init_flags, DB_INITENV_LOG)) - LF_SET(DB_INIT_LOG); - if (FLD_ISSET(init_flags, DB_INITENV_MPOOL)) - LF_SET(DB_INIT_MPOOL); - if (FLD_ISSET(init_flags, DB_INITENV_REP)) - LF_SET(DB_INIT_REP); - if (FLD_ISSET(init_flags, DB_INITENV_TXN)) - LF_SET(DB_INIT_TXN); - if (FLD_ISSET(init_flags, DB_INITENV_CDB_ALLDB) && - (ret = __dbenv_set_flags(dbenv, DB_CDB_ALLDB, 1)) != 0) - goto err; - } /* - * Save the flags passed to create the DB_ENV->open, that is, we've - * now replaced flags like DB_JOINENV with the flags responsible for - * the underlying set of subsystems. + * Save the flags matching the database environment: we'll replace + * the argument flags with the flags corresponding to the existing, + * underlying set of subsystems. */ dbenv->open_flags = flags; @@ -262,17 +318,36 @@ __dbenv_open(dbenv, db_home, flags, mode) } /* + * The DB_ENV structure has now been initialized. Turn off further + * use of the DB_ENV structure and most initialization methods, we're + * about to act on the values we currently have. + */ + F_SET(dbenv, DB_ENV_OPEN_CALLED); + + /* * Initialize the subsystems. * - * Initialize the replication area first, so that we can lock out this + * Initialize the mutex regions first. There's no ordering requirement, + * but it's simpler to get this in place so we don't have to keep track + * of mutexes for later allocation, once the mutex region is created we + * can go ahead and do the allocation for real. + */ + if ((ret = __mutex_open(dbenv)) != 0) + goto err; + + /* __mutex_open creates the thread info region, enter it now. */ + ENV_ENTER(dbenv, ip); + + /* + * Initialize the replication area next, so that we can lock out this * call if we're currently running recovery for replication. 
*/ if (LF_ISSET(DB_INIT_REP) && (ret = __rep_open(dbenv)) != 0) goto err; rep_check = IS_ENV_REPLICATED(dbenv) ? 1 : 0; - if (rep_check) - __env_rep_enter(dbenv); + if (rep_check && (ret = __env_rep_enter(dbenv, 0)) != 0) + goto err; if (LF_ISSET(DB_INIT_MPOOL)) if ((ret = __memp_open(dbenv)) != 0) @@ -310,29 +385,7 @@ __dbenv_open(dbenv, db_home, flags, mode) * If the application is running with transactions, initialize * the function tables. */ - if ((ret = __bam_init_recover(dbenv, &dbenv->recover_dtab, - &dbenv->recover_dtab_size)) != 0) - goto err; - if ((ret = __crdel_init_recover(dbenv, &dbenv->recover_dtab, - &dbenv->recover_dtab_size)) != 0) - goto err; - if ((ret = __db_init_recover(dbenv, &dbenv->recover_dtab, - &dbenv->recover_dtab_size)) != 0) - goto err; - if ((ret = __dbreg_init_recover(dbenv, &dbenv->recover_dtab, - &dbenv->recover_dtab_size)) != 0) - goto err; - if ((ret = __fop_init_recover(dbenv, &dbenv->recover_dtab, - &dbenv->recover_dtab_size)) != 0) - goto err; - if ((ret = __ham_init_recover(dbenv, &dbenv->recover_dtab, - &dbenv->recover_dtab_size)) != 0) - goto err; - if ((ret = __qam_init_recover(dbenv, &dbenv->recover_dtab, - &dbenv->recover_dtab_size)) != 0) - goto err; - if ((ret = __txn_init_recover(dbenv, &dbenv->recover_dtab, - &dbenv->recover_dtab_size)) != 0) + if ((ret = __env_init_rec(dbenv, DB_LOGVERSION)) != 0) goto err; } @@ -343,7 +396,7 @@ __dbenv_open(dbenv, db_home, flags, mode) * mpool wasn't initialized, then we can't ever open a DB handle. * * We also need to initialize the MT mutex as necessary, so do them - * both. If we error, __dbenv_refresh() will clean up. + * both. * * !!! * This must come after the __memp_open call above because if we are @@ -351,23 +404,19 @@ __dbenv_open(dbenv, db_home, flags, mode) * region for environments and db handles. So, the mpool region must * already be initialized. 
*/ - LIST_INIT(&dbenv->dblist); + TAILQ_INIT(&dbenv->dblist); if (LF_ISSET(DB_INIT_MPOOL)) { - dbmp = dbenv->mp_handle; - if (F_ISSET(dbenv, DB_ENV_THREAD)) { - if ((ret = __db_mutex_setup( - dbenv, dbmp->reginfo, &dbenv->dblist_mutexp, - MUTEX_ALLOC | MUTEX_THREAD)) != 0) - goto err; - if ((ret = __db_mutex_setup( - dbenv, dbmp->reginfo, &dbenv->mt_mutexp, - MUTEX_ALLOC | MUTEX_THREAD)) != 0) - goto err; - } + if ((ret = __mutex_alloc(dbenv, MTX_ENV_DBLIST, + DB_MUTEX_PROCESS_ONLY, &dbenv->mtx_dblist)) != 0) + goto err; + if ((ret = __mutex_alloc(dbenv, MTX_TWISTER, + DB_MUTEX_PROCESS_ONLY, &dbenv->mtx_mt)) != 0) + goto err; + /* Register DB's pgin/pgout functions. */ if ((ret = __memp_register( dbenv, DB_FTYPE_SET, __db_pgin, __db_pgout)) != 0) - return (ret); + goto err; } /* Perform recovery for any previous run. */ @@ -386,47 +435,68 @@ __dbenv_open(dbenv, db_home, flags, mode) * transaction ID and logs the reset if that's appropriate, so we * don't need to do anything here in the recover case. */ + infop = dbenv->reginfo; if (TXN_ON(dbenv) && !F_ISSET(dbenv, DB_ENV_LOG_INMEMORY) && - F_ISSET((REGINFO *)dbenv->reginfo, REGION_CREATE) && + F_ISSET(infop, REGION_CREATE) && !LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && (ret = __txn_reset(dbenv)) != 0) goto err; + /* The database environment is ready for business. */ + if ((ret = __db_e_golive(dbenv)) != 0) + goto err; + if (rep_check) - __env_db_rep_exit(dbenv); + ret = __env_db_rep_exit(dbenv); - return (0); +err: ENV_LEAVE(dbenv, ip); -err: /* - * If we fail after creating the regions, remove them. - * - * !!! - * No need to call __env_db_rep_exit, that work is done by the calls to - * __dbenv_refresh. - */ - if (dbenv->reginfo != NULL && - F_ISSET((REGINFO *)dbenv->reginfo, REGION_CREATE)) { - ret = __db_panic(dbenv, ret); + if (ret != 0) { + /* + * If we fail after creating the regions, panic and remove them. + * + * !!! + * No need to call __env_db_rep_exit, that work is done by the + * calls to __env_refresh. 
+ */ + infop = dbenv->reginfo; + if (infop != NULL && F_ISSET(infop, REGION_CREATE)) { + ret = __db_panic(dbenv, ret); + + /* Refresh the DB_ENV so can use it to call remove. */ + (void)__env_refresh(dbenv, orig_flags, rep_check); + (void)__db_e_remove(dbenv, DB_FORCE); + (void)__env_refresh(dbenv, orig_flags, 0); + } else + (void)__env_refresh(dbenv, orig_flags, rep_check); + } - /* Refresh the DB_ENV so we can use it to call remove. */ - (void)__dbenv_refresh(dbenv, orig_flags, rep_check); - (void)__dbenv_remove_int(dbenv, db_home, DB_FORCE); - (void)__dbenv_refresh(dbenv, orig_flags, 0); - } else - (void)__dbenv_refresh(dbenv, orig_flags, rep_check); + if (register_recovery) { + /* + * If recovery succeeded, release our exclusive lock, other + * processes can now proceed. + * + * If recovery failed, unregister now and let another process + * clean up. + */ + if (ret == 0 && (t_ret = __envreg_xunlock(dbenv)) != 0) + ret = t_ret; + if (ret != 0) + (void)__envreg_unregister(dbenv, 1); + } return (ret); } /* - * __dbenv_remove -- + * __env_remove -- * DB_ENV->remove. * - * PUBLIC: int __dbenv_remove __P((DB_ENV *, const char *, u_int32_t)); + * PUBLIC: int __env_remove __P((DB_ENV *, const char *, u_int32_t)); */ int -__dbenv_remove(dbenv, db_home, flags) +__env_remove(dbenv, db_home, flags) DB_ENV *dbenv; const char *db_home; u_int32_t flags; @@ -443,87 +513,62 @@ __dbenv_remove(dbenv, db_home, flags) ENV_ILLEGAL_AFTER_OPEN(dbenv, "DB_ENV->remove"); - ret = __dbenv_remove_int(dbenv, db_home, flags); + if ((ret = __env_config(dbenv, db_home, flags, 0)) != 0) + return (ret); + + ret = __db_e_remove(dbenv, flags); - if ((t_ret = __dbenv_close(dbenv, 0)) != 0 && ret == 0) + if ((t_ret = __env_close(dbenv, 0)) != 0 && ret == 0) ret = t_ret; return (ret); } /* - * __dbenv_remove_int -- - * Discard an environment, internal version. 
- */ -static int -__dbenv_remove_int(dbenv, db_home, flags) - DB_ENV *dbenv; - const char *db_home; - u_int32_t flags; -{ - int ret; - - /* Initialize the DB_ENV structure. */ - if ((ret = __dbenv_config(dbenv, db_home, flags)) != 0) - return (ret); - - /* Remove the environment. */ - return (__db_e_remove(dbenv, flags)); -} - -/* - * __dbenv_config -- - * Minor initialization of the DB_ENV structure, read the DB_CONFIG file. + * __env_config -- + * Argument-based initialization. + * + * PUBLIC: int __env_config __P((DB_ENV *, const char *, u_int32_t, int)); */ -static int -__dbenv_config(dbenv, db_home, flags) +int +__env_config(dbenv, db_home, flags, mode) DB_ENV *dbenv; const char *db_home; u_int32_t flags; + int mode; { - FILE *fp; int ret; - char *p, buf[256]; + char *home, home_buf[DB_MAXPATHLEN]; /* - * Set the database home. Do this before calling __db_appname, - * it uses the home directory. + * Set the database home. + * + * Use db_home by default, this allows utilities to reasonably + * override the environment either explicitly or by using a -h + * option. Otherwise, use the environment if it's permitted + * and initialized. */ - if ((ret = __db_home(dbenv, db_home, flags)) != 0) - return (ret); - - /* Parse the config file. */ - p = NULL; - if ((ret = - __db_appname(dbenv, DB_APP_NONE, "DB_CONFIG", 0, NULL, &p)) != 0) - return (ret); - if (p == NULL) - fp = NULL; - else { - fp = fopen(p, "r"); - __os_free(dbenv, p); + home = (char *)db_home; + if (home == NULL && (LF_ISSET(DB_USE_ENVIRON) || + (LF_ISSET(DB_USE_ENVIRON_ROOT) && __os_isroot()))) { + home = home_buf; + if ((ret = __os_getenv( + dbenv, "DB_HOME", &home, sizeof(home_buf))) != 0) + return (ret); + /* + * home set to NULL if __os_getenv failed to find DB_HOME. 
+ */ } + if (home != NULL && + (ret = __os_strdup(dbenv, home, &dbenv->db_home)) != 0) + return (ret); - if (fp != NULL) { - while (fgets(buf, sizeof(buf), fp) != NULL) { - if ((p = strchr(buf, '\n')) != NULL) - *p = '\0'; - else if (strlen(buf) + 1 == sizeof(buf)) { - __db_err(dbenv, "DB_CONFIG: line too long"); - (void)fclose(fp); - return (EINVAL); - } - if (buf[0] == '\0' || - buf[0] == '#' || isspace((int)buf[0])) - continue; + /* Default permissions are read-write for both owner and group. */ + dbenv->db_mode = mode == 0 ? __db_omode("rw-rw----") : mode; - if ((ret = __db_parse(dbenv, buf)) != 0) { - (void)fclose(fp); - return (ret); - } - } - (void)fclose(fp); - } + /* Read the DB_CONFIG file. */ + if ((ret = __env_read_db_config(dbenv)) != 0) + return (ret); /* * If no temporary directory path was specified in the config file, @@ -532,29 +577,28 @@ __dbenv_config(dbenv, db_home, flags) if (dbenv->db_tmp_dir == NULL && (ret = __os_tmpdir(dbenv, flags)) != 0) return (ret); - /* Flag that the DB_ENV structure has been initialized. */ - F_SET(dbenv, DB_ENV_OPEN_CALLED); - return (0); } /* - * __dbenv_close_pp -- + * __env_close_pp -- * DB_ENV->close pre/post processor. * - * PUBLIC: int __dbenv_close_pp __P((DB_ENV *, u_int32_t)); + * PUBLIC: int __env_close_pp __P((DB_ENV *, u_int32_t)); */ int -__dbenv_close_pp(dbenv, flags) +__env_close_pp(dbenv, flags) DB_ENV *dbenv; u_int32_t flags; { + DB_THREAD_INFO *ip; int rep_check, ret, t_ret; ret = 0; PANIC_CHECK(dbenv); + ENV_ENTER(dbenv, ip); /* * Validate arguments, but as a DB_ENV handle destructor, we can't * fail. @@ -564,23 +608,36 @@ __dbenv_close_pp(dbenv, flags) ret = t_ret; rep_check = IS_ENV_REPLICATED(dbenv) ? 1 : 0; - if (rep_check) - __env_rep_enter(dbenv); + if (rep_check) { +#ifdef HAVE_REPLICATION_THREADS + /* + * Shut down Replication Manager threads first of all. 
This + * must be done before __env_rep_enter to avoid a deadlock that + * could occur if repmgr's background threads try to do a rep + * operation that needs __rep_lockout. + */ + if ((t_ret = __repmgr_close(dbenv)) != 0 && ret == 0) + ret = t_ret; +#endif + if ((t_ret = __env_rep_enter(dbenv, 0)) != 0 && ret == 0) + ret = t_ret; + } - if ((t_ret = __dbenv_close(dbenv, rep_check)) != 0 && ret == 0) + if ((t_ret = __env_close(dbenv, rep_check)) != 0 && ret == 0) ret = t_ret; + /* Don't ENV_LEAVE as we have already detached from the region. */ return (ret); } /* - * __dbenv_close -- + * __env_close -- * DB_ENV->close. * - * PUBLIC: int __dbenv_close __P((DB_ENV *, int)); + * PUBLIC: int __env_close __P((DB_ENV *, int)); */ int -__dbenv_close(dbenv, rep_check) +__env_close(dbenv, rep_check) DB_ENV *dbenv; int rep_check; { @@ -597,22 +654,16 @@ __dbenv_close(dbenv, rep_check) if (TXN_ON(dbenv) && (t_ret = __txn_preclose(dbenv)) != 0 && ret == 0) ret = t_ret; - if (REP_ON(dbenv) && - (t_ret = __rep_preclose(dbenv, 1)) != 0 && ret == 0) +#ifdef HAVE_REPLICATION + if ((t_ret = __rep_close(dbenv)) != 0 && ret == 0) ret = t_ret; +#endif /* * Detach from the regions and undo the allocations done by * DB_ENV->open. */ - if ((t_ret = __dbenv_refresh(dbenv, 0, rep_check)) != 0 && ret == 0) - ret = t_ret; - - /* Do per-subsystem close. */ - if ((t_ret = __lock_dbenv_close(dbenv)) != 0 && ret == 0) - ret = t_ret; - - if ((t_ret = __rep_dbenv_close(dbenv)) != 0 && ret == 0) + if ((t_ret = __env_refresh(dbenv, 0, rep_check)) != 0 && ret == 0) ret = t_ret; #ifdef HAVE_CRYPTO @@ -623,54 +674,66 @@ __dbenv_close(dbenv, rep_check) if ((t_ret = __crypto_dbenv_close(dbenv)) != 0 && ret == 0) ret = t_ret; #endif + /* If we're registered, clean up. */ + if (dbenv->registry != NULL) { + (void)__envreg_unregister(dbenv, 0); + dbenv->registry = NULL; + } /* Release any string-based configuration parameters we've copied. 
*/ if (dbenv->db_log_dir != NULL) __os_free(dbenv, dbenv->db_log_dir); + dbenv->db_log_dir = NULL; if (dbenv->db_tmp_dir != NULL) __os_free(dbenv, dbenv->db_tmp_dir); + dbenv->db_tmp_dir = NULL; if (dbenv->db_data_dir != NULL) { for (p = dbenv->db_data_dir; *p != NULL; ++p) __os_free(dbenv, *p); __os_free(dbenv, dbenv->db_data_dir); + dbenv->db_data_dir = NULL; + dbenv->data_next = 0; + } + if (dbenv->db_home != NULL) { + __os_free(dbenv, dbenv->db_home); + dbenv->db_home = NULL; } /* Discard the structure. */ - memset(dbenv, CLEAR_BYTE, sizeof(DB_ENV)); - __os_free(NULL, dbenv); + __db_env_destroy(dbenv); return (ret); } /* - * __dbenv_refresh -- - * Refresh the DB_ENV structure, releasing resources allocated by - * DB_ENV->open, and returning it to the state it was in just before - * open was called. (Note that this means that any state set by - * pre-open configuration functions must be preserved.) + * __env_refresh -- + * Refresh the DB_ENV structure. */ static int -__dbenv_refresh(dbenv, orig_flags, rep_check) +__env_refresh(dbenv, orig_flags, rep_check) DB_ENV *dbenv; u_int32_t orig_flags; int rep_check; { DB *ldbp; - DB_MPOOL *dbmp; + DB_THREAD_INFO *ip; int ret, t_ret; - dbmp = dbenv->mp_handle; ret = 0; /* + * Release resources allocated by DB_ENV->open, and return it to the + * state it was in just before __env_open was called. (This means + * state set by pre-open configuration functions must be preserved.) + * * Refresh subsystems, in the reverse order they were opened (txn * must be first, it may want to discard locks and flush the log). * * !!! - * Note that these functions, like all of __dbenv_refresh, only undo - * the effects of __dbenv_open. Functions that undo work done by + * Note that these functions, like all of __env_refresh, only undo + * the effects of __env_open. Functions that undo work done by * db_env_create or by a configuration function should go in - * __dbenv_close. + * __env_close. 
*/ if (TXN_ON(dbenv) && (t_ret = __txn_dbenv_refresh(dbenv)) != 0 && ret == 0) @@ -686,11 +749,10 @@ __dbenv_refresh(dbenv, orig_flags, rep_check) */ if (LOCKING_ON(dbenv)) { if (!F_ISSET(dbenv, DB_ENV_THREAD) && - dbenv->env_lid != DB_LOCK_INVALIDID && - (t_ret = __lock_id_free(dbenv, dbenv->env_lid)) != 0 && - ret == 0) + dbenv->env_lref != NULL && (t_ret = __lock_id_free(dbenv, + ((DB_LOCKER *)dbenv->env_lref)->id)) != 0 && ret == 0) ret = t_ret; - dbenv->env_lid = DB_LOCK_INVALIDID; + dbenv->env_lref = NULL; if ((t_ret = __lock_dbenv_refresh(dbenv)) != 0 && ret == 0) ret = t_ret; @@ -708,20 +770,23 @@ __dbenv_refresh(dbenv, orig_flags, rep_check) * log file handles. Ick. */ if (dbenv->db_ref != 0) { - __db_err(dbenv, "Database handles remain at environment close"); - for (ldbp = LIST_FIRST(&dbenv->dblist); - ldbp != NULL; ldbp = LIST_NEXT(ldbp, dblistlinks)) - __db_err(dbenv, "Open database handle: %s%s%s", - ldbp->fname, ldbp->dname == NULL ? "" : "/", + __db_errx(dbenv, + "Database handles still open at environment close"); + TAILQ_FOREACH(ldbp, &dbenv->dblist, dblistlinks) + __db_errx(dbenv, "Open database handle: %s%s%s", + ldbp->fname == NULL ? "unnamed" : ldbp->fname, + ldbp->dname == NULL ? "" : "/", ldbp->dname == NULL ? "" : ldbp->dname); if (ret == 0) ret = EINVAL; } - LIST_INIT(&dbenv->dblist); - if (dbenv->dblist_mutexp != NULL) - __db_mutex_free(dbenv, dbmp->reginfo, dbenv->dblist_mutexp); - if (dbenv->mt_mutexp != NULL) - __db_mutex_free(dbenv, dbmp->reginfo, dbenv->mt_mutexp); + TAILQ_INIT(&dbenv->dblist); + + if ((t_ret = __mutex_free(dbenv, &dbenv->mtx_dblist)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __mutex_free(dbenv, &dbenv->mtx_mt)) != 0 && ret == 0) + ret = t_ret; + if (dbenv->mt != NULL) { __os_free(dbenv, dbenv->mt); dbenv->mt = NULL; @@ -751,15 +816,28 @@ __dbenv_refresh(dbenv, orig_flags, rep_check) * as soon as we drop the handle count, there's little opportunity * to do harm. 
*/ - if (rep_check) - __env_db_rep_exit(dbenv); + if (rep_check && (t_ret = __env_db_rep_exit(dbenv)) != 0 && ret == 0) + ret = t_ret; /* * Detach from the region. * * Must come after we call __env_db_rep_exit above. */ - __rep_dbenv_refresh(dbenv); + if (REP_ON(dbenv)) + __rep_dbenv_refresh(dbenv); + + /* + * Mark the thread as out of the env before we get rid of the handles + * needed to do so. + */ + if (dbenv->thr_hashtab != NULL && + (t_ret = __env_set_state(dbenv, &ip, THREAD_OUT)) != 0 && ret == 0) + ret = t_ret; + + if (MUTEX_ON(dbenv) && + (t_ret = __mutex_dbenv_refresh(dbenv)) != 0 && ret == 0) + ret = t_ret; if (dbenv->reginfo != NULL) { if ((t_ret = __db_e_detach(dbenv, 0)) != 0 && ret == 0) @@ -771,15 +849,11 @@ __dbenv_refresh(dbenv, orig_flags, rep_check) */ } - /* Undo changes and allocations done by __dbenv_open. */ - if (dbenv->db_home != NULL) { - __os_free(dbenv, dbenv->db_home); - dbenv->db_home = NULL; + if (dbenv->mutex_iq != NULL) { + __os_free(dbenv, dbenv->mutex_iq); + dbenv->mutex_iq = NULL; } - dbenv->open_flags = 0; - dbenv->db_mode = 0; - if (dbenv->recover_dtab != NULL) { __os_free(dbenv, dbenv->recover_dtab); dbenv->recover_dtab = NULL; @@ -813,13 +887,13 @@ __dbenv_refresh(dbenv, orig_flags, rep_check) } /* - * __dbenv_get_open_flags + * __env_get_open_flags * Retrieve the flags passed to DB_ENV->open. 
* - * PUBLIC: int __dbenv_get_open_flags __P((DB_ENV *, u_int32_t *)); + * PUBLIC: int __env_get_open_flags __P((DB_ENV *, u_int32_t *)); */ int -__dbenv_get_open_flags(dbenv, flagsp) +__env_get_open_flags(dbenv, flagsp) DB_ENV *dbenv; u_int32_t *flagsp; { @@ -847,13 +921,15 @@ __db_appname(dbenv, appname, file, tmp_oflags, fhpp, namep) DB_FH **fhpp; char **namep; { + enum { TRY_NOTSET, TRY_DATA_DIR, TRY_ENV_HOME, TRY_CREATE } try_state; size_t len, str_len; int data_entry, ret, slash, tmp_create; const char *a, *b; char *p, *str; + try_state = TRY_NOTSET; a = b = NULL; - data_entry = -1; + data_entry = 0; tmp_create = 0; /* @@ -890,11 +966,30 @@ retry: /* case DB_APP_NONE: break; case DB_APP_DATA: - if (dbenv != NULL && dbenv->db_data_dir != NULL && - (b = dbenv->db_data_dir[++data_entry]) == NULL) { - data_entry = -1; - b = dbenv->db_data_dir[0]; + if (dbenv == NULL || dbenv->db_data_dir == NULL) { + try_state = TRY_CREATE; + break; + } + + /* + * First, step through the data_dir entries, if any, looking + * for the file. + */ + if ((b = dbenv->db_data_dir[data_entry]) != NULL) { + ++data_entry; + try_state = TRY_DATA_DIR; + break; } + + /* Second, look in the environment home directory. */ + if (try_state != TRY_ENV_HOME) { + try_state = TRY_ENV_HOME; + break; + } + + /* Third, try creation in the first data_dir entry. */ + try_state = TRY_CREATE; + b = dbenv->db_data_dir[0]; break; case DB_APP_LOG: if (dbenv != NULL) @@ -933,7 +1028,8 @@ retry: /* * If we're opening a data file, see if it exists. If it does, * return it, otherwise, try and find another one to open. */ - if (__os_exists(str, NULL) != 0 && data_entry != -1) { + if (appname == DB_APP_DATA && + __os_exists(dbenv, str, NULL) != 0 && try_state != TRY_CREATE) { __os_free(dbenv, str); b = NULL; goto retry; @@ -954,332 +1050,6 @@ retry: /* } /* - * __db_home -- - * Find the database home. 
- * - * PUBLIC: int __db_home __P((DB_ENV *, const char *, u_int32_t)); - */ -int -__db_home(dbenv, db_home, flags) - DB_ENV *dbenv; - const char *db_home; - u_int32_t flags; -{ - const char *p; - - /* - * Use db_home by default, this allows utilities to reasonably - * override the environment either explicitly or by using a -h - * option. Otherwise, use the environment if it's permitted - * and initialized. - */ - if ((p = db_home) == NULL && - (LF_ISSET(DB_USE_ENVIRON) || - (LF_ISSET(DB_USE_ENVIRON_ROOT) && __os_isroot())) && - (p = getenv("DB_HOME")) != NULL && p[0] == '\0') { - __db_err(dbenv, "illegal DB_HOME environment variable"); - return (EINVAL); - } - - return (p == NULL ? 0 : __os_strdup(dbenv, p, &dbenv->db_home)); -} - -#define __DB_OVFL(v, max) \ - if (v > max) { \ - __v = v; \ - __max = max; \ - goto toobig; \ - } - -/* - * __db_parse -- - * Parse a single NAME VALUE pair. - */ -static int -__db_parse(dbenv, s) - DB_ENV *dbenv; - char *s; -{ - u_long __max, __v, v1, v2, v3; - u_int32_t flags; - char *name, *p, *value, v4; - - /* - * !!! - * The constant 40 is hard-coded into format arguments to sscanf - * below, it can't be changed here without changing it there, too. - * The additional bytes are for a trailing nul byte and because we - * are reading user input -- I don't want to risk any off-by-ones. - */ - char arg[40 + 5]; - - /* - * Name/value pairs are parsed as two white-space separated strings. - * Leading and trailing white-space is trimmed from the value, but - * it may contain embedded white-space. Note: we use the isspace(3) - * macro because it's more portable, but that means that you can use - * characters like form-feed to separate the strings. 
- */ - name = s; - for (p = name; *p != '\0' && !isspace((int)*p); ++p) - ; - if (*p == '\0' || p == name) - goto illegal; - *p = '\0'; - for (++p; isspace((int)*p); ++p) - ; - if (*p == '\0') - goto illegal; - value = p; - for (++p; *p != '\0'; ++p) - ; - for (--p; isspace((int)*p); --p) - ; - ++p; - if (p == value) { -illegal: __db_err(dbenv, "mis-formatted name-value pair: %s", s); - return (EINVAL); - } - *p = '\0'; - - if (!strcasecmp(name, "set_cachesize")) { - if (sscanf(value, "%lu %lu %lu %c", &v1, &v2, &v3, &v4) != 3) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - __DB_OVFL(v2, UINT32_MAX); - __DB_OVFL(v3, 10000); - return (__memp_set_cachesize( - dbenv, (u_int32_t)v1, (u_int32_t)v2, (int)v3)); - } - - if (!strcasecmp(name, "set_data_dir") || - !strcasecmp(name, "db_data_dir")) /* Compatibility. */ - return (__dbenv_set_data_dir(dbenv, value)); - - if (!strcasecmp(name, "set_intermediate_dir")) {/* Undocumented. */ - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; -#ifdef INT_MAX - __DB_OVFL(v1, INT_MAX); -#endif - return (__dbenv_set_intermediate_dir(dbenv, (int)v1, 0)); - } - - if (!strcasecmp(name, "set_flags")) { - if (sscanf(value, "%40s %c", arg, &v4) != 1) - goto badarg; - - if (!strcasecmp(value, "db_auto_commit")) - return (__dbenv_set_flags(dbenv, DB_AUTO_COMMIT, 1)); - if (!strcasecmp(value, "db_cdb_alldb")) - return (__dbenv_set_flags(dbenv, DB_CDB_ALLDB, 1)); - if (!strcasecmp(value, "db_direct_db")) - return (__dbenv_set_flags(dbenv, DB_DIRECT_DB, 1)); - if (!strcasecmp(value, "db_direct_log")) - return (__dbenv_set_flags(dbenv, DB_DIRECT_LOG, 1)); - if (!strcasecmp(value, "db_dsync_log")) - return (__dbenv_set_flags(dbenv, DB_DSYNC_LOG, 1)); - if (!strcasecmp(value, "db_log_autoremove")) - return (__dbenv_set_flags(dbenv, DB_LOG_AUTOREMOVE, 1)); - if (!strcasecmp(value, "db_log_inmemory")) - return (__dbenv_set_flags(dbenv, DB_LOG_INMEMORY, 1)); - if (!strcasecmp(value, "db_nolocking")) - return (__dbenv_set_flags(dbenv, 
DB_NOLOCKING, 1)); - if (!strcasecmp(value, "db_nommap")) - return (__dbenv_set_flags(dbenv, DB_NOMMAP, 1)); - if (!strcasecmp(value, "db_nopanic")) - return (__dbenv_set_flags(dbenv, DB_NOPANIC, 1)); - if (!strcasecmp(value, "db_overwrite")) - return (__dbenv_set_flags(dbenv, DB_OVERWRITE, 1)); - if (!strcasecmp(value, "db_region_init")) - return (__dbenv_set_flags(dbenv, DB_REGION_INIT, 1)); - if (!strcasecmp(value, "db_txn_nosync")) - return (__dbenv_set_flags(dbenv, DB_TXN_NOSYNC, 1)); - if (!strcasecmp(value, "db_txn_write_nosync")) - return ( - __dbenv_set_flags(dbenv, DB_TXN_WRITE_NOSYNC, 1)); - if (!strcasecmp(value, "db_yieldcpu")) - return (__dbenv_set_flags(dbenv, DB_YIELDCPU, 1)); - goto badarg; - } - - if (!strcasecmp(name, "set_lg_bsize")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - return (__log_set_lg_bsize(dbenv, (u_int32_t)v1)); - } - - if (!strcasecmp(name, "set_lg_max")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - return (__log_set_lg_max(dbenv, (u_int32_t)v1)); - } - - if (!strcasecmp(name, "set_lg_regionmax")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - return (__log_set_lg_regionmax(dbenv, (u_int32_t)v1)); - } - - if (!strcasecmp(name, "set_lg_dir") || - !strcasecmp(name, "db_log_dir")) /* Compatibility. 
*/ - return (__log_set_lg_dir(dbenv, value)); - - if (!strcasecmp(name, "set_lk_detect")) { - if (sscanf(value, "%40s %c", arg, &v4) != 1) - goto badarg; - if (!strcasecmp(value, "db_lock_default")) - flags = DB_LOCK_DEFAULT; - else if (!strcasecmp(value, "db_lock_expire")) - flags = DB_LOCK_EXPIRE; - else if (!strcasecmp(value, "db_lock_maxlocks")) - flags = DB_LOCK_MAXLOCKS; - else if (!strcasecmp(value, "db_lock_maxwrite")) - flags = DB_LOCK_MAXWRITE; - else if (!strcasecmp(value, "db_lock_minlocks")) - flags = DB_LOCK_MINLOCKS; - else if (!strcasecmp(value, "db_lock_minwrite")) - flags = DB_LOCK_MINWRITE; - else if (!strcasecmp(value, "db_lock_oldest")) - flags = DB_LOCK_OLDEST; - else if (!strcasecmp(value, "db_lock_random")) - flags = DB_LOCK_RANDOM; - else if (!strcasecmp(value, "db_lock_youngest")) - flags = DB_LOCK_YOUNGEST; - else - goto badarg; - return (__lock_set_lk_detect(dbenv, flags)); - } - - if (!strcasecmp(name, "set_lk_max")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - return (__lock_set_lk_max(dbenv, (u_int32_t)v1)); - } - - if (!strcasecmp(name, "set_lk_max_locks")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - return (__lock_set_lk_max_locks(dbenv, (u_int32_t)v1)); - } - - if (!strcasecmp(name, "set_lk_max_lockers")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - return (__lock_set_lk_max_lockers(dbenv, (u_int32_t)v1)); - } - - if (!strcasecmp(name, "set_lk_max_objects")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - return (__lock_set_lk_max_objects(dbenv, (u_int32_t)v1)); - } - - if (!strcasecmp(name, "set_lock_timeout")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - return (__lock_set_env_timeout( - dbenv, (u_int32_t)v1, DB_SET_LOCK_TIMEOUT)); - } - - if (!strcasecmp(name, "set_mp_max_openfd")) { - if 
(sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, INT_MAX); - return (__memp_set_mp_max_openfd(dbenv, (int)v1)); - } - - if (!strcasecmp(name, "set_mp_max_write")) { - if (sscanf(value, "%lu %lu %c", &v1, &v2, &v4) != 2) - goto badarg; - __DB_OVFL(v1, INT_MAX); - __DB_OVFL(v2, INT_MAX); - return (__memp_set_mp_max_write(dbenv, (int)v1, (int)v2)); - } - - if (!strcasecmp(name, "set_mp_mmapsize")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - return (__memp_set_mp_mmapsize(dbenv, (u_int32_t)v1)); - } - - if (!strcasecmp(name, "set_region_init")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1 || v1 != 1) - goto badarg; - return (__dbenv_set_flags( - dbenv, DB_REGION_INIT, v1 == 0 ? 0 : 1)); - } - - if (!strcasecmp(name, "set_shm_key")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - return (__dbenv_set_shm_key(dbenv, (long)v1)); - } - - if (!strcasecmp(name, "set_tas_spins")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - return (__dbenv_set_tas_spins(dbenv, (u_int32_t)v1)); - } - - if (!strcasecmp(name, "set_tmp_dir") || - !strcasecmp(name, "db_tmp_dir")) /* Compatibility.*/ - return (__dbenv_set_tmp_dir(dbenv, value)); - - if (!strcasecmp(name, "set_tx_max")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - return (__txn_set_tx_max(dbenv, (u_int32_t)v1)); - } - - if (!strcasecmp(name, "set_txn_timeout")) { - if (sscanf(value, "%lu %c", &v1, &v4) != 1) - goto badarg; - __DB_OVFL(v1, UINT32_MAX); - return (__lock_set_env_timeout( - dbenv, (u_int32_t)v1, DB_SET_TXN_TIMEOUT)); - } - - if (!strcasecmp(name, "set_verbose")) { - if (sscanf(value, "%40s %c", arg, &v4) != 1) - goto badarg; - - else if (!strcasecmp(value, "db_verb_deadlock")) - flags = DB_VERB_DEADLOCK; - else if (!strcasecmp(value, "db_verb_recovery")) - flags = DB_VERB_RECOVERY; - else if (!strcasecmp(value, "db_verb_replication")) 
- flags = DB_VERB_REPLICATION; - else if (!strcasecmp(value, "db_verb_waitsfor")) - flags = DB_VERB_WAITSFOR; - else - goto badarg; - return (__dbenv_set_verbose(dbenv, flags, 1)); - } - - __db_err(dbenv, "unrecognized name-value pair: %s", s); - return (EINVAL); - -badarg: __db_err(dbenv, "incorrect arguments for name-value pair: %s", s); - return (EINVAL); - -toobig: __db_err(dbenv, - "%s: %lu larger than maximum value %lu", s, __v, __max); - return (EINVAL); -} - -/* * __db_tmp_open -- * Create a temporary file. */ @@ -1290,7 +1060,7 @@ __db_tmp_open(dbenv, tmp_oflags, path, fhpp) char *path; DB_FH **fhpp; { - u_int32_t id; + pid_t pid; int filenum, i, isdir, ret; char *firstx, *trv; @@ -1298,12 +1068,12 @@ __db_tmp_open(dbenv, tmp_oflags, path, fhpp) * Check the target directory; if you have six X's and it doesn't * exist, this runs for a *very* long time. */ - if ((ret = __os_exists(path, &isdir)) != 0) { - __db_err(dbenv, "%s: %s", path, db_strerror(ret)); + if ((ret = __os_exists(dbenv, path, &isdir)) != 0) { + __db_err(dbenv, ret, "%s", path); return (ret); } if (!isdir) { - __db_err(dbenv, "%s: %s", path, db_strerror(EINVAL)); + __db_err(dbenv, EINVAL, "%s", path); return (EINVAL); } @@ -1312,15 +1082,16 @@ __db_tmp_open(dbenv, tmp_oflags, path, fhpp) (void)strcat(path, DB_TRAIL); /* Replace the X's with the process ID (in decimal). */ - for (trv = path + strlen(path), __os_id(&id); *--trv == 'X'; id /= 10) - *trv = '0' + (id % 10); + __os_id(dbenv, &pid, NULL); + for (trv = path + strlen(path); *--trv == 'X'; pid /= 10) + *trv = '0' + (u_char)(pid % 10); firstx = trv + 1; /* Loop, trying to open a file. */ for (filenum = 1;; filenum++) { if ((ret = __os_open(dbenv, path, tmp_oflags | DB_OSO_CREATE | DB_OSO_EXCL | DB_OSO_TEMP, - __db_omode("rw----"), fhpp)) == 0) + __db_omode(OWNER_RW), fhpp)) == 0) return (0); /* @@ -1331,14 +1102,13 @@ __db_tmp_open(dbenv, tmp_oflags, path, fhpp) * of other possible errors, we've lost. 
*/ if (ret != EEXIST) { - __db_err(dbenv, - "tmp_open: %s: %s", path, db_strerror(ret)); + __db_err(dbenv, ret, "temporary open: %s", path); return (ret); } /* * Generate temporary file names in a backwards-compatible way. - * If id == 12345, the result is: + * If pid == 12345, the result is: * <path>/DB12345 (tried above, the first time through). * <path>/DBa2345 ... <path>/DBz2345 * <path>/DBaa345 ... <path>/DBaz345 diff --git a/db/env/env_recover.c b/db/env/env_recover.c index 1c018171f..46d927dc1 100644 --- a/db/env/env_recover.c +++ b/db/env/env_recover.c @@ -1,47 +1,35 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 1996-2006 + * Oracle Corporation. All rights reserved. * - * $Id: env_recover.c,v 11.126 2004/09/22 03:43:52 bostic Exp $ + * $Id: env_recover.c,v 12.34 2006/09/09 14:28:22 bostic Exp $ */ #include "db_config.h" -#ifndef lint -static const char copyright[] = - "Copyright (c) 1996-2004\nSleepycat Software Inc. All rights reserved.\n"; -#endif - -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#if TIME_WITH_SYS_TIME -#include <sys/time.h> -#include <time.h> -#else -#if HAVE_SYS_TIME_H -#include <sys/time.h> -#else -#include <time.h> -#endif -#endif - -#include <string.h> -#endif - #include "db_int.h" #include "dbinc/db_page.h" -#include "dbinc/db_shash.h" +#include "dbinc/fop.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" #include "dbinc/log.h" -#include "dbinc/txn.h" #include "dbinc/mp.h" -#include "dbinc/db_am.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" + +#ifndef lint +static const char copyright[] = + "Copyright (c) 1996-2006\nOracle Corporation. 
All rights reserved.\n"; +#endif -static int __db_log_corrupt __P((DB_ENV *, DB_LSN *)); -static int __log_earliest __P((DB_ENV *, DB_LOGC *, int32_t *, DB_LSN *)); -static double __lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int)); +static int __db_log_corrupt __P((DB_ENV *, DB_LSN *)); +static int __env_init_rec_42 __P((DB_ENV *)); +static int __env_init_rec_43 __P((DB_ENV *)); +static int __env_init_rec_45 __P((DB_ENV *)); +static int __log_earliest __P((DB_ENV *, DB_LOGC *, int32_t *, DB_LSN *)); +static double __lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int)); /* * __db_apprec -- @@ -62,6 +50,7 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) DBT data; DB_LOGC *logc; DB_LSN ckp_lsn, first_lsn, last_lsn, lowlsn, lsn, stop_lsn, tlsn; + DB_TXNHEAD *txninfo; DB_TXNREGION *region; REGENV *renv; REGINFO *infop; @@ -71,39 +60,37 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) u_int32_t hi_txn, log_size, txnid; int32_t low; int have_rec, progress, ret, t_ret; - int (**dtab) __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - char *p, *pass, t1[60], t2[60]; - void *txninfo; + char *p, *pass; + char t1[CTIME_BUFLEN], t2[CTIME_BUFLEN], time_buf[CTIME_BUFLEN]; - COMPQUIET(nfiles, (double)0); + COMPQUIET(nfiles, (double)0.001); logc = NULL; ckp_args = NULL; - dtab = NULL; hi_txn = TXN_MAXIMUM; txninfo = NULL; pass = "initial"; + ZERO_LSN(lsn); /* * XXX * Get the log size. No locking required because we're single-threaded * during recovery. */ - log_size = - ((LOG *)(((DB_LOG *)dbenv->lg_handle)->reginfo.primary))->log_size; + log_size = ((LOG *)dbenv->lg_handle->reginfo.primary)->log_size; /* * If we need to, update the env handle timestamp. */ - if (update) { + if (update && REP_ON(dbenv)) { infop = dbenv->reginfo; renv = infop->primary; (void)time(&renv->rep_timestamp); } /* Set in-recovery flags. 
*/ - F_SET((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER); - region = ((DB_TXNMGR *)dbenv->tx_handle)->reginfo.primary; + F_SET(dbenv->lg_handle, DBLOG_RECOVER); + region = dbenv->tx_handle->reginfo.primary; F_SET(region, TXN_IN_RECOVERY); /* Allocate a cursor for the log. */ @@ -123,15 +110,20 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) if ((ret = __log_earliest(dbenv, logc, &low, &lowlsn)) != 0) goto err; if ((int32_t)dbenv->tx_timestamp < low) { - (void)snprintf(t1, sizeof(t1), - "%s", ctime(&dbenv->tx_timestamp)); + t1[sizeof(t1) - 1] = '\0'; + (void)strncpy(t1, __db_ctime( + &dbenv->tx_timestamp, time_buf), sizeof(t1) - 1); if ((p = strchr(t1, '\n')) != NULL) *p = '\0'; + + t2[sizeof(t2) - 1] = '\0'; tlow = (time_t)low; - (void)snprintf(t2, sizeof(t2), "%s", ctime(&tlow)); + (void)strncpy(t2, __db_ctime( + &tlow, time_buf), sizeof(t2) - 1); if ((p = strchr(t2, '\n')) != NULL) *p = '\0'; - __db_err(dbenv, + + __db_errx(dbenv, "Invalid recovery timestamp %s; earliest time is %s", t1, t2); ret = EINVAL; @@ -216,7 +208,7 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) if (ret == DB_NOTFOUND) ret = 0; else - __db_err(dbenv, "Last log record not found"); + __db_errx(dbenv, "Last log record not found"); goto err; } @@ -261,7 +253,7 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) if (ret == DB_NOTFOUND) ret = 0; else - __db_err(dbenv, "First log record not found"); + __db_errx(dbenv, "First log record not found"); goto err; } first_lsn = ckp_lsn; @@ -273,7 +265,7 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) /* We have a recent checkpoint. This is LSN (1). */ if ((ret = __txn_ckp_read(dbenv, data.data, &ckp_args)) != 0) { - __db_err(dbenv, + __db_errx(dbenv, "Invalid checkpoint record at [%ld][%ld]", (u_long)ckp_lsn.file, (u_long)ckp_lsn.offset); @@ -303,8 +295,8 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) * the logs and before the timestamp. 
*/ if ((dbenv->tx_timestamp != 0 || max_lsn != NULL) && - log_compare(&lowlsn, &first_lsn) < 0) { - DB_ASSERT(have_rec == 0); + LOG_COMPARE(&lowlsn, &first_lsn) < 0) { + DB_ASSERT(dbenv, have_rec == 0); first_lsn = lowlsn; } } @@ -312,7 +304,7 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) /* Get the record at first_lsn if we don't have it already. */ if (!have_rec && (ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) != 0) { - __db_err(dbenv, "Checkpoint LSN record [%ld][%ld] not found", + __db_errx(dbenv, "Checkpoint LSN record [%ld][%ld] not found", (u_long)first_lsn.file, (u_long)first_lsn.offset); goto err; } @@ -326,8 +318,8 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) (double)((log_size - first_lsn.offset) + last_lsn.offset) / log_size; /* We are going to divide by nfiles; make sure it isn't 0. */ - if (nfiles == 0) - nfiles = (double)0.001; + if (nfiles < 0.001) + nfiles = 0.001; } /* Find a low txnid. */ @@ -346,7 +338,7 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) * above), so there is nothing to do. 
*/ if (ret == DB_NOTFOUND) { - if (log_compare(&lsn, &last_lsn) != 0) + if (LOG_COMPARE(&lsn, &last_lsn) != 0) ret = __db_log_corrupt(dbenv, &lsn); else ret = 0; @@ -387,7 +379,7 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) pass = "backward"; for (ret = __log_c_get(logc, &lsn, &data, DB_LAST); - ret == 0 && log_compare(&lsn, &first_lsn) >= 0; + ret == 0 && LOG_COMPARE(&lsn, &first_lsn) >= 0; ret = __log_c_get(logc, &lsn, &data, DB_PREV)) { if (dbenv->db_feedback != NULL) { progress = 34 + (int)(33 * (__lsn_diff(&first_lsn, @@ -406,7 +398,7 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) } } if (ret == DB_NOTFOUND) { - if (log_compare(&lsn, &first_lsn) > 0) + if (LOG_COMPARE(&lsn, &first_lsn) > 0) ret = __db_log_corrupt(dbenv, &lsn); else ret = 0; @@ -451,7 +443,7 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) * we need to make sure that we don't try to roll * forward beyond the soon-to-be end of log. */ - if (log_compare(&lsn, &stop_lsn) >= 0) + if (LOG_COMPARE(&lsn, &stop_lsn) >= 0) break; } @@ -501,15 +493,25 @@ __db_apprec(dbenv, max_lsn, trunclsn, update, flags) #endif } +done: /* Take a checkpoint here to force any dirty data pages to disk. */ - if ((ret = __txn_checkpoint(dbenv, 0, 0, DB_FORCE)) != 0) - goto err; + if ((ret = __txn_checkpoint(dbenv, 0, 0, DB_FORCE)) != 0) { + /* + * If there was no space for the checkpoint we can + * still bring the environment up. No updates will + * be able to commit either, but the environment can + * be used read only. + */ + if (max_lsn == NULL && ret == ENOSPC) + ret = 0; + else + goto err; + } /* Close all the db files that are open. */ if ((ret = __dbreg_close_files(dbenv)) != 0) goto err; -done: if (max_lsn != NULL) { if (!IS_ZERO_LSN(((DB_TXNHEAD *)txninfo)->ckplsn)) region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn; @@ -520,6 +522,7 @@ done: /* We are going to truncate, so we'd best close the cursor. 
*/ if (logc != NULL && (ret = __log_c_close(logc)) != 0) goto err; + logc = NULL; if ((ret = __log_vtruncate(dbenv, max_lsn, &((DB_TXNHEAD *)txninfo)->ckplsn, trunclsn)) != 0) goto err; @@ -537,7 +540,7 @@ done: if (ret == DB_NOTFOUND) ret = 0; else - __db_err(dbenv, "First log record not found"); + __db_errx(dbenv, "First log record not found"); goto err; } if ((ret = __txn_getckp(dbenv, &first_lsn)) == 0 && @@ -545,7 +548,7 @@ done: /* We have a recent checkpoint. This is LSN (1). */ if ((ret = __txn_ckp_read(dbenv, data.data, &ckp_args)) != 0) { - __db_err(dbenv, + __db_errx(dbenv, "Invalid checkpoint record at [%ld][%ld]", (u_long)first_lsn.file, (u_long)first_lsn.offset); @@ -557,19 +560,31 @@ done: if ((ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) != 0) goto err; if ((ret = __env_openfiles(dbenv, logc, - txninfo, &data, &first_lsn, NULL, nfiles, 1)) != 0) + txninfo, &data, &first_lsn, max_lsn, nfiles, 1)) != 0) goto err; - } else if (region->stat.st_nrestores == 0) + } else if (region->stat.st_nrestores == 0) { /* * If there are no prepared transactions that need resolution, * we need to reset the transaction ID space and log this fact. */ if ((ret = __txn_reset(dbenv)) != 0) goto err; + } else { + /* + * If we have restored prepared txns then they are in process + * as far as replication is concerned. + */ + if (REP_ON(dbenv)) + dbenv->rep_handle->region->op_cnt = + region->stat.st_nrestores; + if ((ret = __txn_recycle_id(dbenv)) != 0) + goto err; + } if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) { (void)time(&now); - __db_msg(dbenv, "Recovery complete at %.24s", ctime(&now)); + __db_msg(dbenv, + "Recovery complete at %.24s", __db_ctime(&now, time_buf)); __db_msg(dbenv, "%s %lx %s [%lu][%lu]", "Maximum transaction ID", (u_long)(txninfo == NULL ? 
@@ -580,7 +595,7 @@ done: } if (0) { -msgerr: __db_err(dbenv, +msgerr: __db_errx(dbenv, "Recovery function for LSN %lu %lu failed on %s pass", (u_long)lsn.file, (u_long)lsn.offset, pass); } @@ -591,12 +606,9 @@ err: if (logc != NULL && (t_ret = __log_c_close(logc)) != 0 && ret == 0) if (txninfo != NULL) __db_txnlist_end(dbenv, txninfo); - if (dtab != NULL) - __os_free(dbenv, dtab); - dbenv->tx_timestamp = 0; - F_CLR((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER); + F_CLR(dbenv->lg_handle, DBLOG_RECOVER); F_CLR(region, TXN_IN_RECOVERY); return (ret); @@ -627,8 +639,9 @@ __lsn_diff(low, high, current, max, is_forward) if (current->file == low->file) nf = (double)(current->offset - low->offset) / max; else if (current->offset < low->offset) - nf = (double)(current->file - low->file - 1) + - (double)(max - low->offset + current->offset) / max; + nf = (double)((current->file - low->file) - 1) + + (double)((max - low->offset) + current->offset) / + max; else nf = (double)(current->file - low->file) + (double)(current->offset - low->offset) / max; @@ -636,9 +649,9 @@ __lsn_diff(low, high, current, max, is_forward) if (current->file == high->file) nf = (double)(high->offset - current->offset) / max; else if (current->offset > high->offset) - nf = (double)(high->file - current->file - 1) + + nf = (double)((high->file - current->file) - 1) + (double) - (max - current->offset + high->offset) / max; + ((max - current->offset) + high->offset) / max; else nf = (double)(high->file - current->file) + (double)(high->offset - current->offset) / max; @@ -692,7 +705,7 @@ __log_backup(dbenv, logc, max_lsn, start_lsn, cmp) * or equal max_lsn. */ cmp_lsn = ckp_args->ckp_lsn; - lcmp = (log_compare(&cmp_lsn, max_lsn) <= 0); + lcmp = (LOG_COMPARE(&cmp_lsn, max_lsn) <= 0); } else { /* * When we're walking back through the checkpoints @@ -700,7 +713,7 @@ __log_backup(dbenv, logc, max_lsn, start_lsn, cmp) * than the max_lsn (also a ckp LSN). 
*/ cmp_lsn = lsn; - lcmp = (log_compare(&cmp_lsn, max_lsn) < 0); + lcmp = (LOG_COMPARE(&cmp_lsn, max_lsn) < 0); } if (lcmp) { *start_lsn = cmp_lsn; @@ -717,6 +730,7 @@ __log_backup(dbenv, logc, max_lsn, start_lsn, cmp) break; } __os_free(dbenv, ckp_args); + ckp_args = NULL; } if (ckp_args != NULL) @@ -753,18 +767,18 @@ __log_earliest(dbenv, logc, lowtime, lowlsn) int cmp, ret; memset(&data, 0, sizeof(data)); + /* * Read forward through the log looking for the first checkpoint * record whose ckp_lsn is greater than first_lsn. */ - for (ret = __log_c_get(logc, &first_lsn, &data, DB_FIRST); ret == 0; ret = __log_c_get(logc, &lsn, &data, DB_NEXT)) { memcpy(&rectype, data.data, sizeof(rectype)); if (rectype != DB___txn_ckp) continue; if ((ret = __txn_ckp_read(dbenv, data.data, &ckpargs)) == 0) { - cmp = log_compare(&ckpargs->ckp_lsn, &first_lsn); + cmp = LOG_COMPARE(&ckpargs->ckp_lsn, &first_lsn); *lowlsn = ckpargs->ckp_lsn; *lowtime = ckpargs->timestamp; @@ -813,13 +827,12 @@ __env_openfiles(dbenv, logc, txninfo, * Get the log size. No locking required because we're single-threaded * during recovery. */ - log_size = - ((LOG *)(((DB_LOG *)dbenv->lg_handle)->reginfo.primary))->log_size; + log_size = ((LOG *)dbenv->lg_handle->reginfo.primary)->log_size; lsn = *open_lsn; for (;;) { if (in_recovery && dbenv->db_feedback != NULL) { - DB_ASSERT(last_lsn != NULL); + DB_ASSERT(dbenv, last_lsn != NULL); progress = (int)(33 * (__lsn_diff(open_lsn, last_lsn, &lsn, log_size, 1) / nfiles)); dbenv->db_feedback(dbenv, DB_RECOVER, progress); @@ -830,7 +843,7 @@ __env_openfiles(dbenv, logc, txninfo, in_recovery ? 
DB_TXN_OPENFILES : DB_TXN_POPENFILES, txninfo); if (ret != 0 && ret != DB_TXN_CKP) { - __db_err(dbenv, + __db_errx(dbenv, "Recovery function for LSN %lu %lu failed", (u_long)lsn.file, (u_long)lsn.offset); break; @@ -838,7 +851,7 @@ __env_openfiles(dbenv, logc, txninfo, if ((ret = __log_c_get(logc, &lsn, data, DB_NEXT)) != 0) { if (ret == DB_NOTFOUND) { if (last_lsn != NULL && - log_compare(&lsn, last_lsn) != 0) + LOG_COMPARE(&lsn, last_lsn) != 0) ret = __db_log_corrupt(dbenv, &lsn); else ret = 0; @@ -855,7 +868,150 @@ __db_log_corrupt(dbenv, lsnp) DB_ENV *dbenv; DB_LSN *lsnp; { - __db_err(dbenv, "Log file corrupt at LSN: [%lu][%lu]", + __db_errx(dbenv, "Log file corrupt at LSN: [%lu][%lu]", (u_long)lsnp->file, (u_long)lsnp->offset); return (EINVAL); } + +/* + * __env_init_rec -- + * + * PUBLIC: int __env_init_rec __P((DB_ENV *, u_int32_t)); + */ +int +__env_init_rec(dbenv, version) + DB_ENV *dbenv; + u_int32_t version; +{ + int ret; + + /* + * We need to prime the recovery table with the current recovery + * functions. Then we overwrite only specific entries based on + * each previous version we support. + */ + if ((ret = __env_init_rec_45(dbenv)) != 0) + return (ret); + ret = 0; + switch (version) { + /* + * There are no log record/recovery differences between + * 4.4 and 4.5. The log version changed due to checksum. 
+ */ + case DB_LOGVERSION_45: + case DB_LOGVERSION_44: + break; + case DB_LOGVERSION_43: + ret = __env_init_rec_43(dbenv); + break; + case DB_LOGVERSION_42: + ret = __env_init_rec_42(dbenv); + break; + default: + __db_errx(dbenv, "Unknown version %lu", (u_long)version); + ret = EINVAL; + break; + } + return (ret); +} + +static int +__env_init_rec_42(dbenv) + DB_ENV *dbenv; +{ + int ret; + + if ((ret = __db_add_recovery(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size, __db_relink_42_recover, + DB___db_relink_42)) != 0) + goto err; + if ((ret = __db_add_recovery(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size, __db_pg_alloc_42_recover, + DB___db_pg_alloc_42)) != 0) + goto err; + if ((ret = __db_add_recovery(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size, __db_pg_free_42_recover, + DB___db_pg_free_42)) != 0) + goto err; + if ((ret = __db_add_recovery(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size, __db_pg_freedata_42_recover, + DB___db_pg_freedata_42)) != 0) + goto err; + if ((ret = __db_add_recovery(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size, __ham_metagroup_42_recover, + DB___ham_metagroup_42)) != 0) + goto err; + if ((ret = __db_add_recovery(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size, __ham_groupalloc_42_recover, + DB___ham_groupalloc_42)) != 0) + goto err; + if ((ret = __db_add_recovery(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size, __txn_ckp_42_recover, + DB___txn_ckp_42)) != 0) + goto err; + if ((ret = __db_add_recovery(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size, __txn_regop_42_recover, + DB___txn_regop_42)) != 0) + goto err; +err: + return (ret); +} + +static int +__env_init_rec_43(dbenv) + DB_ENV *dbenv; +{ + int ret; + + if ((ret = __db_add_recovery(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size, __bam_relink_43_recover, + DB___bam_relink_43)) != 0) + goto err; + /* + * We want to use the 4.2-based txn_regop record. 
+ */ + if ((ret = __db_add_recovery(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size, __txn_regop_42_recover, + DB___txn_regop_42)) != 0) + goto err; +err: + return (ret); +} + +/* + * __env_init_rec_45 -- + * + */ +static int +__env_init_rec_45(dbenv) + DB_ENV *dbenv; +{ + int ret; + + if ((ret = __bam_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) + goto err; + if ((ret = __crdel_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) + goto err; + if ((ret = __db_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) + goto err; + if ((ret = __dbreg_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) + goto err; + if ((ret = __fop_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) + goto err; + if ((ret = __ham_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) + goto err; + if ((ret = __qam_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) + goto err; + if ((ret = __txn_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) + goto err; +err: + return (ret); +} diff --git a/db/env/env_region.c b/db/env/env_region.c index 17d2e65d3..76d7958d8 100644 --- a/db/env/env_region.c +++ b/db/env/env_region.c @@ -1,33 +1,22 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 1996-2006 + * Oracle Corporation. All rights reserved. 
* - * $Id: env_region.c,v 11.103 2004/10/15 16:59:41 bostic Exp $ + * $Id: env_region.c,v 12.19 2006/08/24 14:45:40 bostic Exp $ */ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <string.h> -#endif - #include "db_int.h" -#include "dbinc/db_shash.h" #include "dbinc/crypto.h" -#include "dbinc/lock.h" -#include "dbinc/log.h" #include "dbinc/mp.h" -#include "dbinc/txn.h" -static int __db_des_destroy __P((DB_ENV *, REGION *, int)); +static void __db_des_destroy __P((DB_ENV *, REGION *)); static int __db_des_get __P((DB_ENV *, REGINFO *, REGINFO *, REGION **)); static int __db_e_remfile __P((DB_ENV *)); static int __db_faultmem __P((DB_ENV *, void *, size_t, int)); -static void __db_region_destroy __P((DB_ENV *, REGINFO *)); /* * __db_e_attach @@ -46,37 +35,11 @@ __db_e_attach(dbenv, init_flagsp) REGION *rp, tregion; size_t size; size_t nrw; - u_int32_t mbytes, bytes; + u_int32_t bytes, i, mbytes, nregions; u_int retry_cnt; - int ret, segid; + int majver, minver, patchver, ret, segid; char buf[sizeof(DB_REGION_FMT) + 20]; -#if !defined(HAVE_MUTEX_THREADS) - /* - * !!! - * If we don't have spinlocks, we need a file descriptor for fcntl(2) - * locking. We use the file handle from the REGENV file for this - * purpose. - * - * Since we may be using shared memory regions, e.g., shmget(2), and - * not a mapped-in regular file, the backing file may be only a few - * bytes in length. So, this depends on the ability to call fcntl to - * lock file offsets much larger than the actual physical file. I - * think that's safe -- besides, very few systems actually need this - * kind of support, SunOS is the only one still in wide use of which - * I'm aware. - * - * The error case is if an application lacks spinlocks and wants to be - * threaded. That doesn't work because fcntl may lock the underlying - * process, including all its threads. 
- */ - if (F_ISSET(dbenv, DB_ENV_THREAD)) { - __db_err(dbenv, - "architecture lacks fast mutexes: applications cannot be threaded"); - return (EINVAL); - } -#endif - /* Initialization */ retry_cnt = 0; @@ -95,8 +58,8 @@ loop: renv = NULL; /* * We have to single-thread the creation of the REGENV region. Once - * it exists, we can do locking using locks in the region, but until - * then we have to be the only player in the game. + * it exists, we can serialize using region mutexes, but until then + * we have to be the only player in the game. * * If this is a private environment, we are only called once and there * are no possible race conditions. @@ -131,8 +94,7 @@ loop: renv = NULL; dbenv->db_mode, &dbenv->lockfhp)) == 0) goto creation; if (ret != EEXIST) { - __db_err(dbenv, - "%s: %s", infop->name, db_strerror(ret)); + __db_err(dbenv, ret, "%s", infop->name); goto err; } } @@ -179,7 +141,7 @@ loop: renv = NULL; */ if ((ret = __os_ioinfo(dbenv, infop->name, dbenv->lockfhp, &mbytes, &bytes, NULL)) != 0) { - __db_err(dbenv, "%s: %s", infop->name, db_strerror(ret)); + __db_err(dbenv, ret, "%s", infop->name); goto err; } @@ -193,7 +155,8 @@ loop: renv = NULL; /* * If the size is less than the size of a REGENV_REF structure, the * region (or, possibly, the REGENV_REF structure) has not yet been - * completely written. Wait awhile and try again. + * completely written. Shouldn't be possible, but there's no reason + * not to wait awhile and try again. * * Otherwise, if the size is the size of a REGENV_REF structure, * read it into memory and use it as a reference to the real region. 
@@ -206,9 +169,9 @@ loop: renv = NULL; sizeof(ref), &nrw)) != 0 || nrw < (size_t)sizeof(ref)) { if (ret == 0) ret = EIO; - __db_err(dbenv, - "%s: unable to read system-memory information from: %s", - infop->name, db_strerror(ret)); + __db_err(dbenv, ret, + "%s: unable to read system-memory information", + infop->name); goto err; } size = ref.size; @@ -217,20 +180,19 @@ loop: renv = NULL; F_SET(dbenv, DB_ENV_SYSTEM_MEM); } else if (F_ISSET(dbenv, DB_ENV_SYSTEM_MEM)) { ret = EINVAL; - __db_err(dbenv, - "%s: existing environment not created in system memory: %s", - infop->name, db_strerror(ret)); + __db_err(dbenv, ret, + "%s: existing environment not created in system memory", + infop->name); goto err; } else segid = INVALID_REGION_SEGID; +#ifndef HAVE_MUTEX_FCNTL /* - * If not doing thread locking, we need to save the file handle for - * fcntl(2) locking. Otherwise, discard the handle, we no longer - * need it, and the less contact between the buffer cache and the VM, - * the better. + * If we're not doing fcntl locking, we can close the file handle. We + * no longer need it and the less contact between the buffer cache and + * the VM, the better. */ -#ifdef HAVE_MUTEX_THREADS (void)__os_closehandle(dbenv, dbenv->lockfhp); dbenv->lockfhp = NULL; #endif @@ -253,13 +215,24 @@ loop: renv = NULL; infop->addr = (u_int8_t *)infop->addr + sizeof(REGENV); renv = infop->primary; - /* Make sure the region matches our build. */ + /* + * Make sure the region matches our build. Special case a region + * that's all nul bytes, just treat it like any other corruption. + * + * !!! + * We don't display the major/minor version from the environment, + * because it may be in a different place in the two regions. 
+ */ if (renv->majver != DB_VERSION_MAJOR || renv->minver != DB_VERSION_MINOR) { - __db_err(dbenv, - "Program version %d.%d doesn't match environment version", - DB_VERSION_MAJOR, DB_VERSION_MINOR); - ret = DB_VERSION_MISMATCH; + if (renv->majver != 0 || renv->minver != 0) { + __db_errx(dbenv, + "Program version %d.%d doesn't match environment version %d.%d", + DB_VERSION_MAJOR, DB_VERSION_MINOR, + renv->majver, renv->minver); + ret = DB_VERSION_MISMATCH; + } else + ret = EINVAL; goto err; } @@ -277,33 +250,19 @@ loop: renv = NULL; * I'd rather play permissions games using the underlying file, but I * can't because Windows/NT filesystems won't open files mode 0. */ - if (renv->envpanic && !F_ISSET(dbenv, DB_ENV_NOPANIC)) { + if (renv->panic && !F_ISSET(dbenv, DB_ENV_NOPANIC)) { ret = __db_panic_msg(dbenv); goto err; } if (renv->magic != DB_REGION_MAGIC) goto retry; - /* Lock the environment. */ - MUTEX_LOCK(dbenv, &renv->mutex); - - /* - * Finally! We own the environment now. Repeat the panic check, it's - * possible that it was set while we waited for the lock. - */ - if (renv->envpanic && !F_ISSET(dbenv, DB_ENV_NOPANIC)) { - ret = __db_panic_msg(dbenv); - goto err_unlock; - } - /* * Get a reference to the underlying REGION information for this * environment. */ - if ((ret = __db_des_get(dbenv, infop, infop, &rp)) != 0 || rp == NULL) { - MUTEX_UNLOCK(dbenv, &renv->mutex); + if ((ret = __db_des_get(dbenv, infop, infop, &rp)) != 0 || rp == NULL) goto find_err; - } infop->rp = rp; /* @@ -312,28 +271,33 @@ loop: renv = NULL; * growing as part of its creation. We can detect this by checking the * size we originally found against the region's current size. (The * region's current size has to be final, the creator finished growing - * it before releasing the environment for us to lock.) + * it before setting the magic number in the region.) 
*/ - if (rp->size != size) { -err_unlock: MUTEX_UNLOCK(dbenv, &renv->mutex); + if (rp->size != size) goto retry; - } /* Increment the reference count. */ + MUTEX_LOCK(dbenv, renv->mtx_regenv); ++renv->refcnt; + MUTEX_UNLOCK(dbenv, renv->mtx_regenv); /* - * Add configuration flags from our caller; return the total set of - * configuration flags for later DB_JOINENV calls. + * Check our callers configuration flags, it's an error to configure + * incompatible or additional subsystems in an existing environment. + * Return the total set of flags to the caller so they initialize the + * correct set of subsystems. */ if (init_flagsp != NULL) { - renv->init_flags |= *init_flagsp; + FLD_CLR(*init_flagsp, renv->init_flags); + if (*init_flagsp != 0) { + __db_errx(dbenv, + "configured environment flags incompatible with existing environment"); + ret = EINVAL; + goto err; + } *init_flagsp = renv->init_flags; } - /* Discard our lock. */ - MUTEX_UNLOCK(dbenv, &renv->mutex); - /* * Fault the pages into memory. Note, do this AFTER releasing the * lock, because we're only reading the pages, not writing them. @@ -349,15 +313,17 @@ creation: F_SET(infop, REGION_CREATE); /* - * Allocate room for 100 REGION structures plus overhead (we're going - * to use this space for last-ditch allocation requests), although we - * should never need anything close to that. + * Allocate room for REGION structures plus overhead. * - * Encryption passwds are stored in the env region. Add that in too. + * XXX + * Overhead is so high because encryption passwds are stored in the + * base environment region, as are replication vote arrays. This is + * a bug, not a feature, replication needs its own region. 
*/ memset(&tregion, 0, sizeof(tregion)); - tregion.size = (roff_t)(100 * sizeof(REGION) + - dbenv->passwd_len + 4096); + nregions = dbenv->mp_ncache + 10; + tregion.size = + (roff_t)(nregions * sizeof(REGION) + dbenv->passwd_len + 16 * 1024); tregion.segid = INVALID_REGION_SEGID; if ((ret = __os_r_attach(dbenv, infop, &tregion)) != 0) goto err; @@ -391,17 +357,25 @@ creation: __db_shalloc_init(infop, tregion.size - sizeof(REGENV)); /* - * Initialize the rest of the REGENV structure, except for the magic - * number which validates the file/environment. + * Initialize the rest of the REGENV structure. (Don't set the magic + * number to the correct value, that would validate the environment). */ renv = infop->primary; - renv->envpanic = 0; + renv->magic = 0; + renv->panic = 0; + + (void)db_version(&majver, &minver, &patchver); + renv->majver = (u_int32_t)majver; + renv->minver = (u_int32_t)minver; + renv->patchver = (u_int32_t)patchver; + + (void)time(&renv->timestamp); __os_unique_id(dbenv, &renv->envid); - (void)db_version(&renv->majver, &renv->minver, &renv->patch); - SH_LIST_INIT(&renv->regionq); + + if ((ret = __mutex_alloc( + dbenv, MTX_ENV_REGION, 0, &renv->mtx_regenv)) != 0) + goto err; renv->refcnt = 1; - renv->cipher_off = INVALID_ROFF; - renv->rep_off = INVALID_ROFF; /* * Initialize init_flags to store the flags that any other environment @@ -410,27 +384,27 @@ creation: renv->init_flags = (init_flagsp == NULL) ? 0 : *init_flagsp; /* - * Lock the environment. - * - * Check the lock call return. This is the first lock we initialize - * and acquire, and we have to know if it fails. (It CAN fail, e.g., - * SunOS, when using fcntl(2) for locking and using an in-memory - * filesystem as the database home. But you knew that, I'm sure -- it - * probably wasn't even worth mentioning.) + * Set up the region array. 
We use an array rather than a linked list + * as we have to traverse this list after failure in some cases, and + * we don't want to infinitely loop should the application fail while + * we're manipulating the list. */ - if ((ret = __db_mutex_setup(dbenv, infop, &renv->mutex, - MUTEX_NO_RECORD | MUTEX_NO_RLOCK)) != 0) { - __db_err(dbenv, "%s: unable to initialize environment lock: %s", - infop->name, db_strerror(ret)); + renv->region_cnt = nregions; + if ((ret = + __db_shalloc(infop, nregions * sizeof(REGION), 0, &rp)) != 0) { + __db_err( + dbenv, ret, "unable to create new master region array"); goto err; } + renv->region_off = R_OFFSET(infop, rp); + for (i = 0; i < nregions; ++i, ++rp) + rp->id = INVALID_REGION_ID; - if (!F_ISSET(&renv->mutex, MUTEX_IGNORE) && - (ret = __db_mutex_lock(dbenv, &renv->mutex)) != 0) { - __db_err(dbenv, "%s: unable to acquire environment lock: %s", - infop->name, db_strerror(ret)); - goto err; - } + renv->cipher_off = INVALID_ROFF; + + renv->rep_off = INVALID_ROFF; + renv->flags = 0; + renv->op_timestamp = renv->rep_timestamp = 0; /* * Get the underlying REGION structure for this environment. Note, @@ -439,8 +413,7 @@ creation: * the REGION structure. */ if ((ret = __db_des_get(dbenv, infop, infop, &rp)) != 0) { -find_err: __db_err(dbenv, - "%s: unable to find environment", infop->name); +find_err: __db_errx(dbenv, "%s: unable to find environment", infop->name); if (ret == 0) ret = EINVAL; goto err; @@ -467,32 +440,25 @@ find_err: __db_err(dbenv, ref.segid = tregion.segid; if ((ret = __os_write( dbenv, dbenv->lockfhp, &ref, sizeof(ref), &nrw)) != 0) { - __db_err(dbenv, - "%s: unable to write out public environment ID: %s", - infop->name, db_strerror(ret)); + __db_err(dbenv, ret, + "%s: unable to write out public environment ID", + infop->name); goto err; } } +#ifndef HAVE_MUTEX_FCNTL /* - * If not doing thread locking, we need to save the file handle for - * fcntl(2) locking. 
Otherwise, discard the handle, we no longer - * need it, and the less contact between the buffer cache and the VM, - * the better. + * If we're not doing fcntl locking, we can close the file handle. We + * no longer need it and the less contact between the buffer cache and + * the VM, the better. */ -#if defined(HAVE_MUTEX_THREADS) if (dbenv->lockfhp != NULL) { (void)__os_closehandle(dbenv, dbenv->lockfhp); dbenv->lockfhp = NULL; } #endif - /* Validate the file. */ - renv->magic = DB_REGION_MAGIC; - - /* Discard our lock. */ - MUTEX_UNLOCK(dbenv, &renv->mutex); - /* Everything looks good, we're done. */ dbenv->reginfo = infop; return (0); @@ -529,7 +495,7 @@ retry: /* Close any open file handle. */ /* If we had a temporary error, wait awhile and try again. */ if (ret == 0) { if (++retry_cnt > 3) { - __db_err(dbenv, "unable to join the environment"); + __db_errx(dbenv, "unable to join the environment"); ret = EAGAIN; } else { __os_sleep(dbenv, retry_cnt * 3, 0); @@ -541,6 +507,35 @@ retry: /* Close any open file handle. */ } /* + * __db_e_golive -- + * Turn on the created environment. + * + * PUBLIC: int __db_e_golive __P((DB_ENV *)); + */ +int +__db_e_golive(dbenv) + DB_ENV *dbenv; +{ + REGENV *renv; + REGINFO *infop; + + infop = dbenv->reginfo; + renv = infop->primary; + + /* If we didn't create the region, there's no need for further work. */ + if (!F_ISSET(infop, REGION_CREATE)) + return (0); + + /* + * Validate the file. All other threads of control are waiting + * on this value to be written -- "Let slip the hounds of war!" + */ + renv->magic = DB_REGION_MAGIC; + + return (0); +} + +/* * __db_e_detach -- * Detach from the environment. * @@ -553,81 +548,97 @@ __db_e_detach(dbenv, destroy) { REGENV *renv; REGINFO *infop; + REGION rp; + int ret, t_ret; infop = dbenv->reginfo; renv = infop->primary; + ret = 0; if (F_ISSET(dbenv, DB_ENV_PRIVATE)) destroy = 1; - /* Lock the environment. */ - MUTEX_LOCK(dbenv, &renv->mutex); - /* Decrement the reference count. 
*/ - if (renv->refcnt == 0) { - __db_err(dbenv, - "region %lu (environment): reference count went negative", - (u_long)infop->rp->id); - } else + MUTEX_LOCK(dbenv, renv->mtx_regenv); + if (renv->refcnt == 0) + __db_errx(dbenv, "environment reference count went negative"); + else --renv->refcnt; - - /* Release the lock. */ - MUTEX_UNLOCK(dbenv, &renv->mutex); + MUTEX_UNLOCK(dbenv, renv->mtx_regenv); /* Close the locking file handle. */ if (dbenv->lockfhp != NULL) { - (void)__os_closehandle(dbenv, dbenv->lockfhp); + if ((t_ret = + __os_closehandle(dbenv, dbenv->lockfhp)) != 0 && ret == 0) + ret = t_ret; dbenv->lockfhp = NULL; } /* - * If we are destroying the environment, destroy any system resources - * the crypto and replication systems may have acquired and put in the - * main region. + * Release the region, and kill our reference. */ if (destroy) { #ifdef HAVE_CRYPTO - (void)__crypto_region_destroy(dbenv); + /* + * Destroy any system resources the crypto subsystem may have + * acquired. + */ + if ((t_ret = __crypto_region_destroy(dbenv)) != 0 && ret == 0) + ret = t_ret; #endif - (void)__rep_region_destroy(dbenv); + /* + * Destroy any system resources the replication subsystem may + * have acquired. + */ + if ((t_ret = __rep_region_destroy(dbenv)) != 0 && ret == 0) + ret = t_ret; + + /* + * Free the REGION array. + * + * The actual underlying region structure is allocated from the + * primary shared region, and we're about to free it. Save a + * copy on our stack for the REGINFO to reference when it calls + * down into the OS layer to release the shared memory segment. + */ + rp = *infop->rp; + infop->rp = &rp; + + if (renv->region_off != INVALID_ROFF) + __db_shalloc_free( + infop, R_ADDR(infop, renv->region_off)); + + /* Discard any mutex resources we may have acquired. */ + if ((t_ret = + __mutex_free(dbenv, &renv->mtx_regenv)) != 0 && ret == 0) + ret = t_ret; } /* - * Release the region, and kill our reference. + * Set the DB_ENV->reginfo field to NULL. 
First, DB_ENV->remove calls + * __env_remove to do the region remove, and __envremove attached and + * then detaches from the region. We don't want to return to + * DB_ENV->remove with a non-NULL DB_ENV->reginfo field because it will + * attempt to detach again as part of its cleanup. * - * If we are destroying the environment, destroy any system resources - * backing the mutex. + * Second, DB code uses DB_ENV->reginfo to decide if it's OK to read + * the underlying region. We're about to destroy what it references, + * so it needs to be cleared. */ - if (destroy) { - (void)__db_mutex_destroy(&renv->mutex); - (void)__db_mutex_destroy(&infop->rp->mutex); - - /* - * Only free the REGION structure itself if it was separately - * allocated from the heap. - */ - if (F_ISSET(dbenv, DB_ENV_PRIVATE)) - __db_shalloc_free(infop, infop->rp); - } + dbenv->reginfo = NULL; /* Reset the addr value that we "corrected" above. */ infop->addr = infop->primary; - (void)__os_r_detach(dbenv, infop, destroy); + if ((t_ret = __os_r_detach(dbenv, infop, destroy)) != 0 && ret == 0) + ret = t_ret; if (infop->name != NULL) __os_free(dbenv, infop->name); - /* - * We set the DB_ENV->reginfo field to NULL here and discard its memory. - * DB_ENV->remove calls __dbenv_remove to do the region remove, and - * __dbenv_remove attached and then detaches from the region. We don't - * want to return to DB_ENV->remove with a non-NULL DB_ENV->reginfo - * field because it will attempt to detach again as part of its cleanup. - */ - __os_free(dbenv, dbenv->reginfo); - dbenv->reginfo = NULL; + /* Discard the DB_ENV->reginfo field's memory. */ + __os_free(dbenv, infop); - return (0); + return (ret); } /* @@ -644,33 +655,39 @@ __db_e_remove(dbenv, flags) REGENV *renv; REGINFO *infop, reginfo; REGION *rp; - u_int32_t db_env_reset; - int force, ret; + u_int32_t db_env_reset, i; + int ret; + + db_env_reset = F_ISSET(dbenv, DB_ENV_NOLOCKING | DB_ENV_NOPANIC); - force = LF_ISSET(DB_FORCE) ? 
1 : 0; /* * This routine has to walk a nasty line between not looking into * the environment (which may be corrupted after an app or system * crash), and removing everything that needs removing. What we * do is: - * 1. Connect to the environment (so it better be OK). + * 1. Connect to the environment. * 2. If the environment is in use (reference count is non-zero), * return EBUSY. - * 3. Overwrite the magic number so that any threads of control - * attempting to connect will backoff and retry. - * 4. Walk the list of regions. Connect to each region and then + * 3. Panic it and overwrite the magic number so any threads of + * control attempting to connect (or racing with us) backoff + * and retry or just die. + * 4. Walk the array of regions. Connect to each region and then * disconnect with the destroy flag set. This shouldn't cause * any problems, even if the region is corrupted, because we - * should never be looking inside the region. + * never look inside the region (with the single exception of + * mutex regions on systems where we have to return resources + * to the underlying system). * 5. Walk the list of files in the directory, unlinking any * files that match a region name. Unlink the environment * file last. * * If the force flag is set, we do not acquire any locks during this * process. + * + * We're going to panic the environment, so we'll want to ignore that + * flag. */ - db_env_reset = F_ISSET(dbenv, DB_ENV_NOLOCKING | DB_ENV_NOPANIC); - if (force) + if (LF_ISSET(DB_FORCE)) F_SET(dbenv, DB_ENV_NOLOCKING); F_SET(dbenv, DB_ENV_NOPANIC); @@ -682,7 +699,7 @@ __db_e_remove(dbenv, flags) * probably isn't important. */ ret = 0; - if (force) + if (LF_ISSET(DB_FORCE)) goto remfiles; goto done; } @@ -691,14 +708,14 @@ __db_e_remove(dbenv, flags) renv = infop->primary; /* Lock the environment. 
*/ - MUTEX_LOCK(dbenv, &renv->mutex); + MUTEX_LOCK(dbenv, renv->mtx_regenv); /* * If it's in use, we're done unless we're forcing the issue or the * environment has panic'd. (Presumably, if the environment panic'd, * the thread holding the reference count may not have cleaned up.) */ - if (renv->refcnt == 1 || renv->envpanic == 1 || force) { + if (renv->refcnt == 1 || renv->panic == 1 || LF_ISSET(DB_FORCE)) { /* * Set the panic flag and overwrite the magic number. * @@ -706,42 +723,50 @@ __db_e_remove(dbenv, flags) * From this point on, there's no going back, we pretty * much ignore errors, and just whack on whatever we can. */ - renv->envpanic = 1; renv->magic = 0; + renv->panic = 1; /* - * Unlock the environment. We should no longer need the lock - * because we've poisoned the pool, but we can't continue to - * hold it either, because other routines may want it. + * Unlock the environment -- nobody should need this lock + * because we've poisoned the pool. */ - MUTEX_UNLOCK(dbenv, &renv->mutex); + MUTEX_UNLOCK(dbenv, renv->mtx_regenv); - /* - * Attach to each sub-region and destroy it. - * - * !!! - * The REGION_CREATE_OK flag is set for Windows/95 -- regions - * are zero'd out when the last reference to the region goes - * away, in which case the underlying OS region code requires - * callers be prepared to create the region in order to join it. - */ - memset(&reginfo, 0, sizeof(reginfo)); - for (rp = SH_LIST_FIRST(&renv->regionq, __db_region); - rp != NULL; rp = SH_LIST_NEXT(rp, q, __db_region)) { - if (rp->type == REGION_TYPE_ENV) + /* Attach to each sub-region and destroy it. */ + for (rp = R_ADDR(infop, renv->region_off), + i = 0; i < renv->region_cnt; ++i, ++rp) { + if (rp->id == INVALID_REGION_ID || + rp->type == REGION_TYPE_ENV) continue; + /* + * !!!
+ * The REGION_CREATE_OK flag is set for Windows/95 -- + * regions are zero'd out when the last reference to + * the region goes away, in which case the underlying + * OS region code requires callers be prepared to + * create the region in order to join it. + */ + memset(&reginfo, 0, sizeof(reginfo)); + reginfo.id = rp->id; + reginfo.flags = REGION_CREATE_OK; /* * If we get here and can't attach and/or detach to the * region, it's a mess. Ignore errors, there's nothing * we can do about them. */ - reginfo.id = rp->id; - reginfo.flags = REGION_CREATE_OK; - if (__db_r_attach(dbenv, &reginfo, 0) == 0) { - R_UNLOCK(dbenv, &reginfo); - (void)__db_r_detach(dbenv, &reginfo, 1); - } + if (__db_r_attach(dbenv, &reginfo, 0) != 0) + continue; + +#ifdef HAVE_MUTEX_SYSTEM_RESOURCES + /* + * If destroying the mutex region, return any system + * resources to the system. + */ + if (reginfo.type == REGION_TYPE_MUTEX) + __mutex_resource_return(dbenv, &reginfo); +#endif + (void)__db_r_detach(dbenv, &reginfo, 1); } /* Destroy the environment's region. */ @@ -751,7 +776,7 @@ __db_e_remove(dbenv, flags) remfiles: (void)__db_e_remfile(dbenv); } else { /* Unlock the environment. */ - MUTEX_UNLOCK(dbenv, &renv->mutex); + MUTEX_UNLOCK(dbenv, renv->mtx_regenv); /* Discard the environment. */ (void)__db_e_detach(dbenv, 0); @@ -797,7 +822,7 @@ __db_e_remfile(dbenv) /* Get the list of file names. */ if ((ret = __os_dirlist(dbenv, dir, &names, &fcnt)) != 0) - __db_err(dbenv, "%s: %s", dir, db_strerror(ret)); + __db_err(dbenv, ret, "%s", dir); /* Restore the path, and free it. */ *p = saved_char; @@ -819,8 +844,12 @@ __db_e_remfile(dbenv) if (strncmp(names[cnt], "__dbq.", 6) == 0) continue; + /* Skip registry files. */ + if (strncmp(names[cnt], "__db.register", 13) == 0) + continue; + /* Skip replication files.
*/ - if (strncmp(names[cnt], "__db.rep.", 9) == 0) + if (strncmp(names[cnt], "__db.rep", 8) == 0) continue; /* @@ -846,7 +875,7 @@ __db_e_remfile(dbenv) */ if (F_ISSET(dbenv, DB_ENV_OVERWRITE) && strlen(names[cnt]) == DB_REGION_NAME_LENGTH) - (void)__db_overwrite(dbenv, path); + (void)__db_file_multi_write(dbenv, path); (void)__os_unlink(dbenv, path); __os_free(dbenv, path); } @@ -856,7 +885,7 @@ __db_e_remfile(dbenv) if (__db_appname(dbenv, DB_APP_NONE, names[lastrm], 0, NULL, &path) == 0) { if (F_ISSET(dbenv, DB_ENV_OVERWRITE)) - (void)__db_overwrite(dbenv, path); + (void)__db_file_multi_write(dbenv, path); (void)__os_unlink(dbenv, path); __os_free(dbenv, path); } @@ -877,31 +906,26 @@ __db_r_attach(dbenv, infop, size) REGINFO *infop; size_t size; { - REGENV *renv; REGION *rp; int ret; char buf[sizeof(DB_REGION_FMT) + 20]; - renv = ((REGINFO *)dbenv->reginfo)->primary; - - /* Lock the environment. */ - MUTEX_LOCK(dbenv, &renv->mutex); - /* * Find or create a REGION structure for this region. If we create * it, the REGION_CREATE flag will be set in the infop structure. */ F_CLR(infop, REGION_CREATE); - if ((ret = __db_des_get(dbenv, dbenv->reginfo, infop, &rp)) != 0) { - MUTEX_UNLOCK(dbenv, &renv->mutex); + if ((ret = __db_des_get(dbenv, dbenv->reginfo, infop, &rp)) != 0) return (ret); - } infop->dbenv = dbenv; infop->rp = rp; infop->type = rp->type; infop->id = rp->id; - /* If we're creating the region, set the desired size. */ + /* + * __db_des_get may have created the region and reset the create + * flag. If we're creating the region, set the desired size. + */ if (F_ISSET(infop, REGION_CREATE)) rp->size = (roff_t)size; @@ -932,15 +956,6 @@ __db_r_attach(dbenv, infop, size) if (F_ISSET(infop, REGION_CREATE)) __db_shalloc_init(infop, rp->size); - /* - * If the underlying REGION isn't the environment, acquire a lock - * for it and release our lock on the environment. 
- */ - if (infop->type != REGION_TYPE_ENV) { - MUTEX_LOCK(dbenv, &rp->mutex); - MUTEX_UNLOCK(dbenv, &renv->mutex); - } - return (0); err: /* Discard the underlying region. */ @@ -952,13 +967,10 @@ err: /* Discard the underlying region. */ /* Discard the REGION structure if we created it. */ if (F_ISSET(infop, REGION_CREATE)) { - (void)__db_des_destroy(dbenv, rp, 1); + __db_des_destroy(dbenv, rp); F_CLR(infop, REGION_CREATE); } - /* Release the environment lock. */ - MUTEX_UNLOCK(dbenv, &renv->mutex); - return (ret); } @@ -974,48 +986,28 @@ __db_r_detach(dbenv, infop, destroy) REGINFO *infop; int destroy; { - REGENV *renv; REGION *rp; - int ret, t_ret; + int ret; - renv = ((REGINFO *)dbenv->reginfo)->primary; rp = infop->rp; if (F_ISSET(dbenv, DB_ENV_PRIVATE)) destroy = 1; - /* Lock the environment. */ - MUTEX_LOCK(dbenv, &renv->mutex); - - /* Acquire the lock for the REGION. */ - MUTEX_LOCK(dbenv, &rp->mutex); - /* - * We need to call destroy on per-subsystem info before we free the - * memory associated with the region. + * When discarding the regions as we shut down a database environment, + * discard any allocated shared memory segments. This is the last time + * we use them, and db_region_destroy is the last region-specific call + * we make. */ - if (destroy) - __db_region_destroy(dbenv, infop); + if (F_ISSET(dbenv, DB_ENV_PRIVATE) && infop->primary != NULL) + __db_shalloc_free(infop, infop->primary); /* Detach from the underlying OS region. */ ret = __os_r_detach(dbenv, infop, destroy); - /* Release the REGION lock. */ - MUTEX_UNLOCK(dbenv, &rp->mutex); - - /* - * If we destroyed the region, discard the REGION structure. The only - * time this routine is called with the destroy flag set is when the - * environment is being removed, and it's likely that the only reason - * the environment is being removed is because we crashed. Don't do - * any unnecessary shared memory manipulation. 
- */ - if (destroy && - ((t_ret = __db_des_destroy( - dbenv, rp, F_ISSET(dbenv, DB_ENV_PRIVATE))) != 0) && ret == 0) - ret = t_ret; - - /* Release the environment lock. */ - MUTEX_UNLOCK(dbenv, &renv->mutex); + /* If we destroyed the region, discard the REGION structure. */ + if (destroy) + __db_des_destroy(dbenv, rp); /* Destroy the structure. */ if (infop->name != NULL) @@ -1036,14 +1028,9 @@ __db_des_get(dbenv, env_infop, infop, rpp) REGION **rpp; { REGENV *renv; - REGION *rp, *first_type; - u_int32_t maxid; - int ret; + REGION *rp, *empty_slot, *first_type; + u_int32_t i, maxid; - /* - * !!! - * Called with the environment already locked. - */ *rpp = NULL; renv = env_infop->primary; @@ -1054,14 +1041,20 @@ __db_des_get(dbenv, env_infop, infop, rpp) * return the "primary" region, that is, the first region that was * created of this type. * - * Track the maximum region ID so we can allocate a new region, - * note that we have to start at 1 because the primary environment - * uses ID == 1. + * Track the first empty slot and maximum region ID for new region + * allocation. + * + * MaxID starts at REGION_ID_ENV, the ID of the primary environment. */ maxid = REGION_ID_ENV; - for (first_type = NULL, - rp = SH_LIST_FIRST(&renv->regionq, __db_region); - rp != NULL; rp = SH_LIST_NEXT(rp, q, __db_region)) { + empty_slot = first_type = NULL; + for (rp = R_ADDR(env_infop, renv->region_off), + i = 0; i < renv->region_cnt; ++i, ++rp) { + if (rp->id == INVALID_REGION_ID) { + if (empty_slot == NULL) + empty_slot = rp; + continue; + } if (infop->id != INVALID_REGION_ID) { if (infop->id == rp->id) break; @@ -1075,51 +1068,48 @@ __db_des_get(dbenv, env_infop, infop, rpp) if (rp->id > maxid) maxid = rp->id; } - if (rp == NULL) + + /* If we found a matching ID (or a matching type), return it. */ + if (i >= renv->region_cnt) rp = first_type; + if (rp != NULL) { + *rpp = rp; + return (0); + } /* - * If we didn't find a region and we can't create the region, fail. 
- * The caller generates any error message. + * If we didn't find a region and we don't have permission to create + * the region, fail. The caller generates any error message. */ - if (rp == NULL && !F_ISSET(infop, REGION_CREATE_OK)) + if (!F_ISSET(infop, REGION_CREATE_OK)) return (ENOENT); /* - * If we didn't find a region, create and initialize a REGION structure - * for the caller. If id was set, use that value, otherwise we use the - * next available ID. + * If we didn't find a region and don't have room to create the region + * fail with an error message, there's a sizing problem. */ - if (rp == NULL) { - if ((ret = __db_shalloc(env_infop, - sizeof(REGION), MUTEX_ALIGN, &rp)) != 0) { - __db_err(dbenv, - "unable to create new master region entry: %s", - db_strerror(ret)); - return (ret); - } + if (empty_slot == NULL) { + __db_errx(dbenv, "no room remaining for additional REGIONs"); + return (ENOENT); + } - /* Initialize the region. */ - memset(rp, 0, sizeof(*rp)); - if ((ret = __db_mutex_setup(dbenv, env_infop, &rp->mutex, - MUTEX_NO_RECORD | MUTEX_NO_RLOCK)) != 0) { - __db_shalloc_free(env_infop, rp); - return (ret); - } - rp->segid = INVALID_REGION_SEGID; + /* + * Initialize a REGION structure for the caller. If id was set, use + * that value, otherwise we use the next available ID. + */ + memset(empty_slot, 0, sizeof(REGION)); + empty_slot->segid = INVALID_REGION_SEGID; - /* - * Set the type and ID; if no region ID was specified, - * allocate one. - */ - rp->type = infop->type; - rp->id = infop->id == INVALID_REGION_ID ? maxid + 1 : infop->id; + /* + * Set the type and ID; if no region ID was specified, + * allocate one. + */ + empty_slot->type = infop->type; + empty_slot->id = infop->id == INVALID_REGION_ID ? 
maxid + 1 : infop->id; - SH_LIST_INSERT_HEAD(&renv->regionq, rp, q, __db_region); - F_SET(infop, REGION_CREATE); - } + F_SET(infop, REGION_CREATE); - *rpp = rp; + *rpp = empty_slot; return (0); } @@ -1127,41 +1117,14 @@ __db_des_get(dbenv, env_infop, infop, rpp) * __db_des_destroy -- * Destroy a reference to a REGION. */ -static int -__db_des_destroy(dbenv, rp, shmem_safe) +static void +__db_des_destroy(dbenv, rp) DB_ENV *dbenv; REGION *rp; - int shmem_safe; { - REGINFO *infop; - - /* - * !!! - * Called with the environment already locked. - */ - infop = dbenv->reginfo; - - /* - * If we're calling during recovery, it may not be safe to access the - * shared memory, as the shared memory may have been corrupted during - * the crash. If the shared memory is safe, remove the REGION entry - * from its linked list, destroy the mutex, and free the allocated - * memory. On systems that require system mutex support, we don't - * have a choice -- safe or not, we have to destroy the mutex or we'll - * leak memory. - */ -#ifdef HAVE_MUTEX_SYSTEM_RESOURCES - (void)__db_mutex_destroy(&rp->mutex); -#else - if (shmem_safe) - (void)__db_mutex_destroy(&rp->mutex); -#endif - if (shmem_safe) { - SH_LIST_REMOVE(rp, q, __db_region); - __db_shalloc_free(infop, rp); - } + COMPQUIET(dbenv, NULL); - return (0); + rp->id = INVALID_REGION_ID; } /* @@ -1193,51 +1156,21 @@ __db_faultmem(dbenv, addr, size, created) * system can't cheat. If we're just joining the region, we can * only read the value and try to confuse the compiler sufficiently * that it doesn't figure out that we're never really using it. + * + * Touch every page (assuming pages are 512B, the smallest VM page + * size used in any general purpose processor). 
*/ ret = 0; if (F_ISSET(dbenv, DB_ENV_REGION_INIT)) { if (created) - for (p = addr, t = (u_int8_t *)addr + size; - p < t; p += OS_VMPAGESIZE) + for (p = addr, + t = (u_int8_t *)addr + size; p < t; p += 512) p[0] = 0xdb; else - for (p = addr, t = (u_int8_t *)addr + size; - p < t; p += OS_VMPAGESIZE) + for (p = addr, + t = (u_int8_t *)addr + size; p < t; p += 512) ret |= p[0]; } return (ret); } - -/* - * __db_region_destroy -- - * Destroy per-subsystem region information. - * Called with the region already locked. - */ -static void -__db_region_destroy(dbenv, infop) - DB_ENV *dbenv; - REGINFO *infop; -{ - switch (infop->type) { - case REGION_TYPE_LOCK: - __lock_region_destroy(dbenv, infop); - break; - case REGION_TYPE_LOG: - __log_region_destroy(dbenv, infop); - break; - case REGION_TYPE_MPOOL: - __memp_region_destroy(dbenv, infop); - break; - case REGION_TYPE_TXN: - __txn_region_destroy(dbenv, infop); - break; - case REGION_TYPE_ENV: - case REGION_TYPE_MUTEX: - break; - case INVALID_REGION_TYPE: - default: - DB_ASSERT(0); - break; - } -} diff --git a/db/env/env_register.c b/db/env/env_register.c new file mode 100644 index 000000000..1936da9f2 --- /dev/null +++ b/db/env/env_register.c @@ -0,0 +1,422 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2004-2006 + * Oracle Corporation. All rights reserved. 
+ * + * $Id: env_register.c,v 1.30 2006/09/09 14:28:22 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +#define REGISTER_FILE "__db.register" + +#define PID_EMPTY "X 0\n" /* Unused PID entry */ +#define PID_FMT "%24lu\n" /* PID entry format */ + /* Unused PID test */ +#define PID_ISEMPTY(p) (memcmp(p, PID_EMPTY, PID_LEN) == 0) +#define PID_LEN (25) /* PID entry length */ + +#define REGISTRY_LOCK(dbenv, pos, nowait) \ + __os_fdlock(dbenv, (dbenv)->registry, (off_t)(pos), 1, nowait) +#define REGISTRY_UNLOCK(dbenv, pos) \ + __os_fdlock(dbenv, (dbenv)->registry, (off_t)(pos), 0, 0) +#define REGISTRY_EXCL_LOCK(dbenv, nowait) \ + REGISTRY_LOCK(dbenv, 1, nowait) +#define REGISTRY_EXCL_UNLOCK(dbenv) \ + REGISTRY_UNLOCK(dbenv, 1) + +static int __envreg_add __P((DB_ENV *, int *)); + +/* + * Support for portable, multi-process database environment locking, based on + * the Subversion SR (#11511). + * + * The registry feature is configured by specifying the DB_REGISTER flag to the + * DbEnv.open method. If DB_REGISTER is specified, DB opens the registry file + * in the database environment home directory. The registry file is formatted + * as follows: + * + * 12345 # process ID slot 1 + * X # empty slot + * 12346 # process ID slot 2 + * X # empty slot + * 12347 # process ID slot 3 + * 12348 # process ID slot 4 + * X 12349 # empty slot + * X # empty slot + * + * All lines are fixed-length. All lines are process ID slots. Empty slots + * are marked with leading non-digit characters. + * + * To modify the file, you get an exclusive lock on the first byte of the file. + * + * While holding any DbEnv handle, each process has an exclusive lock on the + * first byte of a process ID slot. There is a restriction on having more + * than one DbEnv handle open at a time, because Berkeley DB uses per-process + * locking to implement this feature, that is, a process may never have more + * than a single slot locked. 
+ * + * This work requires that if a process dies or the system crashes, locks held + * by the dying processes will be dropped. (We can't use system shared + * memory-backed or filesystem-backed locks because they're persistent when a + * process dies.) On POSIX systems, we use fcntl(2) locks; on Win32 we have + * LockFileEx/UnlockFile, except for Win/9X and Win/ME which have to loop on + * LockFile/UnlockFile. + * + * We could implement the same solution with flock locking instead of fcntl, + * but flock would require a separate file for each process of control (and + * probably each DbEnv handle) in the database environment, which is fairly + * ugly. + * + * Whenever a process opens a new DbEnv handle, it walks the registry file and + * verifies it CANNOT acquire the lock for any non-empty slot. If a lock for + * a non-empty slot is available, we know a process died holding an open handle, + * and recovery needs to be run. + * + * It's possible to get corruption in the registry file. If a write system + * call fails after partially completing, there can be corrupted entries in + * the registry file, or a partial entry at the end of the file. This is OK. + * A corrupted entry will be flagged as a non-empty line during the registry + * file walk. Since the line was corrupted by process failure, no process will + * hold a lock on the slot, which will lead to recovery being run. + * + * There can still be processes running in the environment when we recover it, + * and, in fact, there can still be processes running in the old environment + * after we're up and running in a new one. This is safe because performing + * recovery panics (and removes) the existing environment, so the window of + * vulnerability is small. Further, we check the panic flag in the DB API + * methods, when waking from spinning on a mutex, and whenever we're about to + * write to disk. 
The only window of corruption is if the write check of the + * panic were to complete, the region subsequently be recovered, and then the + * write continues. That's very, very unlikely to happen. This vulnerability + * already exists in Berkeley DB, too, the registry code doesn't make it any + * worse than it already is. + */ +/* + * __envreg_register -- + * Register a DB_ENV handle. + * + * PUBLIC: int __envreg_register __P((DB_ENV *, int *)); + */ +int +__envreg_register(dbenv, need_recoveryp) + DB_ENV *dbenv; + int *need_recoveryp; +{ + pid_t pid; + u_int32_t bytes, mbytes; + int ret; + char *pp; + + *need_recoveryp = 0; + dbenv->thread_id(dbenv, &pid, NULL); + pp = NULL; + + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(dbenv, "%lu: register environment", (u_long)pid); + + /* Build the path name and open the registry file. */ + if ((ret = + __db_appname(dbenv, DB_APP_NONE, REGISTER_FILE, 0, NULL, &pp)) != 0) + goto err; + if ((ret = __os_open(dbenv, pp, + DB_OSO_CREATE, __db_omode("rw-rw----"), &dbenv->registry)) != 0) + goto err; + + /* + * Wait for an exclusive lock on the file. + * + * !!! + * We're locking bytes that don't yet exist, but that's OK as far as + * I know. + */ + if ((ret = REGISTRY_EXCL_LOCK(dbenv, 0)) != 0) + goto err; + + /* + * If the file size is 0, initialize the file. + * + * Run recovery if we create the file, that means we can clean up the + * system by removing the registry file and restarting the application. + */ + if ((ret = __os_ioinfo( + dbenv, pp, dbenv->registry, &mbytes, &bytes, NULL)) != 0) + goto err; + if (mbytes == 0 && bytes == 0) { + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(dbenv, "%lu: creating %s", (u_long)pid, pp); + *need_recoveryp = 1; + } + + /* Register this process. */ + if ((ret = __envreg_add(dbenv, need_recoveryp)) != 0) + goto err; + + /* + * Release our exclusive lock if we don't need to run recovery. 
If + * we need to run recovery, DB_ENV->open will call back into register + * code once recovery has completed. + */ + if (*need_recoveryp == 0 && (ret = REGISTRY_EXCL_UNLOCK(dbenv)) != 0) + goto err; + + if (0) { +err: *need_recoveryp = 0; + + /* + * !!! + * Closing the file handle must release all of our locks. + */ + if (dbenv->registry != NULL) + (void)__os_closehandle(dbenv, dbenv->registry); + dbenv->registry = NULL; + } + + if (pp != NULL) + __os_free(dbenv, pp); + + return (ret); +} + +/* + * __envreg_add -- + * Add the process' pid to the register. + */ +static int +__envreg_add(dbenv, need_recoveryp) + DB_ENV *dbenv; + int *need_recoveryp; +{ + pid_t pid; + off_t end, pos; + size_t nr, nw; + u_int lcnt; + u_int32_t bytes, mbytes; + int need_recovery, ret; + char *p, buf[PID_LEN + 10], pid_buf[PID_LEN + 10]; + + need_recovery = 0; + COMPQUIET(p, NULL); + + /* Get a copy of our process ID. */ + dbenv->thread_id(dbenv, &pid, NULL); + snprintf(pid_buf, sizeof(pid_buf), PID_FMT, (u_long)pid); + + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(dbenv, "===== %lu: before add", (u_long)pid); + + /* + * Read the file. Skip empty slots, and check that a lock is held + * for any allocated slots. An allocated slot which we can lock + * indicates a process died holding a handle and recovery needs to + * be run. + */ + for (lcnt = 0;; ++lcnt) { + if ((ret = __os_read( + dbenv, dbenv->registry, buf, PID_LEN, &nr)) != 0) + return (ret); + if (nr == 0) + break; + + /* + * A partial record at the end of the file is possible if a + * previously un-registered process was interrupted while + * registering. + */ + if (nr != PID_LEN) { + need_recovery = 1; + break; + } + + if (PID_ISEMPTY(buf)) { + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(dbenv, "%02u: EMPTY", lcnt); + continue; + } + + /* + * !!! + * DB_REGISTER is implemented using per-process locking, only + * a single DB_ENV handle may be open per process. Enforce + * that restriction. 
+ */ + if (memcmp(buf, pid_buf, PID_LEN) == 0) { + __db_errx(dbenv, + "DB_REGISTER limits processes to one open DB_ENV handle per environment"); + return (EINVAL); + } + + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) { + for (p = buf; *p == ' ';) + ++p; + buf[nr - 1] = '\0'; + } + + pos = (off_t)lcnt * PID_LEN; + if (REGISTRY_LOCK(dbenv, pos, 1) == 0) { + if ((ret = REGISTRY_UNLOCK(dbenv, pos)) != 0) + return (ret); + + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(dbenv, "%02u: %s: FAILED", lcnt, p); + + need_recovery = 1; + break; + } else + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(dbenv, "%02u: %s: LOCKED", lcnt, p); + } + + /* + * If we have to perform recovery... + * + * Mark all slots empty. Registry ignores empty slots we can't lock, + * so it doesn't matter if any of the processes are in the middle of + * exiting Berkeley DB -- they'll discard their lock when they exit. + */ + if (need_recovery) { + /* Figure out how big the file is. */ + if ((ret = __os_ioinfo( + dbenv, NULL, dbenv->registry, &mbytes, &bytes, NULL)) != 0) + return (ret); + end = (off_t)mbytes * MEGABYTE + bytes; + + /* + * Seek to the beginning of the file and overwrite slots to + * the end of the file. + * + * It's possible for there to be a partial entry at the end of + * the file if a process died when trying to register. If so, + * correct for it and overwrite it as well. + */ + if ((ret = __os_seek(dbenv, dbenv->registry, 0, 0, 0)) != 0) + return (ret); + for (lcnt = (u_int)end / PID_LEN + + ((u_int)end % PID_LEN == 0 ? 0 : 1); lcnt > 0; --lcnt) + if ((ret = __os_write(dbenv, + dbenv->registry, PID_EMPTY, PID_LEN, &nw)) != 0) + return (ret); + } + + /* + * Seek to the first process slot and add ourselves to the first empty + * slot we can lock. 
+ */ + if ((ret = __os_seek(dbenv, dbenv->registry, 0, 0, 0)) != 0) + return (ret); + for (lcnt = 0;; ++lcnt) { + if ((ret = __os_read( + dbenv, dbenv->registry, buf, PID_LEN, &nr)) != 0) + return (ret); + if (nr == PID_LEN && !PID_ISEMPTY(buf)) + continue; + pos = (off_t)lcnt * PID_LEN; + if (REGISTRY_LOCK(dbenv, pos, 1) == 0) { + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(dbenv, + "%lu: locking slot %02u at offset %lu", + (u_long)pid, lcnt, (u_long)pos); + + if ((ret = __os_seek(dbenv, + dbenv->registry, 0, 0, (u_int32_t)pos)) != 0 || + (ret = __os_write(dbenv, + dbenv->registry, pid_buf, PID_LEN, &nw)) != 0) + return (ret); + dbenv->registry_off = (u_int32_t)pos; + break; + } + } + + if (need_recovery) + *need_recoveryp = 1; + + return (ret); +} + +/* + * __envreg_unregister -- + * Unregister a DB_ENV handle. + * + * PUBLIC: int __envreg_unregister __P((DB_ENV *, int)); + */ +int +__envreg_unregister(dbenv, recovery_failed) + DB_ENV *dbenv; + int recovery_failed; +{ + size_t nw; + int ret, t_ret; + + ret = 0; + + /* + * If recovery failed, we want to drop our locks and return, but still + * make sure any subsequent process doesn't decide everything is just + * fine and try to get into the database environment. In the case of + * an error, discard our locks, but leave our slot filled-in. + */ + if (recovery_failed) + goto err; + + /* + * Why isn't an exclusive lock necessary to discard a DB_ENV handle? + * + * We mark our process ID slot empty before we discard the process slot + * lock, and threads of control reviewing the register file ignore any + * slots which they can't lock. + */ + if ((ret = __os_seek(dbenv, + dbenv->registry, 0, 0, dbenv->registry_off)) != 0 || + (ret = __os_write( + dbenv, dbenv->registry, PID_EMPTY, PID_LEN, &nw)) != 0) + goto err; + + /* + * !!! + * This code assumes that closing the file descriptor discards all + * held locks. + * + * !!! 
+ * There is an ordering problem here -- in the case of a process that + * failed in recovery, we're unlocking both the exclusive lock and our + * slot lock. If the OS unlocked the exclusive lock and then allowed + * another thread of control to acquire the exclusive lock before + * also releasing our slot lock, we could race. That can't happen, I + * don't think. + */ +err: if ((t_ret = + __os_closehandle(dbenv, dbenv->registry)) != 0 && ret == 0) + ret = t_ret; + + dbenv->registry = NULL; + return (ret); +} + +/* + * __envreg_xunlock -- + * Discard the exclusive lock held by the DB_ENV handle. + * + * PUBLIC: int __envreg_xunlock __P((DB_ENV *)); + */ +int +__envreg_xunlock(dbenv) + DB_ENV *dbenv; +{ + pid_t pid; + int ret; + + dbenv->thread_id(dbenv, &pid, NULL); + + if (FLD_ISSET(dbenv->verbose, DB_VERB_REGISTER)) + __db_msg(dbenv, + "%lu: recovery completed, unlocking", (u_long)pid); + + if ((ret = REGISTRY_EXCL_UNLOCK(dbenv)) == 0) + return (ret); + + __db_err(dbenv, ret, "%s: exclusive file unlock", REGISTER_FILE); + return (__db_panic(dbenv, ret)); +} diff --git a/db/env/env_stat.c b/db/env/env_stat.c index c9ab7a112..241a1c5cd 100644 --- a/db/env/env_stat.c +++ b/db/env/env_stat.c @@ -1,21 +1,16 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 1996-2006 + * Oracle Corporation. All rights reserved. 
* - * $Id: env_stat.c,v 1.21 2004/10/29 17:37:23 bostic Exp $ + * $Id: env_stat.c,v 12.36 2006/09/08 19:25:15 bostic Exp $ */ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> -#endif - #include "db_int.h" #include "dbinc/db_page.h" -#include "dbinc/db_shash.h" #include "dbinc/db_am.h" #include "dbinc/lock.h" #include "dbinc/log.h" @@ -23,23 +18,27 @@ #include "dbinc/txn.h" #ifdef HAVE_STATISTICS -static int __dbenv_print_all __P((DB_ENV *, u_int32_t)); -static int __dbenv_print_stats __P((DB_ENV *, u_int32_t)); -static int __dbenv_stat_print __P((DB_ENV *, u_int32_t)); -static const char *__reg_type __P((reg_type_t)); +static int __env_print_all __P((DB_ENV *, u_int32_t)); +static int __env_print_stats __P((DB_ENV *, u_int32_t)); +static int __env_print_threads __P((DB_ENV *)); +static int __env_stat_print __P((DB_ENV *, u_int32_t)); +static char *__env_thread_state_print __P((DB_THREAD_STATE)); +static const char * + __reg_type __P((reg_type_t)); /* - * __dbenv_stat_print_pp -- + * __env_stat_print_pp -- * DB_ENV->stat_print pre/post processor. * - * PUBLIC: int __dbenv_stat_print_pp __P((DB_ENV *, u_int32_t)); + * PUBLIC: int __env_stat_print_pp __P((DB_ENV *, u_int32_t)); */ int -__dbenv_stat_print_pp(dbenv, flags) +__env_stat_print_pp(dbenv, flags) DB_ENV *dbenv; u_int32_t flags; { - int rep_check, ret; + DB_THREAD_INFO *ip; + int ret; PANIC_CHECK(dbenv); ENV_ILLEGAL_BEFORE_OPEN(dbenv, "DB_ENV->stat_print"); @@ -48,32 +47,36 @@ __dbenv_stat_print_pp(dbenv, flags) flags, DB_STAT_ALL | DB_STAT_CLEAR | DB_STAT_SUBSYSTEM)) != 0) return (ret); - rep_check = IS_ENV_REPLICATED(dbenv) ? 1 : 0; - if (rep_check) - __env_rep_enter(dbenv); - ret = __dbenv_stat_print(dbenv, flags); - if (rep_check) - __env_db_rep_exit(dbenv); + ENV_ENTER(dbenv, ip); + REPLICATION_WRAP(dbenv, (__env_stat_print(dbenv, flags)), ret); + ENV_LEAVE(dbenv, ip); return (ret); } /* - * __dbenv_stat_print -- + * __env_stat_print -- * DB_ENV->stat_print method. 
*/ static int -__dbenv_stat_print(dbenv, flags) +__env_stat_print(dbenv, flags) DB_ENV *dbenv; u_int32_t flags; { - DB *dbp; + time_t now; int ret; + char time_buf[CTIME_BUFLEN]; + + (void)time(&now); + __db_msg(dbenv, "%.24s\tLocal time", __db_ctime(&now, time_buf)); - if ((ret = __dbenv_print_stats(dbenv, flags)) != 0) + if ((ret = __env_print_stats(dbenv, flags)) != 0) return (ret); if (LF_ISSET(DB_STAT_ALL) && - (ret = __dbenv_print_all(dbenv, flags)) != 0) + (ret = __env_print_all(dbenv, flags)) != 0) + return (ret); + + if ((ret = __env_print_threads(dbenv)) != 0) return (ret); if (!LF_ISSET(DB_STAT_SUBSYSTEM)) @@ -86,6 +89,10 @@ __dbenv_stat_print(dbenv, flags) __db_msg(dbenv, "%s", DB_GLOBAL(db_line)); if ((ret = __log_stat_print(dbenv, flags)) != 0) return (ret); + + __db_msg(dbenv, "%s", DB_GLOBAL(db_line)); + if ((ret = __dbreg_stat_print(dbenv, flags)) != 0) + return (ret); } if (LOCKING_ON(dbenv)) { @@ -112,33 +119,28 @@ __dbenv_stat_print(dbenv, flags) return (ret); } - MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); - for (dbp = LIST_FIRST(&dbenv->dblist); - dbp != NULL; dbp = LIST_NEXT(dbp, dblistlinks)) { + if (MUTEX_ON(dbenv)) { __db_msg(dbenv, "%s", DB_GLOBAL(db_line)); - __db_msg(dbenv, "%s%s%s\tDatabase name", - dbp->fname, dbp->dname == NULL ? "" : "/", - dbp->dname == NULL ? "" : dbp->dname); - if ((ret = __db_stat_print(dbp, flags)) != 0) - break; + if ((ret = __mutex_stat_print(dbenv, flags)) != 0) + return (ret); } - MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); - return (ret); + return (0); } /* - * __dbenv_print_stats -- + * __env_print_stats -- * Display the default environment statistics. 
* */ static int -__dbenv_print_stats(dbenv, flags) +__env_print_stats(dbenv, flags) DB_ENV *dbenv; u_int32_t flags; { REGENV *renv; REGINFO *infop; + char time_buf[CTIME_BUFLEN]; infop = dbenv->reginfo; renv = infop->primary; @@ -147,24 +149,27 @@ __dbenv_print_stats(dbenv, flags) __db_msg(dbenv, "%s", DB_GLOBAL(db_line)); __db_msg(dbenv, "Default database environment information:"); } - __db_msg(dbenv, "%d.%d.%d\tEnvironment version", - renv->majver, renv->minver, renv->patch); STAT_HEX("Magic number", renv->magic); - STAT_LONG("Panic value", renv->envpanic); + STAT_LONG("Panic value", renv->panic); + __db_msg(dbenv, "%d.%d.%d\tEnvironment version", + renv->majver, renv->minver, renv->patchver); + __db_msg(dbenv, + "%.24s\tCreation time", __db_ctime(&renv->timestamp, time_buf)); + STAT_HEX("Environment ID", renv->envid); + __mutex_print_debug_single(dbenv, + "Primary region allocation and reference count mutex", + renv->mtx_regenv, flags); STAT_LONG("References", renv->refcnt); - __db_print_mutex(dbenv, NULL, &renv->mutex, - "The number of region locks that required waiting", flags); - return (0); } /* - * __dbenv_print_all -- + * __env_print_all -- * Display the debugging environment statistics. 
*/ static int -__dbenv_print_all(dbenv, flags) +__env_print_all(dbenv, flags) DB_ENV *dbenv; u_int32_t flags; { @@ -176,6 +181,7 @@ __dbenv_print_all(dbenv, flags) { DB_ENV_DBLOCAL, "DB_ENV_DBLOCAL" }, { DB_ENV_DIRECT_DB, "DB_ENV_DIRECT_DB" }, { DB_ENV_DIRECT_LOG, "DB_ENV_DIRECT_LOG" }, + { DB_ENV_DSYNC_DB, "DB_ENV_DSYNC_DB" }, { DB_ENV_DSYNC_LOG, "DB_ENV_DSYNC_LOG" }, { DB_ENV_FATAL, "DB_ENV_FATAL" }, { DB_ENV_LOCKDOWN, "DB_ENV_LOCKDOWN" }, @@ -200,15 +206,13 @@ __dbenv_print_all(dbenv, flags) }; static const FN ofn[] = { { DB_CREATE, "DB_CREATE" }, - { DB_CXX_NO_EXCEPTIONS, "DB_CXX_NO_EXCEPTIONS" }, { DB_FORCE, "DB_FORCE" }, { DB_INIT_CDB, "DB_INIT_CDB" }, - { DB_INIT_LOCK, "DB_INIT_LOCK" }, + { DB_INIT_LOCK, "DB_INIT_LOCK" }, { DB_INIT_LOG, "DB_INIT_LOG" }, { DB_INIT_MPOOL, "DB_INIT_MPOOL" }, { DB_INIT_REP, "DB_INIT_REP" }, { DB_INIT_TXN, "DB_INIT_TXN" }, - { DB_JOINENV, "DB_JOINENV" }, { DB_LOCKDOWN, "DB_LOCKDOWN" }, { DB_NOMMAP, "DB_NOMMAP" }, { DB_PRIVATE, "DB_PRIVATE" }, @@ -226,51 +230,50 @@ __dbenv_print_all(dbenv, flags) static const FN vfn[] = { { DB_VERB_DEADLOCK, "DB_VERB_DEADLOCK" }, { DB_VERB_RECOVERY, "DB_VERB_RECOVERY" }, + { DB_VERB_REGISTER, "DB_VERB_REGISTER" }, { DB_VERB_REPLICATION, "DB_VERB_REPLICATION" }, { DB_VERB_WAITSFOR, "DB_VERB_WAITSFOR" }, { 0, NULL } }; + static const FN regenvfn[] = { + { DB_REGENV_REPLOCKED, "DB_REGENV_REPLOCKED" }, + { 0, NULL } + }; DB_MSGBUF mb; REGENV *renv; REGINFO *infop; - REGION *rp, regs[1024]; - size_t n; - char **p; + REGION *rp; + u_int32_t i; + char **p, time_buf[CTIME_BUFLEN]; infop = dbenv->reginfo; renv = infop->primary; DB_MSGBUF_INIT(&mb); - /* - * Lock the database environment while we get copies of the region - * information. 
- */ - MUTEX_LOCK(dbenv, &infop->rp->mutex); - - for (n = 0, rp = SH_LIST_FIRST(&renv->regionq, __db_region); - n < sizeof(regs) / sizeof(regs[0]) && rp != NULL; - ++n, rp = SH_LIST_NEXT(rp, q, __db_region)) { - regs[n] = *rp; - if (LF_ISSET(DB_STAT_CLEAR)) - MUTEX_CLEAR(&rp->mutex); - } - if (n > 0) - --n; - MUTEX_UNLOCK(dbenv, &infop->rp->mutex); + __db_msg(dbenv, "%s", DB_GLOBAL(db_line)); + __db_prflags(dbenv, + NULL, renv->init_flags, ofn, NULL, "\tInitialization flags"); + STAT_ULONG("Region slots", renv->region_cnt); + __db_prflags(dbenv, + NULL, renv->flags, regenvfn, NULL, "\tReplication flags"); + __db_msg(dbenv, "%.24s\tOperation timestamp", + renv->op_timestamp == 0 ? + "!Set" : __db_ctime(&renv->op_timestamp, time_buf)); + __db_msg(dbenv, "%.24s\tReplication timestamp", + renv->rep_timestamp == 0 ? + "!Set" : __db_ctime(&renv->rep_timestamp, time_buf)); - if (LF_ISSET(DB_STAT_ALL)) { - __db_msg(dbenv, "%s", DB_GLOBAL(db_line)); - __db_msg(dbenv, "Per region database environment information:"); - } - while (n > 0) { - rp = ®s[--n]; + __db_msg(dbenv, "%s", DB_GLOBAL(db_line)); + __db_msg(dbenv, "Per region database environment information:"); + for (rp = R_ADDR(infop, renv->region_off), + i = 0; i < renv->region_cnt; ++i, ++rp) { + if (rp->id == INVALID_REGION_ID) + continue; __db_msg(dbenv, "%s Region:", __reg_type(rp->type)); STAT_LONG("Region ID", rp->id); STAT_LONG("Segment ID", rp->segid); __db_dlbytes(dbenv, "Size", (u_long)0, (u_long)0, (u_long)rp->size); - __db_print_mutex(dbenv, NULL, &rp->mutex, - "The number of region locks that required waiting", flags); } __db_msg(dbenv, "%s", DB_GLOBAL(db_line)); @@ -278,6 +281,7 @@ __dbenv_print_all(dbenv, flags) STAT_ISSET("Errfile", dbenv->db_errfile); STAT_STRING("Errpfx", dbenv->db_errpfx); STAT_ISSET("Errcall", dbenv->db_errcall); + STAT_ISSET("Event", dbenv->db_event_func); STAT_ISSET("Feedback", dbenv->db_feedback); STAT_ISSET("Panic", dbenv->db_paniccall); STAT_ISSET("Malloc", dbenv->db_malloc); @@ 
-300,36 +304,80 @@ __dbenv_print_all(dbenv, flags) STAT_FMT("Mode", "%#o", int, dbenv->db_mode); __db_prflags(dbenv, NULL, dbenv->open_flags, ofn, NULL, "\tOpen flags"); STAT_ISSET("Lockfhp", dbenv->lockfhp); - STAT_ISSET("Rec tab", dbenv->recover_dtab); - STAT_ULONG("Rec tab slots", dbenv->recover_dtab_size); + STAT_ISSET("Recovery table", dbenv->recover_dtab); + STAT_ULONG("Number of recovery table slots", dbenv->recover_dtab_size); STAT_ISSET("RPC client", dbenv->cl_handle); STAT_LONG("RPC client ID", dbenv->cl_id); - STAT_LONG("DB ref count", dbenv->db_ref); - STAT_LONG("Shared mem key", dbenv->shm_key); - STAT_ULONG("test-and-set spin configuration", dbenv->tas_spins); - __db_print_mutex( - dbenv, NULL, dbenv->dblist_mutexp, "DB handle mutex", flags); + STAT_LONG("DB reference count", dbenv->db_ref); + STAT_LONG("Shared memory key", dbenv->shm_key); + __mutex_print_debug_single( + dbenv, "DB handle mutex", dbenv->mtx_dblist, flags); STAT_ISSET("api1 internal", dbenv->api1_internal); STAT_ISSET("api2 internal", dbenv->api2_internal); STAT_ISSET("password", dbenv->passwd); STAT_ISSET("crypto handle", dbenv->crypto_handle); - __db_print_mutex(dbenv, NULL, dbenv->mt_mutexp, "MT mutex", flags); + __mutex_print_debug_single(dbenv, "MT mutex", dbenv->mtx_mt, flags); __db_prflags(dbenv, NULL, dbenv->flags, fn, NULL, "\tFlags"); return (0); } +static char * +__env_thread_state_print(state) + DB_THREAD_STATE state; +{ + switch (state) { + case THREAD_ACTIVE: + return ("active"); + case THREAD_BLOCKED: + return ("blocked"); + case THREAD_OUT: + return ("out"); + default: + return ("unknown"); + } +} + +/* + * __env_print_threads -- + * Display the current active threads + * + */ +static int +__env_print_threads(dbenv) + DB_ENV *dbenv; +{ + DB_HASHTAB *htab; + DB_THREAD_INFO *ip; + u_int32_t i; + char buf[DB_THREADID_STRLEN]; + + htab = (DB_HASHTAB *)dbenv->thr_hashtab; + __db_msg(dbenv, "Thread status blocks:"); + for (i = 0; i < dbenv->thr_nbucket; i++) + 
SH_TAILQ_FOREACH(ip, &htab[i], dbth_links, __db_thread_info) { + if (ip->dbth_state == THREAD_SLOT_NOT_IN_USE) + continue; + __db_msg(dbenv, "\tprocess/thread %s: %s", + dbenv->thread_id_string( + dbenv, ip->dbth_pid, ip->dbth_tid, buf), + __env_thread_state_print(ip->dbth_state)); + } + return (0); +} + /* * __db_print_fh -- * Print out a file handle. * - * PUBLIC: void __db_print_fh __P((DB_ENV *, DB_FH *, u_int32_t)); + * PUBLIC: void __db_print_fh __P((DB_ENV *, const char *, DB_FH *, u_int32_t)); */ void -__db_print_fh(dbenv, fh, flags) +__db_print_fh(dbenv, tag, fh, flags) DB_ENV *dbenv; + const char *tag; DB_FH *fh; u_int32_t flags; { @@ -340,7 +388,13 @@ __db_print_fh(dbenv, fh, flags) { 0, NULL } }; - __db_print_mutex(dbenv, NULL, fh->mutexp, "file-handle.mutex", flags); + if (fh == NULL) { + STAT_ISSET(tag, fh); + return; + } + + __mutex_print_debug_single( + dbenv, "file-handle.mutex", fh->mtx_fh, flags); STAT_LONG("file-handle.reference count", fh->ref); STAT_LONG("file-handle.file descriptor", fh->fd); @@ -368,6 +422,11 @@ __db_print_fileid(dbenv, id, suffix) DB_MSGBUF mb; int i; + if (id == NULL) { + STAT_ISSET("ID", id); + return; + } + DB_MSGBUF_INIT(&mb); for (i = 0; i < DB_FILE_ID_LEN; ++i, ++id) { __db_msgadd(dbenv, &mb, "%x", (u_int)*id); @@ -380,95 +439,6 @@ __db_print_fileid(dbenv, id, suffix) } /* - * __db_print_mutex -- - * Print out mutex statistics. - * - * PUBLIC: void __db_print_mutex - * PUBLIC: __P((DB_ENV *, DB_MSGBUF *, DB_MUTEX *, const char *, u_int32_t)); - */ -void -__db_print_mutex(dbenv, mbp, mutex, suffix, flags) - DB_ENV *dbenv; - DB_MSGBUF *mbp; - DB_MUTEX *mutex; - const char *suffix; - u_int32_t flags; -{ - DB_MSGBUF mb; - u_long value; - int standalone; - - /* If we don't have a mutex, point that out and return. */ - if (mutex == NULL) { - STAT_ISSET(suffix, mutex); - return; - } - - if (mbp == NULL) { - DB_MSGBUF_INIT(&mb); - mbp = &mb; - standalone = 1; - } else - standalone = 0; - - /* - * !!! 
- * We may not hold the mutex lock -- that's OK, we're only reading - * the statistics. - */ - if ((value = mutex->mutex_set_wait) < 10000000) - __db_msgadd(dbenv, mbp, "%lu", value); - else - __db_msgadd(dbenv, mbp, "%luM", value / 1000000); - - /* - * If standalone, append the mutex percent and the locker information - * after the suffix line. Otherwise, append it after the counter. - * - * The setting of "suffix" tracks "standalone" -- if standalone, expect - * a suffix and prefix it with a <tab>, otherwise, it's optional. This - * isn't a design, it's just the semantics we happen to need right now. - */ - if (standalone) { - if (suffix == NULL) /* Defense. */ - suffix = ""; - - __db_msgadd(dbenv, &mb, "\t%s (%d%%", suffix, - DB_PCT(mutex->mutex_set_wait, - mutex->mutex_set_wait + mutex->mutex_set_nowait)); -#ifdef DIAGNOSTIC -#ifdef HAVE_MUTEX_THREADS - if (mutex->locked != 0) - __db_msgadd(dbenv, &mb, "/%lu", (u_long)mutex->locked); -#else - if (mutex->pid != 0) - __db_msgadd(dbenv, &mb, "/%lu", (u_long)mutex->pid); -#endif -#endif - __db_msgadd(dbenv, &mb, ")"); - - DB_MSGBUF_FLUSH(dbenv, mbp); - } else { - __db_msgadd(dbenv, mbp, "/%d%%", DB_PCT(mutex->mutex_set_wait, - mutex->mutex_set_wait + mutex->mutex_set_nowait)); -#ifdef DIAGNOSTIC -#ifdef HAVE_MUTEX_THREADS - if (mutex->locked != 0) - __db_msgadd(dbenv, &mb, "/%lu", (u_long)mutex->locked); -#else - if (mutex->pid != 0) - __db_msgadd(dbenv, &mb, "/%lu", (u_long)mutex->pid); -#endif -#endif - if (suffix != NULL) - __db_msgadd(dbenv, mbp, "%s", suffix); - } - - if (LF_ISSET(DB_STAT_CLEAR)) - MUTEX_CLEAR(mutex); -} - -/* * __db_dl -- * Display a big value. * @@ -510,12 +480,13 @@ __db_dl_pct(dbenv, msg, value, pct, tag) /* * Two formats: if less than 10 million, display as the number, if - * greater than 10 million display as ###M. + * greater than 10 million, round it off and display as ###M. 
*/ if (value < 10000000) __db_msgadd(dbenv, &mb, "%lu\t%s", value, msg); else - __db_msgadd(dbenv, &mb, "%luM\t%s", value / 1000000, msg); + __db_msgadd(dbenv, + &mb, "%luM\t%s", (value + 500000) / 1000000, msg); if (tag == NULL) __db_msgadd(dbenv, &mb, " (%d%%)", pct); else @@ -602,11 +573,11 @@ __db_print_reginfo(dbenv, infop, s) STAT_STRING("Region type", __reg_type(infop->type)); STAT_ULONG("Region ID", infop->id); STAT_STRING("Region name", infop->name); - STAT_HEX("Original region address", infop->addr_orig); - STAT_HEX("Region address", infop->addr); - STAT_HEX("Region primary address", infop->primary); + STAT_POINTER("Original region address", infop->addr_orig); + STAT_POINTER("Region address", infop->addr); + STAT_POINTER("Region primary address", infop->primary); STAT_ULONG("Region maximum allocation", infop->max_alloc); - STAT_ULONG("Region allocated", infop->max_alloc); + STAT_ULONG("Region allocated", infop->allocated); __db_prflags(dbenv, NULL, infop->flags, fn, NULL, "\tRegion flags"); } @@ -650,12 +621,12 @@ int __db_stat_not_built(dbenv) DB_ENV *dbenv; { - __db_err(dbenv, "Library build did not include statistics support"); + __db_errx(dbenv, "Library build did not include statistics support"); return (DB_OPNOTSUP); } int -__dbenv_stat_print_pp(dbenv, flags) +__env_stat_print_pp(dbenv, flags) DB_ENV *dbenv; u_int32_t flags; { |