diff options
author | jbj <devnull@localhost> | 2003-12-15 21:42:09 +0000 |
---|---|---|
committer | jbj <devnull@localhost> | 2003-12-15 21:42:09 +0000 |
commit | 8960e3895f7af91126465368dff8fbb36ab4e853 (patch) | |
tree | 3c515e39dde0e88edeb806ea87d08524ba25c761 /db/env | |
parent | 752cac72e220dcad4e6fce39508e714e59e3e0a1 (diff) | |
download | librpm-tizen-8960e3895f7af91126465368dff8fbb36ab4e853.tar.gz librpm-tizen-8960e3895f7af91126465368dff8fbb36ab4e853.tar.bz2 librpm-tizen-8960e3895f7af91126465368dff8fbb36ab4e853.zip |
- upgrade to db-4.2.52.
CVS patchset: 6972
CVS date: 2003/12/15 21:42:09
Diffstat (limited to 'db/env')
-rw-r--r-- | db/env/db_salloc.c | 6 | ||||
-rw-r--r-- | db/env/db_shash.c | 9 | ||||
-rw-r--r-- | db/env/env_file.c | 24 | ||||
-rw-r--r-- | db/env/env_method.c | 742 | ||||
-rw-r--r-- | db/env/env_open.c | 998 | ||||
-rw-r--r-- | db/env/env_recover.c | 754 | ||||
-rw-r--r-- | db/env/env_region.c | 412 |
7 files changed, 2030 insertions, 915 deletions
diff --git a/db/env/db_salloc.c b/db/env/db_salloc.c index 0961ac420..3fe9adaa7 100644 --- a/db/env/db_salloc.c +++ b/db/env/db_salloc.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996-2002 + * Copyright (c) 1996-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "Id: db_salloc.c,v 11.16 2002/08/24 20:27:25 bostic Exp "; +static const char revid[] = "$Id: db_salloc.c,v 11.17 2003/01/08 04:42:01 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -60,7 +60,7 @@ __db_shalloc_init(area, size) /* * __db_shalloc_size -- - * Return size of the shared region, including alignment. + * Return the space needed for an allocation, including alignment. * * PUBLIC: int __db_shalloc_size __P((size_t, size_t)); */ diff --git a/db/env/db_shash.c b/db/env/db_shash.c index 1c33b3830..6c8e2dc42 100644 --- a/db/env/db_shash.c +++ b/db/env/db_shash.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: db_shash.c,v 11.3 2000/02/14 02:59:49 bostic Exp $"; +static const char revid[] = "$Id: db_shash.c,v 11.7 2003/01/08 04:42:06 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -30,6 +30,7 @@ static const struct { u_int32_t power; u_int32_t prime; } list[] = { + { 32, 37}, /* 2^5 */ { 64, 67}, /* 2^6 */ { 128, 131}, /* 2^7 */ { 256, 257}, /* 2^8 */ @@ -89,8 +90,8 @@ __db_tablesize(n_buckets) * * Ref: Sedgewick, Algorithms in C, "Hash Functions" */ - if (n_buckets < 64) - n_buckets = 64; + if (n_buckets < 32) + n_buckets = 32; for (i = 0;; ++i) { if (list[i].power == 0) { diff --git a/db/env/env_file.c b/db/env/env_file.c index c7b0fd2e0..6bcfad72b 100644 --- a/db/env/env_file.c +++ b/db/env/env_file.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2002 + * Copyright (c) 2002-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "Id: env_file.c,v 1.5 2002/03/08 17:47:18 sue Exp "; +static const char revid[] = "$Id: env_file.c,v 1.8 2003/05/24 14:57:52 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -20,7 +20,7 @@ static const char revid[] = "Id: env_file.c,v 1.5 2002/03/08 17:47:18 sue Exp "; #include "db_int.h" static int __db_overwrite_pass __P((DB_ENV *, - const char *, DB_FH *, u_int32_t, u_int32_t, u_int32_t)); + const char *, DB_FH *, u_int32_t, u_int32_t, int)); /* * __db_fileinit -- @@ -98,12 +98,11 @@ __db_overwrite(dbenv, path) DB_ENV *dbenv; const char *path; { - DB_FH fh, *fhp; + DB_FH *fhp; u_int32_t mbytes, bytes; int ret; - fhp = &fh; - if ((ret = __os_open(dbenv, path, DB_OSO_REGION, 0, fhp)) == 0 && + if ((ret = __os_open(dbenv, path, DB_OSO_REGION, 0, &fhp)) == 0 && (ret = __os_ioinfo(dbenv, path, fhp, &mbytes, &bytes, NULL)) == 0) { /* * !!! @@ -112,19 +111,19 @@ __db_overwrite(dbenv, path) * or logging filesystems will require operating system support. */ if ((ret = __db_overwrite_pass( - dbenv, path, fhp, mbytes, bytes, 0xff)) != 0) + dbenv, path, fhp, mbytes, bytes, 255)) != 0) goto err; if ((ret = __db_overwrite_pass( - dbenv, path, fhp, mbytes, bytes, 0x00)) != 0) + dbenv, path, fhp, mbytes, bytes, 0)) != 0) goto err; if ((ret = __db_overwrite_pass( - dbenv, path, fhp, mbytes, bytes, 0xff)) != 0) + dbenv, path, fhp, mbytes, bytes, 255)) != 0) goto err; } else __db_err(dbenv, "%s: %s", path, db_strerror(ret)); -err: if (F_ISSET(fhp, DB_FH_VALID)) - __os_closehandle(dbenv, fhp); +err: if (fhp != NULL) + (void)__os_closehandle(dbenv, fhp); return (ret); } @@ -137,7 +136,8 @@ __db_overwrite_pass(dbenv, path, fhp, mbytes, bytes, pattern) DB_ENV *dbenv; const char *path; DB_FH *fhp; - u_int32_t mbytes, bytes, pattern; + int pattern; + u_int32_t mbytes, bytes; { size_t len, nw; int i, ret; diff --git a/db/env/env_method.c b/db/env/env_method.c index c5f45df71..ac0136920 100644 --- a/db/env/env_method.c +++ b/db/env/env_method.c @@ -1,24 +1,24 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1999, 2000 + * Copyright (c) 1999-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: env_method.c,v 11.31 2000/11/30 00:58:35 ubell Exp $"; +static const char revid[] = "$Id: env_method.c,v 11.113 2003/09/11 17:36:41 sue Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <string.h> +#ifdef HAVE_RPC +#include <rpc/rpc.h> #endif -#ifdef HAVE_RPC -#include "db_server.h" +#include <string.h> #endif /* @@ -29,39 +29,43 @@ static const char revid[] = "$Id: env_method.c,v 11.31 2000/11/30 00:58:35 ubell #define DB_INITIALIZE_DB_GLOBALS 1 #include "db_int.h" -#include "db_shash.h" -#include "db_page.h" -#include "db_am.h" -#include "lock.h" -#include "log.h" -#include "mp.h" -#include "txn.h" +#include "dbinc/crypto.h" +#include "dbinc/hmac.h" +#include "dbinc/db_shash.h" +#include "dbinc/db_page.h" +#include "dbinc/db_am.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" #ifdef HAVE_RPC -#include "gen_client_ext.h" -#include "rpc_client_ext.h" +#include "dbinc_auto/db_server.h" +#include "dbinc_auto/rpc_client_ext.h" #endif +static int __dbenv_init __P((DB_ENV *)); static void __dbenv_err __P((const DB_ENV *, int, const char *, ...)); static void __dbenv_errx __P((const DB_ENV *, const char *, ...)); -static int __dbenv_set_data_dir __P((DB_ENV *, const char *)); -static void __dbenv_set_errcall __P((DB_ENV *, void (*)(const char *, char *))); -static void __dbenv_set_errfile __P((DB_ENV *, FILE *)); -static void __dbenv_set_errpfx __P((DB_ENV *, const char *)); +static int __dbenv_get_home __P((DB_ENV *, const char **)); +static int __dbenv_set_app_dispatch __P((DB_ENV *, + int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops))); +static int __dbenv_get_data_dirs __P((DB_ENV *, const char ***)); static int __dbenv_set_feedback __P((DB_ENV *, void (*)(DB_ENV *, int, int))); -static int __dbenv_set_flags __P((DB_ENV *, u_int32_t, int)); -static int __dbenv_set_mutexlocks __P((DB_ENV *, int)); -static int __dbenv_set_paniccall __P((DB_ENV *, void (*)(DB_ENV *, int))); -static int __dbenv_set_recovery_init __P((DB_ENV *, int (*)(DB_ENV *))); -static int __dbenv_set_server_noclnt - __P((DB_ENV *, char *, long, long, u_int32_t)); -static int __dbenv_set_shm_key __P((DB_ENV *, long)); -static int __dbenv_set_tmp_dir __P((DB_ENV *, const char *)); -static int __dbenv_set_verbose __P((DB_ENV *, u_int32_t, int)); +static void __dbenv_map_flags __P((DB_ENV *, u_int32_t *, u_int32_t *)); +static int __dbenv_get_flags __P((DB_ENV *, u_int32_t *)); +static int __dbenv_set_rpc_server_noclnt + __P((DB_ENV *, void *, const char *, long, long, u_int32_t)); +static int __dbenv_get_shm_key __P((DB_ENV *, long *)); +static int __dbenv_get_tas_spins __P((DB_ENV *, u_int32_t *)); +static int __dbenv_get_tmp_dir __P((DB_ENV *, const char **)); +static int __dbenv_get_verbose __P((DB_ENV *, u_int32_t, int *)); /* * db_env_create -- * DB_ENV constructor. + * + * EXTERN: int db_env_create __P((DB_ENV **, u_int32_t)); */ int db_env_create(dbenvpp, flags) @@ -73,23 +77,25 @@ db_env_create(dbenvpp, flags) /* * !!! + * Our caller has not yet had the opportunity to reset the panic + * state or turn off mutex locking, and so we can neither check + * the panic state or acquire a mutex in the DB_ENV create path. + * + * !!! * We can't call the flags-checking routines, we don't have an * environment yet. */ - if (flags != 0 && flags != DB_CLIENT) + if (flags != 0 && !LF_ISSET(DB_RPCCLIENT)) return (EINVAL); - if ((ret = __os_calloc(NULL, 1, sizeof(*dbenv), &dbenv)) != 0) return (ret); #ifdef HAVE_RPC - if (LF_ISSET(DB_CLIENT)) + if (LF_ISSET(DB_RPCCLIENT)) F_SET(dbenv, DB_ENV_RPCCLIENT); #endif - ret = __dbenv_init(dbenv); - - if (ret != 0) { - __os_free(dbenv, sizeof(*dbenv)); + if ((ret = __dbenv_init(dbenv)) != 0) { + __os_free(NULL, dbenv); return (ret); } @@ -100,61 +106,100 @@ db_env_create(dbenvpp, flags) /* * __dbenv_init -- * Initialize a DB_ENV structure. - * - * PUBLIC: int __dbenv_init __P((DB_ENV *)); */ -int +static int __dbenv_init(dbenv) DB_ENV *dbenv; { + int ret; + /* + * !!! + * Our caller has not yet had the opportunity to reset the panic + * state or turn off mutex locking, and so we can neither check + * the panic state or acquire a mutex in the DB_ENV create path. + * * Set up methods that are the same in both normal and RPC */ dbenv->err = __dbenv_err; dbenv->errx = __dbenv_errx; dbenv->set_errcall = __dbenv_set_errcall; + dbenv->get_errfile = __dbenv_get_errfile; dbenv->set_errfile = __dbenv_set_errfile; + dbenv->get_errpfx = __dbenv_get_errpfx; dbenv->set_errpfx = __dbenv_set_errpfx; #ifdef HAVE_RPC if (F_ISSET(dbenv, DB_ENV_RPCCLIENT)) { - dbenv->close = __dbcl_env_close; - dbenv->open = __dbcl_env_open; + dbenv->close = __dbcl_env_close_wrap; + dbenv->dbremove = __dbcl_env_dbremove; + dbenv->dbrename = __dbcl_env_dbrename; + dbenv->get_home = __dbcl_env_get_home; + dbenv->get_open_flags = __dbcl_env_get_open_flags; + dbenv->open = __dbcl_env_open_wrap; dbenv->remove = __dbcl_env_remove; + dbenv->set_alloc = __dbcl_env_alloc; + dbenv->set_app_dispatch = __dbcl_set_app_dispatch; + dbenv->get_data_dirs = __dbcl_get_data_dirs; dbenv->set_data_dir = __dbcl_set_data_dir; + dbenv->get_encrypt_flags = __dbcl_env_get_encrypt_flags; + dbenv->set_encrypt = __dbcl_env_encrypt; dbenv->set_feedback = __dbcl_env_set_feedback; + dbenv->get_flags = __dbcl_env_get_flags; dbenv->set_flags = __dbcl_env_flags; - dbenv->set_mutexlocks = __dbcl_set_mutex_locks; dbenv->set_paniccall = __dbcl_env_paniccall; - dbenv->set_recovery_init = __dbcl_set_recovery_init; - dbenv->set_server = __dbcl_envserver; + dbenv->set_rpc_server = __dbcl_envrpcserver; + dbenv->get_shm_key = __dbcl_get_shm_key; dbenv->set_shm_key = __dbcl_set_shm_key; + dbenv->get_tas_spins = __dbcl_get_tas_spins; + dbenv->set_tas_spins = __dbcl_set_tas_spins; + dbenv->get_timeout = __dbcl_get_timeout; + dbenv->set_timeout = __dbcl_set_timeout; + dbenv->get_tmp_dir = __dbcl_get_tmp_dir; dbenv->set_tmp_dir = __dbcl_set_tmp_dir; + dbenv->get_verbose = __dbcl_get_verbose; dbenv->set_verbose = __dbcl_set_verbose; } else { #endif - dbenv->close = __dbenv_close; + dbenv->close = __dbenv_close_pp; + dbenv->dbremove = __dbenv_dbremove_pp; + dbenv->dbrename = __dbenv_dbrename_pp; dbenv->open = __dbenv_open; dbenv->remove = __dbenv_remove; + dbenv->get_home = __dbenv_get_home; + dbenv->get_open_flags = __dbenv_get_open_flags; + dbenv->set_alloc = __dbenv_set_alloc; + dbenv->set_app_dispatch = __dbenv_set_app_dispatch; + dbenv->get_data_dirs = __dbenv_get_data_dirs; dbenv->set_data_dir = __dbenv_set_data_dir; + dbenv->get_encrypt_flags = __dbenv_get_encrypt_flags; + dbenv->set_encrypt = __dbenv_set_encrypt; dbenv->set_feedback = __dbenv_set_feedback; + dbenv->get_flags = __dbenv_get_flags; dbenv->set_flags = __dbenv_set_flags; - dbenv->set_mutexlocks = __dbenv_set_mutexlocks; dbenv->set_paniccall = __dbenv_set_paniccall; - dbenv->set_recovery_init = __dbenv_set_recovery_init; - dbenv->set_server = __dbenv_set_server_noclnt; + dbenv->set_rpc_server = __dbenv_set_rpc_server_noclnt; + dbenv->get_shm_key = __dbenv_get_shm_key; dbenv->set_shm_key = __dbenv_set_shm_key; + dbenv->get_tas_spins = __dbenv_get_tas_spins; + dbenv->set_tas_spins = __dbenv_set_tas_spins; + dbenv->get_tmp_dir = __dbenv_get_tmp_dir; dbenv->set_tmp_dir = __dbenv_set_tmp_dir; + dbenv->get_verbose = __dbenv_get_verbose; dbenv->set_verbose = __dbenv_set_verbose; #ifdef HAVE_RPC } #endif dbenv->shm_key = INVALID_REGION_SEGID; - dbenv->db_mutexlocks = 1; + dbenv->db_ref = 0; + + __os_spin(dbenv); __log_dbenv_create(dbenv); /* Subsystem specific. */ __lock_dbenv_create(dbenv); __memp_dbenv_create(dbenv); + if ((ret = __rep_dbenv_create(dbenv)) != 0) + return (ret); __txn_dbenv_create(dbenv); return (0); @@ -165,7 +210,7 @@ __dbenv_init(dbenv) * Error message, including the standard error string. */ static void -#ifdef __STDC__ +#ifdef STDC_HEADERS __dbenv_err(const DB_ENV *dbenv, int error, const char *fmt, ...) #else __dbenv_err(dbenv, error, fmt, va_alist) @@ -175,16 +220,7 @@ __dbenv_err(dbenv, error, fmt, va_alist) va_dcl #endif { - va_list ap; - -#ifdef __STDC__ - va_start(ap, fmt); -#else - va_start(ap); -#endif - __db_real_err(dbenv, error, 1, 1, fmt, ap); - - va_end(ap); + DB_REAL_ERR(dbenv, error, 1, 1, fmt); } /* @@ -192,7 +228,7 @@ __dbenv_err(dbenv, error, fmt, va_alist) * Error message. */ static void -#ifdef __STDC__ +#ifdef STDC_HEADERS __dbenv_errx(const DB_ENV *dbenv, const char *fmt, ...) #else __dbenv_errx(dbenv, fmt, va_alist) @@ -201,76 +237,397 @@ __dbenv_errx(dbenv, fmt, va_alist) va_dcl #endif { - va_list ap; + DB_REAL_ERR(dbenv, 0, 0, 1, fmt); +} + +static int +__dbenv_get_home(dbenv, homep) + DB_ENV *dbenv; + const char **homep; +{ + ENV_ILLEGAL_BEFORE_OPEN(dbenv, "DB_ENV->get_home"); + *homep = dbenv->db_home; + return (0); +} + +/* + * __dbenv_set_alloc -- + * {DB_ENV,DB}->set_alloc. + * + * PUBLIC: int __dbenv_set_alloc __P((DB_ENV *, void *(*)(size_t), + * PUBLIC: void *(*)(void *, size_t), void (*)(void *))); + */ +int +__dbenv_set_alloc(dbenv, mal_func, real_func, free_func) + DB_ENV *dbenv; + void *(*mal_func) __P((size_t)); + void *(*real_func) __P((void *, size_t)); + void (*free_func) __P((void *)); +{ + ENV_ILLEGAL_AFTER_OPEN(dbenv, "DB_ENV->set_alloc"); + + dbenv->db_malloc = mal_func; + dbenv->db_realloc = real_func; + dbenv->db_free = free_func; + return (0); +} + +/* + * __dbenv_set_app_dispatch -- + * Set the transaction abort recover function. + */ +static int +__dbenv_set_app_dispatch(dbenv, app_dispatch) + DB_ENV *dbenv; + int (*app_dispatch) __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); +{ + ENV_ILLEGAL_AFTER_OPEN(dbenv, "DB_ENV->set_app_dispatch"); + + dbenv->app_dispatch = app_dispatch; + return (0); +} + +/* + * __dbenv_get_encrypt_flags -- + * {DB_ENV,DB}->get_encrypt_flags. + * + * PUBLIC: int __dbenv_get_encrypt_flags __P((DB_ENV *, u_int32_t *)); + */ +int +__dbenv_get_encrypt_flags(dbenv, flagsp) + DB_ENV *dbenv; + u_int32_t *flagsp; +{ +#ifdef HAVE_CRYPTO + DB_CIPHER *db_cipher; + + db_cipher = dbenv->crypto_handle; + if (db_cipher != NULL && db_cipher->alg == CIPHER_AES) + *flagsp = DB_ENCRYPT_AES; + else + *flagsp = 0; + return (0); +#else + COMPQUIET(flagsp, 0); + __db_err(dbenv, + "library build did not include support for cryptography"); + return (DB_OPNOTSUP); +#endif +} + +/* + * __dbenv_set_encrypt -- + * DB_ENV->set_encrypt. + * + * PUBLIC: int __dbenv_set_encrypt __P((DB_ENV *, const char *, u_int32_t)); + */ +int +__dbenv_set_encrypt(dbenv, passwd, flags) + DB_ENV *dbenv; + const char *passwd; + u_int32_t flags; +{ +#ifdef HAVE_CRYPTO + DB_CIPHER *db_cipher; + int ret; + + ENV_ILLEGAL_AFTER_OPEN(dbenv, "DB_ENV->set_encrypt"); +#define OK_CRYPTO_FLAGS (DB_ENCRYPT_AES) -#ifdef __STDC__ - va_start(ap, fmt); + if (flags != 0 && LF_ISSET(~OK_CRYPTO_FLAGS)) + return (__db_ferr(dbenv, "DB_ENV->set_encrypt", 0)); + + if (passwd == NULL || strlen(passwd) == 0) { + __db_err(dbenv, "Empty password specified to set_encrypt"); + return (EINVAL); + } + if (!CRYPTO_ON(dbenv)) { + if ((ret = __os_calloc(dbenv, 1, sizeof(DB_CIPHER), &db_cipher)) + != 0) + goto err; + dbenv->crypto_handle = db_cipher; + } else + db_cipher = (DB_CIPHER *)dbenv->crypto_handle; + + if (dbenv->passwd != NULL) + __os_free(dbenv, dbenv->passwd); + if ((ret = __os_strdup(dbenv, passwd, &dbenv->passwd)) != 0) { + __os_free(dbenv, db_cipher); + goto err; + } + /* + * We're going to need this often enough to keep around + */ + dbenv->passwd_len = strlen(dbenv->passwd) + 1; + /* + * The MAC key is for checksumming, and is separate from + * the algorithm. So initialize it here, even if they + * are using CIPHER_ANY. + */ + __db_derive_mac((u_int8_t *)dbenv->passwd, + dbenv->passwd_len, db_cipher->mac_key); + switch (flags) { + case 0: + F_SET(db_cipher, CIPHER_ANY); + break; + case DB_ENCRYPT_AES: + if ((ret = __crypto_algsetup(dbenv, db_cipher, CIPHER_AES, 0)) + != 0) + goto err1; + break; + default: /* Impossible. */ + break; + } + return (0); + +err1: + __os_free(dbenv, dbenv->passwd); + __os_free(dbenv, db_cipher); + dbenv->crypto_handle = NULL; +err: + return (ret); #else - va_start(ap); + COMPQUIET(passwd, NULL); + COMPQUIET(flags, 0); + + __db_err(dbenv, + "library build did not include support for cryptography"); + return (DB_OPNOTSUP); #endif - __db_real_err(dbenv, 0, 0, 1, fmt, ap); +} + +static void +__dbenv_map_flags(dbenv, inflagsp, outflagsp) + DB_ENV *dbenv; + u_int32_t *inflagsp, *outflagsp; +{ + COMPQUIET(dbenv, NULL); - va_end(ap); + if (FLD_ISSET(*inflagsp, DB_AUTO_COMMIT)) { + FLD_SET(*outflagsp, DB_ENV_AUTO_COMMIT); + FLD_CLR(*inflagsp, DB_AUTO_COMMIT); + } + if (FLD_ISSET(*inflagsp, DB_CDB_ALLDB)) { + FLD_SET(*outflagsp, DB_ENV_CDB_ALLDB); + FLD_CLR(*inflagsp, DB_CDB_ALLDB); + } + if (FLD_ISSET(*inflagsp, DB_DIRECT_DB)) { + FLD_SET(*outflagsp, DB_ENV_DIRECT_DB); + FLD_CLR(*inflagsp, DB_DIRECT_DB); + } + if (FLD_ISSET(*inflagsp, DB_DIRECT_LOG)) { + FLD_SET(*outflagsp, DB_ENV_DIRECT_LOG); + FLD_CLR(*inflagsp, DB_DIRECT_LOG); + } + if (FLD_ISSET(*inflagsp, DB_LOG_AUTOREMOVE)) { + FLD_SET(*outflagsp, DB_ENV_LOG_AUTOREMOVE); + FLD_CLR(*inflagsp, DB_LOG_AUTOREMOVE); + } + if (FLD_ISSET(*inflagsp, DB_NOLOCKING)) { + FLD_SET(*outflagsp, DB_ENV_NOLOCKING); + FLD_CLR(*inflagsp, DB_NOLOCKING); + } + if (FLD_ISSET(*inflagsp, DB_NOMMAP)) { + FLD_SET(*outflagsp, DB_ENV_NOMMAP); + FLD_CLR(*inflagsp, DB_NOMMAP); + } + if (FLD_ISSET(*inflagsp, DB_NOPANIC)) { + FLD_SET(*outflagsp, DB_ENV_NOPANIC); + FLD_CLR(*inflagsp, DB_NOPANIC); + } + if (FLD_ISSET(*inflagsp, DB_OVERWRITE)) { + FLD_SET(*outflagsp, DB_ENV_OVERWRITE); + FLD_CLR(*inflagsp, DB_OVERWRITE); + } + if (FLD_ISSET(*inflagsp, DB_REGION_INIT)) { + FLD_SET(*outflagsp, DB_ENV_REGION_INIT); + FLD_CLR(*inflagsp, DB_REGION_INIT); + } + if (FLD_ISSET(*inflagsp, DB_TIME_NOTGRANTED)) { + FLD_SET(*outflagsp, DB_ENV_TIME_NOTGRANTED); + FLD_CLR(*inflagsp, DB_TIME_NOTGRANTED); + } + if (FLD_ISSET(*inflagsp, DB_TXN_NOSYNC)) { + FLD_SET(*outflagsp, DB_ENV_TXN_NOSYNC); + FLD_CLR(*inflagsp, DB_TXN_NOSYNC); + } + if (FLD_ISSET(*inflagsp, DB_TXN_NOT_DURABLE)) { + FLD_SET(*outflagsp, DB_ENV_TXN_NOT_DURABLE); + FLD_CLR(*inflagsp, DB_TXN_NOT_DURABLE); + } + if (FLD_ISSET(*inflagsp, DB_TXN_WRITE_NOSYNC)) { + FLD_SET(*outflagsp, DB_ENV_TXN_WRITE_NOSYNC); + FLD_CLR(*inflagsp, DB_TXN_WRITE_NOSYNC); + } + if (FLD_ISSET(*inflagsp, DB_YIELDCPU)) { + FLD_SET(*outflagsp, DB_ENV_YIELDCPU); + FLD_CLR(*inflagsp, DB_YIELDCPU); + } } static int -__dbenv_set_flags(dbenv, flags, onoff) +__dbenv_get_flags(dbenv, flagsp) + DB_ENV *dbenv; + u_int32_t *flagsp; +{ + static const u_int32_t env_flags[] = { + DB_AUTO_COMMIT, + DB_CDB_ALLDB, + DB_DIRECT_DB, + DB_DIRECT_LOG, + DB_LOG_AUTOREMOVE, + DB_NOLOCKING, + DB_NOMMAP, + DB_NOPANIC, + DB_OVERWRITE, + DB_REGION_INIT, + DB_TIME_NOTGRANTED, + DB_TXN_NOSYNC, + DB_TXN_NOT_DURABLE, + DB_TXN_WRITE_NOSYNC, + DB_YIELDCPU, + 0 + }; + u_int32_t f, flags, mapped_flag; + int i; + + flags = 0; + for (i = 0; (f = env_flags[i]) != 0; i++) { + mapped_flag = 0; + __dbenv_map_flags(dbenv, &f, &mapped_flag); + DB_ASSERT(f == 0); + if (F_ISSET(dbenv, mapped_flag) == mapped_flag) + LF_SET(env_flags[i]); + } + + /* Special cases */ + if (dbenv->reginfo != NULL && + ((REGENV *)((REGINFO *)dbenv->reginfo)->primary)->envpanic != 0) { + LF_SET(DB_PANIC_ENVIRONMENT); + } + + *flagsp = flags; + return (0); +} + +/* + * __dbenv_set_flags -- + * DB_ENV->set_flags. + * + * PUBLIC: int __dbenv_set_flags __P((DB_ENV *, u_int32_t, int)); + */ +int +__dbenv_set_flags(dbenv, flags, on) DB_ENV *dbenv; u_int32_t flags; - int onoff; + int on; { -#define OK_FLAGS (DB_CDB_ALLDB | DB_NOMMAP | DB_TXN_NOSYNC) + u_int32_t mapped_flags; + int ret; - if (LF_ISSET(~OK_FLAGS)) - return (__db_ferr(dbenv, "DBENV->set_flags", 0)); +#define OK_FLAGS \ + (DB_AUTO_COMMIT | DB_CDB_ALLDB | DB_DIRECT_DB | DB_DIRECT_LOG | \ + DB_LOG_AUTOREMOVE | DB_NOLOCKING | DB_NOMMAP | DB_NOPANIC | \ + DB_OVERWRITE | DB_PANIC_ENVIRONMENT | DB_REGION_INIT | \ + DB_TIME_NOTGRANTED | DB_TXN_NOSYNC | DB_TXN_NOT_DURABLE | \ + DB_TXN_WRITE_NOSYNC | DB_YIELDCPU) - if (LF_ISSET(DB_CDB_ALLDB)) { - ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_flags: DB_CDB_ALLDB"); - if (onoff) - F_SET(dbenv, DB_ENV_CDB_ALLDB); - else - F_CLR(dbenv, DB_ENV_CDB_ALLDB); - } - if (LF_ISSET(DB_NOMMAP)) { - if (onoff) - F_SET(dbenv, DB_ENV_NOMMAP); - else - F_CLR(dbenv, DB_ENV_NOMMAP); + if (LF_ISSET(~OK_FLAGS)) + return (__db_ferr(dbenv, "DB_ENV->set_flags", 0)); + if (on) { + if ((ret = __db_fcchk(dbenv, "DB_ENV->set_flags", + flags, DB_TXN_NOSYNC, DB_TXN_NOT_DURABLE)) != 0) + return (ret); + if ((ret = __db_fcchk(dbenv, "DB_ENV->set_flags", + flags, DB_TXN_NOSYNC, DB_TXN_WRITE_NOSYNC)) != 0) + return (ret); + if ((ret = __db_fcchk(dbenv, "DB_ENV->set_flags", + flags, DB_TXN_NOT_DURABLE, DB_TXN_WRITE_NOSYNC)) != 0) + return (ret); + if (LF_ISSET(DB_DIRECT_DB | + DB_DIRECT_LOG) && __os_have_direct() == 0) { + __db_err(dbenv, + "DB_ENV->set_flags: direct I/O is not supported by this platform"); + return (EINVAL); + } } - if (LF_ISSET(DB_TXN_NOSYNC)) { - if (onoff) - F_SET(dbenv, DB_ENV_TXN_NOSYNC); - else - F_CLR(dbenv, DB_ENV_TXN_NOSYNC); + + if (LF_ISSET(DB_CDB_ALLDB)) + ENV_ILLEGAL_AFTER_OPEN(dbenv, + "DB_ENV->set_flags: DB_CDB_ALLDB"); + if (LF_ISSET(DB_PANIC_ENVIRONMENT)) { + ENV_ILLEGAL_BEFORE_OPEN(dbenv, + "DB_ENV->set_flags: DB_PANIC_ENVIRONMENT"); + PANIC_SET(dbenv, on); } + if (LF_ISSET(DB_REGION_INIT)) + ENV_ILLEGAL_AFTER_OPEN(dbenv, + "DB_ENV->set_flags: DB_REGION_INIT"); + + mapped_flags = 0; + __dbenv_map_flags(dbenv, &flags, &mapped_flags); + if (on) + F_SET(dbenv, mapped_flags); + else + F_CLR(dbenv, mapped_flags); return (0); } static int +__dbenv_get_data_dirs(dbenv, dirpp) + DB_ENV *dbenv; + const char ***dirpp; +{ + *dirpp = (const char **)dbenv->db_data_dir; + return (0); +} + +/* + * __dbenv_set_data_dir -- + * DB_ENV->set_dta_dir. + * + * PUBLIC: int __dbenv_set_data_dir __P((DB_ENV *, const char *)); + */ +int __dbenv_set_data_dir(dbenv, dir) DB_ENV *dbenv; const char *dir; { int ret; + /* + * The array is NULL-terminated so it can be returned by get_data_dirs + * without a length. + */ + #define DATA_INIT_CNT 20 /* Start with 20 data slots. */ if (dbenv->db_data_dir == NULL) { if ((ret = __os_calloc(dbenv, DATA_INIT_CNT, sizeof(char **), &dbenv->db_data_dir)) != 0) return (ret); dbenv->data_cnt = DATA_INIT_CNT; - } else if (dbenv->data_next == dbenv->data_cnt - 1) { + } else if (dbenv->data_next == dbenv->data_cnt - 2) { dbenv->data_cnt *= 2; if ((ret = __os_realloc(dbenv, - dbenv->data_cnt * sizeof(char **), - NULL, &dbenv->db_data_dir)) != 0) + (u_int)dbenv->data_cnt * sizeof(char **), + &dbenv->db_data_dir)) != 0) return (ret); } - return (__os_strdup(dbenv, - dir, &dbenv->db_data_dir[dbenv->data_next++])); + + ret = __os_strdup(dbenv, + dir, &dbenv->db_data_dir[dbenv->data_next++]); + dbenv->db_data_dir[dbenv->data_next] = NULL; + return (ret); } -static void +/* + * __dbenv_set_errcall -- + * {DB_ENV,DB}->set_errcall. + * + * PUBLIC: void __dbenv_set_errcall + * PUBLIC: __P((DB_ENV *, void (*)(const char *, char *))); + */ +void __dbenv_set_errcall(dbenv, errcall) DB_ENV *dbenv; void (*errcall) __P((const char *, char *)); @@ -278,7 +635,27 @@ __dbenv_set_errcall(dbenv, errcall) dbenv->db_errcall = errcall; } -static void +/* + * __dbenv_get_errfile -- + * {DB_ENV,DB}->get_errfile. + * + * PUBLIC: void __dbenv_get_errfile __P((DB_ENV *, FILE **)); + */ +void +__dbenv_get_errfile(dbenv, errfilep) + DB_ENV *dbenv; + FILE **errfilep; +{ + *errfilep = dbenv->db_errfile; +} + +/* + * __dbenv_set_errfile -- + * {DB_ENV,DB}->set_errfile. + * + * PUBLIC: void __dbenv_set_errfile __P((DB_ENV *, FILE *)); + */ +void __dbenv_set_errfile(dbenv, errfile) DB_ENV *dbenv; FILE *errfile; @@ -286,7 +663,27 @@ __dbenv_set_errfile(dbenv, errfile) dbenv->db_errfile = errfile; } -static void +/* + * __dbenv_get_errpfx -- + * {DB_ENV,DB}->get_errpfx. + * + * PUBLIC: void __dbenv_get_errpfx __P((DB_ENV *, const char **)); + */ +void +__dbenv_get_errpfx(dbenv, errpfxp) + DB_ENV *dbenv; + const char **errpfxp; +{ + *errpfxp = dbenv->db_errpfx; +} + +/* + * __dbenv_set_errpfx -- + * {DB_ENV,DB}->set_errpfx. + * + * PUBLIC: void __dbenv_set_errpfx __P((DB_ENV *, const char *)); + */ +void __dbenv_set_errpfx(dbenv, errpfx) DB_ENV *dbenv; const char *errpfx; @@ -303,69 +700,135 @@ __dbenv_set_feedback(dbenv, feedback) return (0); } -static int -__dbenv_set_mutexlocks(dbenv, onoff) +/* + * __dbenv_set_paniccall -- + * {DB_ENV,DB}->set_paniccall. + * + * PUBLIC: int __dbenv_set_paniccall __P((DB_ENV *, void (*)(DB_ENV *, int))); + */ +int +__dbenv_set_paniccall(dbenv, paniccall) DB_ENV *dbenv; - int onoff; + void (*paniccall) __P((DB_ENV *, int)); { - dbenv->db_mutexlocks = onoff; + dbenv->db_paniccall = paniccall; return (0); } static int -__dbenv_set_paniccall(dbenv, paniccall) +__dbenv_get_shm_key(dbenv, shm_keyp) DB_ENV *dbenv; - void (*paniccall) __P((DB_ENV *, int)); + long *shm_keyp; /* !!!: really a key_t *. */ { - dbenv->db_paniccall = paniccall; + *shm_keyp = dbenv->shm_key; return (0); } -static int -__dbenv_set_recovery_init(dbenv, recovery_init) +/* + * __dbenv_set_shm_key -- + * DB_ENV->set_shm_key. + * + * PUBLIC: int __dbenv_set_shm_key __P((DB_ENV *, long)); + */ +int +__dbenv_set_shm_key(dbenv, shm_key) DB_ENV *dbenv; - int (*recovery_init) __P((DB_ENV *)); + long shm_key; /* !!!: really a key_t. */ { - ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_recovery_init"); - - dbenv->db_recovery_init = recovery_init; + ENV_ILLEGAL_AFTER_OPEN(dbenv, "DB_ENV->set_shm_key"); + dbenv->shm_key = shm_key; return (0); } static int -__dbenv_set_shm_key(dbenv, shm_key) +__dbenv_get_tas_spins(dbenv, tas_spinsp) DB_ENV *dbenv; - long shm_key; /* !!!: really a key_t. */ + u_int32_t *tas_spinsp; { - ENV_ILLEGAL_AFTER_OPEN(dbenv, "set_shm_key"); + *tas_spinsp = dbenv->tas_spins; + return (0); +} - dbenv->shm_key = shm_key; +/* + * __dbenv_set_tas_spins -- + * DB_ENV->set_tas_spins. + * + * PUBLIC: int __dbenv_set_tas_spins __P((DB_ENV *, u_int32_t)); + */ +int +__dbenv_set_tas_spins(dbenv, tas_spins) + DB_ENV *dbenv; + u_int32_t tas_spins; +{ + dbenv->tas_spins = tas_spins; return (0); } static int +__dbenv_get_tmp_dir(dbenv, dirp) + DB_ENV *dbenv; + const char **dirp; +{ + *dirp = dbenv->db_tmp_dir; + return (0); +} + +/* + * __dbenv_set_tmp_dir -- + * DB_ENV->set_tmp_dir. + * + * PUBLIC: int __dbenv_set_tmp_dir __P((DB_ENV *, const char *)); + */ +int __dbenv_set_tmp_dir(dbenv, dir) DB_ENV *dbenv; const char *dir; { if (dbenv->db_tmp_dir != NULL) - __os_freestr(dbenv->db_tmp_dir); + __os_free(dbenv, dbenv->db_tmp_dir); return (__os_strdup(dbenv, dir, &dbenv->db_tmp_dir)); } static int -__dbenv_set_verbose(dbenv, which, onoff) +__dbenv_get_verbose(dbenv, which, onoffp) DB_ENV *dbenv; u_int32_t which; - int onoff; + int *onoffp; { switch (which) { case DB_VERB_CHKPOINT: case DB_VERB_DEADLOCK: case DB_VERB_RECOVERY: + case DB_VERB_REPLICATION: case DB_VERB_WAITSFOR: - if (onoff) + *onoffp = FLD_ISSET(dbenv->verbose, which) ? 1 : 0; + break; + default: + return (EINVAL); + } + return (0); +} + +/* + * __dbenv_set_verbose -- + * DB_ENV->set_verbose. + * + * PUBLIC: int __dbenv_set_verbose __P((DB_ENV *, u_int32_t, int)); + */ +int +__dbenv_set_verbose(dbenv, which, on) + DB_ENV *dbenv; + u_int32_t which; + int on; +{ + switch (which) { + case DB_VERB_CHKPOINT: + case DB_VERB_DEADLOCK: + case DB_VERB_RECOVERY: + case DB_VERB_REPLICATION: + case DB_VERB_WAITSFOR: + if (on) FLD_SET(dbenv->verbose, which); else FLD_CLR(dbenv->verbose, which); @@ -387,7 +850,7 @@ __db_mi_env(dbenv, name) DB_ENV *dbenv; const char *name; { - __db_err(dbenv, "%s: method meaningless in shared environment", name); + __db_err(dbenv, "%s: method not permitted in shared environment", name); return (EINVAL); } @@ -403,59 +866,66 @@ __db_mi_open(dbenv, name, after) const char *name; int after; { - __db_err(dbenv, - "%s: method meaningless %s open", name, after ? "after" : "before"); + __db_err(dbenv, "%s: method not permitted %s handle's open method", + name, after ? "after" : "before"); return (EINVAL); } /* * __db_env_config -- - * Method or function called without subsystem being configured. + * Method or function called without required configuration. * - * PUBLIC: int __db_env_config __P((DB_ENV *, int)); + * PUBLIC: int __db_env_config __P((DB_ENV *, char *, u_int32_t)); */ int -__db_env_config(dbenv, subsystem) +__db_env_config(dbenv, i, flags) DB_ENV *dbenv; - int subsystem; + char *i; + u_int32_t flags; { - const char *name; + char *sub; - switch (subsystem) { + switch (flags) { case DB_INIT_LOCK: - name = "lock"; + sub = "locking"; break; case DB_INIT_LOG: - name = "log"; + sub = "logging"; break; case DB_INIT_MPOOL: - name = "mpool"; + sub = "memory pool"; + break; + case DB_INIT_REP: + sub = "replication"; break; case DB_INIT_TXN: - name = "txn"; + sub = "transaction"; break; default: - name = "unknown"; + sub = "<unspecified>"; break; } __db_err(dbenv, - "%s interface called with environment not configured for that subsystem", - name); + "%s interface requires an environment configured for the %s subsystem", + i, sub); return (EINVAL); } static int -__dbenv_set_server_noclnt(dbenv, host, tsec, ssec, flags) +__dbenv_set_rpc_server_noclnt(dbenv, cl, host, tsec, ssec, flags) DB_ENV *dbenv; - char *host; + void *cl; + const char *host; long tsec, ssec; u_int32_t flags; { COMPQUIET(host, NULL); + COMPQUIET(cl, NULL); COMPQUIET(tsec, 0); COMPQUIET(ssec, 0); COMPQUIET(flags, 0); - __db_err(dbenv, "set_server method meaningless in non-RPC enviroment"); - return (__db_eopnotsup(dbenv)); + __db_err(dbenv, + "set_rpc_server method not permitted in non-RPC environment"); + return (DB_OPNOTSUP); } diff --git a/db/env/env_open.c b/db/env/env_open.c index 2007b4266..d4d2313ed 100644 --- a/db/env/env_open.c +++ b/db/env/env_open.c @@ -1,14 +1,14 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: env_open.c,v 11.34 2000/12/21 19:20:00 bostic Exp $"; +static const char revid[] = "$Id: env_open.c,v 11.144 2003/09/13 18:39:34 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -17,30 +17,32 @@ static const char revid[] = "$Id: env_open.c,v 11.34 2000/12/21 19:20:00 bostic #include <ctype.h> #include <stdlib.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" -#include "db_page.h" -#include "db_shash.h" -#include "btree.h" -#include "hash.h" -#include "qam.h" -#include "lock.h" -#include "log.h" -#include "mp.h" -#include "txn.h" -#include "clib_ext.h" +#include "dbinc/crypto.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/btree.h" +#include "dbinc/hash.h" +#include "dbinc/fop.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/qam.h" +#include "dbinc/txn.h" -static int __dbenv_config __P((DB_ENV *, const char *, u_int32_t)); -static int __dbenv_refresh __P((DB_ENV *)); -static int __db_home __P((DB_ENV *, const char *, u_int32_t)); static int __db_parse __P((DB_ENV *, char *)); -static int __db_tmp_open __P((DB_ENV *, u_int32_t, char *, DB_FH *)); +static int __db_tmp_open __P((DB_ENV *, u_int32_t, char *, DB_FH **)); +static int __dbenv_config __P((DB_ENV *, const char *, u_int32_t)); +static int __dbenv_refresh __P((DB_ENV *, u_int32_t, int)); +static int __dbenv_remove_int __P((DB_ENV *, const char *, u_int32_t)); /* * db_version -- * Return version information. + * + * EXTERN: char *db_version __P((int *, int *, int *)); */ char * db_version(majverp, minverp, patchp) @@ -57,7 +59,7 @@ db_version(majverp, minverp, patchp) /* * __dbenv_open -- - * Initialize an environment. + * DB_ENV->open. * * PUBLIC: int __dbenv_open __P((DB_ENV *, const char *, u_int32_t, int)); */ @@ -68,45 +70,79 @@ __dbenv_open(dbenv, db_home, flags, mode) u_int32_t flags; int mode; { - DB_ENV *rm_dbenv; - int ret; - u_int32_t init_flags; + DB_MPOOL *dbmp; + u_int32_t init_flags, orig_flags; + int rep_check, ret; + + orig_flags = dbenv->flags; + rep_check = 0; #undef OKFLAGS #define OKFLAGS \ - DB_CREATE | DB_INIT_CDB | DB_INIT_LOCK | DB_INIT_LOG | \ - DB_INIT_MPOOL | DB_INIT_TXN | DB_JOINENV | DB_LOCKDOWN | \ - DB_PRIVATE | DB_RECOVER | DB_RECOVER_FATAL | DB_SYSTEM_MEM | \ - DB_THREAD | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT + (DB_CREATE | DB_INIT_CDB | DB_INIT_LOCK | DB_INIT_LOG | \ + DB_INIT_MPOOL | DB_INIT_REP | DB_INIT_TXN | DB_JOINENV | \ + DB_LOCKDOWN | DB_PRIVATE | DB_RECOVER | DB_RECOVER_FATAL | \ + DB_SYSTEM_MEM | DB_THREAD | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT) #undef OKFLAGS_CDB #define OKFLAGS_CDB \ - DB_CREATE | DB_INIT_CDB | DB_INIT_MPOOL | DB_LOCKDOWN | \ + (DB_CREATE | DB_INIT_CDB | DB_INIT_MPOOL | DB_LOCKDOWN | \ DB_PRIVATE | DB_SYSTEM_MEM | DB_THREAD | \ - DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT + DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT) /* * Flags saved in the init_flags field of the environment, representing - * flags to DBENV->set_flags and DBENV->open that need to be set. + * flags to DB_ENV->set_flags and DB_ENV->open that need to be set. */ #define DB_INITENV_CDB 0x0001 /* DB_INIT_CDB */ #define DB_INITENV_CDB_ALLDB 0x0002 /* DB_INIT_CDB_ALLDB */ #define DB_INITENV_LOCK 0x0004 /* DB_INIT_LOCK */ #define DB_INITENV_LOG 0x0008 /* DB_INIT_LOG */ #define DB_INITENV_MPOOL 0x0010 /* DB_INIT_MPOOL */ -#define DB_INITENV_TXN 0x0020 /* DB_INIT_TXN */ +#define DB_INITENV_REP 0x0020 /* DB_INIT_REP */ +#define DB_INITENV_TXN 0x0040 /* DB_INIT_TXN */ - if ((ret = __db_fchk(dbenv, "DBENV->open", flags, OKFLAGS)) != 0) + if ((ret = __db_fchk(dbenv, "DB_ENV->open", flags, OKFLAGS)) != 0) return (ret); if (LF_ISSET(DB_INIT_CDB) && - (ret = __db_fchk(dbenv, "DBENV->open", flags, OKFLAGS_CDB)) != 0) + (ret = __db_fchk(dbenv, "DB_ENV->open", flags, OKFLAGS_CDB)) != 0) return (ret); if ((ret = __db_fcchk(dbenv, - "DBENV->open", flags, DB_PRIVATE, DB_SYSTEM_MEM)) != 0) + "DB_ENV->open", flags, DB_PRIVATE, DB_SYSTEM_MEM)) != 0) return (ret); - if ((ret = __db_fcchk(dbenv, "DBENV->open", flags, DB_JOINENV, + if ((ret = __db_fcchk(dbenv, + "DB_ENV->open", flags, DB_RECOVER, DB_RECOVER_FATAL)) != 0) + return (ret); + if ((ret = __db_fcchk(dbenv, "DB_ENV->open", flags, DB_JOINENV, DB_CREATE | DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | - DB_INIT_TXN | DB_PRIVATE)) != 0) + DB_INIT_REP | DB_INIT_TXN | + DB_PRIVATE | DB_RECOVER | DB_RECOVER_FATAL)) != 0) return (ret); + if (LF_ISSET(DB_INIT_REP) && !LF_ISSET(DB_INIT_TXN)) { + __db_err(dbenv, "Replication must be used with transactions"); + return (EINVAL); + } + if (LF_ISSET(DB_INIT_REP) && !LF_ISSET(DB_INIT_LOCK)) { + __db_err(dbenv, "Replication must be used with locking"); + return (EINVAL); + } + if (F_ISSET(dbenv, DB_ENV_TXN_NOT_DURABLE) && !LF_ISSET(DB_INIT_TXN)) { + __db_err(dbenv, + "Setting non-durability only valid with transactions"); + return (EINVAL); + } + + /* + * Currently we support one kind of mutex that is intra-process only, + * POSIX 1003.1 pthreads, because a variety of systems don't support + * the full pthreads API, and our only alternative is test-and-set. + */ +#ifdef HAVE_MUTEX_THREAD_ONLY + if (!LF_ISSET(DB_PRIVATE)) { + __db_err(dbenv, + "Berkeley DB library configured to support only DB_PRIVATE environments"); + return (EINVAL); + } +#endif /* * If we're doing recovery, destroy the environment so that we create @@ -126,24 +162,24 @@ __dbenv_open(dbenv, db_home, flags, mode) * not, we just want to nail any files that are left-over for whatever * reason, from whatever session. */ - if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) { - if ((ret = db_env_create(&rm_dbenv, 0)) != 0) - return (ret); - if ((ret = dbenv->remove(rm_dbenv, db_home, DB_FORCE)) != 0) + if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) + if ((ret = __dbenv_remove_int(dbenv, db_home, DB_FORCE)) != 0 || + (ret = __dbenv_refresh(dbenv, orig_flags, 0)) != 0) return (ret); - } /* Initialize the DB_ENV structure. */ if ((ret = __dbenv_config(dbenv, db_home, flags)) != 0) goto err; - /* Convert the DBENV->open flags to internal flags. */ + /* Convert the DB_ENV->open flags to internal flags. */ if (LF_ISSET(DB_CREATE)) F_SET(dbenv, DB_ENV_CREATE); if (LF_ISSET(DB_LOCKDOWN)) F_SET(dbenv, DB_ENV_LOCKDOWN); if (LF_ISSET(DB_PRIVATE)) F_SET(dbenv, DB_ENV_PRIVATE); + if (LF_ISSET(DB_RECOVER_FATAL)) + F_SET(dbenv, DB_ENV_FATAL); if (LF_ISSET(DB_SYSTEM_MEM)) F_SET(dbenv, DB_ENV_SYSTEM_MEM); if (LF_ISSET(DB_THREAD)) @@ -156,13 +192,14 @@ __dbenv_open(dbenv, db_home, flags, mode) * Create/join the environment. We pass in the flags that * will be of interest to an environment joining later; if * we're not the ones to do the create, we - * pull out whatever has been stored, if we don't do a create. + * pull out whatever has been stored. */ init_flags = 0; init_flags |= (LF_ISSET(DB_INIT_CDB) ? DB_INITENV_CDB : 0); init_flags |= (LF_ISSET(DB_INIT_LOCK) ? DB_INITENV_LOCK : 0); init_flags |= (LF_ISSET(DB_INIT_LOG) ? DB_INITENV_LOG : 0); init_flags |= (LF_ISSET(DB_INIT_MPOOL) ? DB_INITENV_MPOOL : 0); + init_flags |= (LF_ISSET(DB_INIT_REP) ? DB_INITENV_REP : 0); init_flags |= (LF_ISSET(DB_INIT_TXN) ? DB_INITENV_TXN : 0); init_flags |= (F_ISSET(dbenv, DB_ENV_CDB_ALLDB) ? DB_INITENV_CDB_ALLDB : 0); @@ -181,10 +218,11 @@ __dbenv_open(dbenv, db_home, flags, mode) LF_SET((init_flags & DB_INITENV_LOCK) ? DB_INIT_LOCK : 0); LF_SET((init_flags & DB_INITENV_LOG) ? DB_INIT_LOG : 0); LF_SET((init_flags & DB_INITENV_MPOOL) ? DB_INIT_MPOOL : 0); + LF_SET((init_flags & DB_INITENV_REP) ? DB_INIT_REP : 0); LF_SET((init_flags & DB_INITENV_TXN) ? DB_INIT_TXN : 0); if (LF_ISSET(DB_INITENV_CDB_ALLDB) && - (ret = dbenv->set_flags(dbenv, DB_CDB_ALLDB, 1)) != 0) + (ret = __dbenv_set_flags(dbenv, DB_CDB_ALLDB, 1)) != 0) goto err; } @@ -193,37 +231,58 @@ __dbenv_open(dbenv, db_home, flags, mode) LF_SET(DB_INIT_LOCK); F_SET(dbenv, DB_ENV_CDB); } - - /* Initialize the DB list, and its mutex if appropriate. */ - LIST_INIT(&dbenv->dblist); - if (F_ISSET(dbenv, DB_ENV_THREAD)) { - if ((ret = __db_mutex_alloc(dbenv, - dbenv->reginfo, (MUTEX **)&dbenv->dblist_mutexp)) != 0) - return (ret); - if ((ret = __db_mutex_init(dbenv, - dbenv->dblist_mutexp, 0, MUTEX_THREAD)) != 0) { - __db_mutex_free(dbenv, dbenv->reginfo, - dbenv->dblist_mutexp); - return (ret); - } + if (LF_ISSET(DB_RECOVER | + DB_RECOVER_FATAL) && !LF_ISSET(DB_INIT_TXN)) { + __db_err(dbenv, + "DB_RECOVER and DB_RECOVER_FATAL require DB_TXN_INIT in DB_ENV->open"); + ret = EINVAL; + goto err; } + /* Save the flags passed to DB_ENV->open. */ + dbenv->open_flags = flags; /* - * Initialize the subsystems. Transactions imply logging but do not - * imply locking. While almost all applications want both locking - * and logging, it would not be unreasonable for a single threaded - * process to want transactions for atomicity guarantees, but not - * necessarily need concurrency. + * Initialize the subsystems. + * + * Initialize the replication area first, so that we can lock out this + * call if we're currently running recovery for replication. */ + if (LF_ISSET(DB_INIT_REP)) { + if ((ret = __rep_open(dbenv)) != 0) + goto err; + } + + rep_check = IS_ENV_REPLICATED(dbenv) ? 1 : 0; + if (rep_check) + __env_rep_enter(dbenv); + if (LF_ISSET(DB_INIT_MPOOL)) if ((ret = __memp_open(dbenv)) != 0) goto err; + /* + * Initialize the ciphering area prior to any running of recovery so + * that we can initialize the keys, etc. before recovery. + * + * !!! + * This must be after the mpool init, but before the log initialization + * because log_open may attempt to run log_recover during its open. + */ + if ((ret = __crypto_region_init(dbenv)) != 0) + goto err; + + /* + * Transactions imply logging but do not imply locking. While almost + * all applications want both locking and logging, it would not be + * unreasonable for a single threaded process to want transactions for + * atomicity guarantees, but not necessarily need concurrency. + */ if (LF_ISSET(DB_INIT_LOG | DB_INIT_TXN)) if ((ret = __log_open(dbenv)) != 0) goto err; if (LF_ISSET(DB_INIT_LOCK)) if ((ret = __lock_open(dbenv)) != 0) goto err; + if (LF_ISSET(DB_INIT_TXN)) { if ((ret = __txn_open(dbenv)) != 0) goto err; @@ -232,44 +291,105 @@ __dbenv_open(dbenv, db_home, flags, mode) * If the application is running with transactions, initialize * the function tables. */ - if ((ret = __bam_init_recover(dbenv)) != 0) + if ((ret = __bam_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) goto err; - if ((ret = __crdel_init_recover(dbenv)) != 0) + if ((ret = __crdel_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) goto err; - if ((ret = __db_init_recover(dbenv)) != 0) + if ((ret = __db_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) goto err; - if ((ret = __ham_init_recover(dbenv)) != 0) + if ((ret = __dbreg_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) goto err; - if ((ret = __log_init_recover(dbenv)) != 0) + if ((ret = __fop_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) goto err; - if ((ret = __qam_init_recover(dbenv)) != 0) + if ((ret = __ham_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) goto err; - if ((ret = __txn_init_recover(dbenv)) != 0) + if ((ret = __qam_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) goto err; - - /* - * If the application specified their own recovery - * initialization function, call it. - */ - if (dbenv->db_recovery_init != NULL && - (ret = dbenv->db_recovery_init(dbenv)) != 0) + if ((ret = __txn_init_recover(dbenv, &dbenv->recover_dtab, + &dbenv->recover_dtab_size)) != 0) goto err; /* Perform recovery for any previous run. */ if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && - (ret = __db_apprec(dbenv, + (ret = __db_apprec(dbenv, NULL, NULL, 1, LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))) != 0) goto err; } + + /* + * Initialize the DB list, and its mutex as necessary. If the env + * handle isn't free-threaded we don't need a mutex because there + * will never be more than a single DB handle on the list. If the + * mpool wasn't initialized, then we can't ever open a DB handle. + * + * We also need to initialize the MT mutex as necessary, so do them + * both. If we error, __dbenv_refresh() will clean up. + * + * !!! + * This must come after the __memp_open call above because if we are + * recording mutexes for system resources, we will do it in the mpool + * region for environments and db handles. So, the mpool region must + * already be initialized. + */ + LIST_INIT(&dbenv->dblist); + if (F_ISSET(dbenv, DB_ENV_THREAD) && LF_ISSET(DB_INIT_MPOOL)) { + dbmp = dbenv->mp_handle; + if ((ret = __db_mutex_setup( + dbenv, dbmp->reginfo, &dbenv->dblist_mutexp, + MUTEX_ALLOC | MUTEX_THREAD)) != 0) + goto err; + if ((ret = __db_mutex_setup( + dbenv, dbmp->reginfo, &dbenv->mt_mutexp, + MUTEX_ALLOC | MUTEX_THREAD)) != 0) + goto err; + } + + /* + * If we've created the regions, are running with transactions, and did + * not just run recovery, we need to log the fact that the transaction + * IDs got reset. + * + * If we ran recovery, there may be prepared-but-not-yet-committed + * transactions that need to be resolved. Recovery resets the minimum + * transaction ID and logs the reset if that's appropriate, so we + * don't need to do anything here in the recover case. + */ + if (TXN_ON(dbenv) && + F_ISSET((REGINFO *)dbenv->reginfo, REGION_CREATE) && + !LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && + (ret = __txn_reset(dbenv)) != 0) + goto err; + + if (rep_check) + __env_rep_exit(dbenv); + return (0); -err: (void)__dbenv_refresh(dbenv); +err: /* If we fail after creating the regions, remove them. */ + if (dbenv->reginfo != NULL && + F_ISSET((REGINFO *)dbenv->reginfo, REGION_CREATE)) { + ret = __db_panic(dbenv, ret); + + /* Refresh the DB_ENV so we can use it to call remove. */ + (void)__dbenv_refresh(dbenv, orig_flags, rep_check); + (void)__dbenv_remove_int(dbenv, db_home, DB_FORCE); + (void)__dbenv_refresh(dbenv, orig_flags, 0); + } else + (void)__dbenv_refresh(dbenv, orig_flags, rep_check); + return (ret); } /* * __dbenv_remove -- - * Discard an environment. + * DB_ENV->remove. * * PUBLIC: int __dbenv_remove __P((DB_ENV *, const char *, u_int32_t)); */ @@ -283,43 +403,45 @@ __dbenv_remove(dbenv, db_home, flags) #undef OKFLAGS #define OKFLAGS \ - DB_FORCE | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT + (DB_FORCE | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT) /* Validate arguments. */ - if ((ret = __db_fchk(dbenv, "DBENV->remove", flags, OKFLAGS)) != 0) - goto err; + if ((ret = __db_fchk(dbenv, "DB_ENV->remove", flags, OKFLAGS)) != 0) + return (ret); - /* - * A hard-to-debug error is calling DBENV->remove after open. That's - * not legal. You have to close the original, already opened handle - * and then allocate a new DBENV handle to use for DBENV->remove. - */ - if (F_ISSET(dbenv, DB_ENV_OPEN_CALLED)) { - __db_err(dbenv, - "DBENV handle opened, not usable for remove method."); - return (EINVAL); - } + ENV_ILLEGAL_AFTER_OPEN(dbenv, "DB_ENV->remove"); - /* Initialize the DB_ENV structure. */ - if ((ret = __dbenv_config(dbenv, db_home, flags)) != 0) - goto err; + ret = __dbenv_remove_int(dbenv, db_home, flags); - /* Remove the environment. */ - ret = __db_e_remove(dbenv, LF_ISSET(DB_FORCE) ? 1 : 0); - - /* Discard any resources we've acquired. */ -err: if ((t_ret = __dbenv_refresh(dbenv)) != 0 && ret == 0) + if ((t_ret = __dbenv_close(dbenv, 0)) != 0 && ret == 0) ret = t_ret; - memset(dbenv, CLEAR_BYTE, sizeof(DB_ENV)); - __os_free(dbenv, sizeof(DB_ENV)); - return (ret); } /* + * __dbenv_remove_int -- + * Discard an environment, internal version. + */ +static int +__dbenv_remove_int(dbenv, db_home, flags) + DB_ENV *dbenv; + const char *db_home; + u_int32_t flags; +{ + int ret; + + /* Initialize the DB_ENV structure. */ + if ((ret = __dbenv_config(dbenv, db_home, flags)) != 0) + return (ret); + + /* Remove the environment. */ + return (__db_e_remove(dbenv, flags)); +} + +/* * __dbenv_config -- - * Initialize the DB_ENV structure. + * Minor initialization of the DB_ENV structure, read the DB_CONFIG file. */ static int __dbenv_config(dbenv, db_home, flags) @@ -329,148 +451,269 @@ __dbenv_config(dbenv, db_home, flags) { FILE *fp; int ret; - char *lp, buf[MAXPATHLEN * 2]; + char *p, buf[256]; - /* Set the database home. */ + /* + * Set the database home. Do this before calling __db_appname, + * it uses the home directory. + */ if ((ret = __db_home(dbenv, db_home, flags)) != 0) return (ret); - /* - * Parse the config file. - * - * !!! - * Don't use sprintf(3)/snprintf(3) -- the former is dangerous, and - * the latter isn't standard, and we're manipulating strings handed - * us by the application. - */ - if (dbenv->db_home != NULL) { -#define CONFIG_NAME "/DB_CONFIG" - if (strlen(dbenv->db_home) + - strlen(CONFIG_NAME) + 1 > sizeof(buf)) { - ret = ENAMETOOLONG; - return (ret); - } - (void)strcpy(buf, dbenv->db_home); - (void)strcat(buf, CONFIG_NAME); - if ((fp = fopen(buf, "r")) != NULL) { - while (fgets(buf, sizeof(buf), fp) != NULL) { - if ((lp = strchr(buf, '\n')) == NULL) { - __db_err(dbenv, - "%s: line too long", CONFIG_NAME); - (void)fclose(fp); - ret = EINVAL; - return (ret); - } - *lp = '\0'; - if (buf[0] == '\0' || - buf[0] == '#' || isspace((int)buf[0])) - continue; - - if ((ret = __db_parse(dbenv, buf)) != 0) { - (void)fclose(fp); - return (ret); - } + /* Parse the config file. */ + if ((ret = + __db_appname(dbenv, DB_APP_NONE, "DB_CONFIG", 0, NULL, &p)) != 0) + return (ret); + + fp = fopen(p, "r"); + __os_free(dbenv, p); + + if (fp != NULL) { + while (fgets(buf, sizeof(buf), fp) != NULL) { + if ((p = strchr(buf, '\n')) != NULL) + *p = '\0'; + else if (strlen(buf) + 1 == sizeof(buf)) { + __db_err(dbenv, "DB_CONFIG: line too long"); + (void)fclose(fp); + return (EINVAL); + } + if (buf[0] == '\0' || + buf[0] == '#' || isspace((int)buf[0])) + continue; + + if ((ret = __db_parse(dbenv, buf)) != 0) { + (void)fclose(fp); + return (ret); } - (void)fclose(fp); } + (void)fclose(fp); } - /* Set up the tmp directory path. */ - if (dbenv->db_tmp_dir == NULL && (ret = __os_tmpdir(dbenv, flags)) != 0) - return (ret); - /* - * The locking file descriptor is rarely on. Set the fd to -1, not - * because it's ever tested, but to make sure we catch mistakes. + * If no temporary directory path was specified in the config file, + * choose one. */ - if ((ret = - __os_calloc(dbenv, - 1, sizeof(*dbenv->lockfhp), &dbenv->lockfhp)) != 0) + if (dbenv->db_tmp_dir == NULL && (ret = __os_tmpdir(dbenv, flags)) != 0) return (ret); - dbenv->lockfhp->fd = -1; - /* - * Flag that the DB_ENV structure has been initialized. Note, this - * must be set before calling into the subsystems as it's used during - * file naming. - */ + /* Flag that the DB_ENV structure has been initialized. */ F_SET(dbenv, DB_ENV_OPEN_CALLED); return (0); } /* - * __dbenv_close -- - * DB_ENV destructor. + * __dbenv_close_pp -- + * DB_ENV->close pre/post processor. * - * PUBLIC: int __dbenv_close __P((DB_ENV *, u_int32_t)); + * PUBLIC: int __dbenv_close_pp __P((DB_ENV *, u_int32_t)); */ int -__dbenv_close(dbenv, flags) +__dbenv_close_pp(dbenv, flags) DB_ENV *dbenv; u_int32_t flags; { - int ret; + int rep_check, ret, t_ret; - COMPQUIET(flags, 0); + ret = 0; PANIC_CHECK(dbenv); - ret = __dbenv_refresh(dbenv); + /* + * Validate arguments, but as a DB_ENV handle destructor, we can't + * fail. + */ + if (flags != 0 && + (t_ret = __db_ferr(dbenv, "DB_ENV->close", 0)) != 0 && ret == 0) + ret = t_ret; + + rep_check = IS_ENV_REPLICATED(dbenv) ? 1 : 0; + if (rep_check) + __env_rep_enter(dbenv); + + if ((t_ret = __dbenv_close(dbenv, rep_check)) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} - /* Discard the structure if we allocated it. */ - if (!F_ISSET(dbenv, DB_ENV_USER_ALLOC)) { - memset(dbenv, CLEAR_BYTE, sizeof(DB_ENV)); - __os_free(dbenv, sizeof(DB_ENV)); +/* + * __dbenv_close -- + * DB_ENV->close. + * + * PUBLIC: int __dbenv_close __P((DB_ENV *, int)); + */ +int +__dbenv_close(dbenv, rep_check) + DB_ENV *dbenv; + int rep_check; +{ + int ret, t_ret; + char **p; + + ret = 0; + + /* + * Before checking the reference count, we have to see if we were in + * the middle of restoring transactions and need to close the open + * files. + */ + if (TXN_ON(dbenv) && (t_ret = __txn_preclose(dbenv)) != 0 && ret == 0) + ret = t_ret; + + if (REP_ON(dbenv) && + (t_ret = __rep_preclose(dbenv, 1)) != 0 && ret == 0) + ret = t_ret; + + if (dbenv->db_ref != 0) { + __db_err(dbenv, + "Database handles open during environment close"); + if (ret == 0) + ret = EINVAL; + } + + /* + * Detach from the regions and undo the allocations done by + * DB_ENV->open. + */ + if ((t_ret = __dbenv_refresh(dbenv, 0, rep_check)) != 0 && ret == 0) + ret = t_ret; + + /* Do per-subsystem destruction. */ + __lock_dbenv_close(dbenv); + + if ((t_ret = __rep_dbenv_close(dbenv)) != 0 && ret == 0) + ret = t_ret; + +#ifdef HAVE_CRYPTO + /* + * Crypto comes last, because higher level close functions needs + * cryptography. + */ + if ((t_ret = __crypto_dbenv_close(dbenv)) != 0 && ret == 0) + ret = t_ret; +#endif + + /* Release any string-based configuration parameters we've copied. */ + if (dbenv->db_log_dir != NULL) + __os_free(dbenv, dbenv->db_log_dir); + if (dbenv->db_tmp_dir != NULL) + __os_free(dbenv, dbenv->db_tmp_dir); + if (dbenv->db_data_dir != NULL) { + for (p = dbenv->db_data_dir; *p != NULL; ++p) + __os_free(dbenv, *p); + __os_free(dbenv, dbenv->db_data_dir); } + /* Discard the structure. */ + memset(dbenv, CLEAR_BYTE, sizeof(DB_ENV)); + __os_free(NULL, dbenv); + return (ret); } /* * __dbenv_refresh -- - * Refresh the DB_ENV structure, releasing any allocated resources. + * Refresh the DB_ENV structure, releasing resources allocated by + * DB_ENV->open, and returning it to the state it was in just before + * open was called. (Note that this means that any state set by + * pre-open configuration functions must be preserved.) */ static int -__dbenv_refresh(dbenv) +__dbenv_refresh(dbenv, orig_flags, rep_check) DB_ENV *dbenv; + u_int32_t orig_flags; + int rep_check; { + DB_MPOOL *dbmp; int ret, t_ret; - char **p; ret = 0; /* * Close subsystems, in the reverse order they were opened (txn * must be first, it may want to discard locks and flush the log). + * + * !!! + * Note that these functions, like all of __dbenv_refresh, only undo + * the effects of __dbenv_open. Functions that undo work done by + * db_env_create or by a configurator function should go in + * __dbenv_close. */ - if (TXN_ON(dbenv)) { - if ((t_ret = __txn_close(dbenv)) != 0 && ret == 0) - ret = t_ret; - } + if (TXN_ON(dbenv) && + (t_ret = __txn_dbenv_refresh(dbenv)) != 0 && ret == 0) + ret = t_ret; - if (LOCKING_ON(dbenv)) { - if ((t_ret = __lock_close(dbenv)) != 0 && ret == 0) - ret = t_ret; - } - __lock_dbenv_close(dbenv); + if (LOGGING_ON(dbenv) && + (t_ret = __log_dbenv_refresh(dbenv)) != 0 && ret == 0) + ret = t_ret; - if (LOGGING_ON(dbenv)) { - if ((t_ret = __log_close(dbenv)) != 0 && ret == 0) - ret = t_ret; + /* + * Locking should come after logging, because closing log results + * in files closing which may require locks being released. + */ + if (LOCKING_ON(dbenv) && + (t_ret = __lock_dbenv_refresh(dbenv)) != 0 && ret == 0) + ret = t_ret; + + /* + * Discard DB list and its mutex. + * Discard the MT mutex. + * + * !!! + * This must be done before we close the mpool region because we + * may have allocated the DB handle mutex in the mpool region. + * It must be done *after* we close the log region, though, because + * we close databases and try to acquire the mutex when we close + * log file handles. Ick. + */ + LIST_INIT(&dbenv->dblist); + if (dbenv->dblist_mutexp != NULL) { + dbmp = dbenv->mp_handle; + __db_mutex_free(dbenv, dbmp->reginfo, dbenv->dblist_mutexp); + } + if (dbenv->mt_mutexp != NULL) { + dbmp = dbenv->mp_handle; + __db_mutex_free(dbenv, dbmp->reginfo, dbenv->mt_mutexp); + } + if (dbenv->mt != NULL) { + __os_free(dbenv, dbenv->mt); + dbenv->mt = NULL; } if (MPOOL_ON(dbenv)) { - if ((t_ret = __memp_close(dbenv)) != 0 && ret == 0) + /* + * If it's a private environment, flush the contents to disk. + * Recovery would have put everything back together, but it's + * faster and cleaner to flush instead. + */ + if (F_ISSET(dbenv, DB_ENV_PRIVATE) && + (t_ret = __memp_sync(dbenv, NULL)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __memp_dbenv_refresh(dbenv)) != 0 && ret == 0) ret = t_ret; } - /* Discard DB list and its mutex. */ - LIST_INIT(&dbenv->dblist); - if (dbenv->dblist_mutexp != NULL) - __db_mutex_free(dbenv, dbenv->reginfo, dbenv->dblist_mutexp); + /* + * If we're included in a shared replication handle count, this + * is our last chance to decrement that count. + * + * !!! + * We can't afford to do anything dangerous after we decrement the + * handle count, of course, as replication may be proceeding with + * client recovery. However, since we're discarding the regions + * as soon as we drop the handle count, there's little opportunity + * to do harm. + */ + if (rep_check) + __env_rep_exit(dbenv); /* Detach from the region. */ + /* + * Must come after we call __env_rep_exit above. + */ + __rep_dbenv_refresh(dbenv); + if (dbenv->reginfo != NULL) { if ((t_ret = __db_e_detach(dbenv, 0)) != 0 && ret == 0) ret = t_ret; @@ -481,51 +724,22 @@ __dbenv_refresh(dbenv) */ } - /* Clean up the structure. */ - dbenv->db_panic = 0; - + /* Undo changes and allocations done by __dbenv_open. */ if (dbenv->db_home != NULL) { - __os_freestr(dbenv->db_home); + __os_free(dbenv, dbenv->db_home); dbenv->db_home = NULL; } - if (dbenv->db_log_dir != NULL) { - __os_freestr(dbenv->db_log_dir); - dbenv->db_log_dir = NULL; - } - if (dbenv->db_tmp_dir != NULL) { - __os_freestr(dbenv->db_tmp_dir); - dbenv->db_tmp_dir = NULL; - } - if (dbenv->db_data_dir != NULL) { - for (p = dbenv->db_data_dir; *p != NULL; ++p) - __os_freestr(*p); - __os_free(dbenv->db_data_dir, - dbenv->data_cnt * sizeof(char **)); - dbenv->db_data_dir = NULL; - } - dbenv->data_cnt = dbenv->data_next = 0; + dbenv->open_flags = 0; dbenv->db_mode = 0; - if (dbenv->lockfhp != NULL) { - __os_free(dbenv->lockfhp, sizeof(*dbenv->lockfhp)); - dbenv->lockfhp = NULL; + if (dbenv->recover_dtab != NULL) { + __os_free(dbenv, dbenv->recover_dtab); + dbenv->recover_dtab = NULL; + dbenv->recover_dtab_size = 0; } - if (dbenv->dtab != NULL) { - __os_free(dbenv->dtab, - dbenv->dtab_size * sizeof(dbenv->dtab[0])); - dbenv->dtab = NULL; - dbenv->dtab_size = 0; - } - - dbenv->mp_mmapsize = 0; - dbenv->links.tqe_next = NULL; - dbenv->links.tqe_prev = NULL; - dbenv->xa_rmid = 0; - dbenv->xa_txn = 0; - - F_CLR(dbenv, ~(DB_ENV_STANDALONE | DB_ENV_USER_ALLOC)); + dbenv->flags = orig_flags; return (ret); } @@ -548,156 +762,103 @@ __dbenv_refresh(dbenv) } /* + * __dbenv_get_open_flags + * Retrieve the flags passed to DB_ENV->open. + * + * PUBLIC: int __dbenv_get_open_flags __P((DB_ENV *, u_int32_t *)); + */ +int +__dbenv_get_open_flags(dbenv, flagsp) + DB_ENV *dbenv; + u_int32_t *flagsp; +{ + ENV_ILLEGAL_BEFORE_OPEN(dbenv, "DB_ENV->get_open_flags"); + + *flagsp = dbenv->open_flags; + return (0); +} + +/* * __db_appname -- * Given an optional DB environment, directory and file name and type - * of call, build a path based on the DBENV->open rules, and return + * of call, build a path based on the DB_ENV->open rules, and return * it in allocated space. * * PUBLIC: int __db_appname __P((DB_ENV *, APPNAME, - * PUBLIC: const char *, const char *, u_int32_t, DB_FH *, char **)); + * PUBLIC: const char *, u_int32_t, DB_FH **, char **)); */ int -__db_appname(dbenv, appname, dir, file, tmp_oflags, fhp, namep) +__db_appname(dbenv, appname, file, tmp_oflags, fhpp, namep) DB_ENV *dbenv; APPNAME appname; - const char *dir, *file; + const char *file; u_int32_t tmp_oflags; - DB_FH *fhp; + DB_FH **fhpp; char **namep; { - DB_ENV etmp; size_t len, str_len; - int data_entry, ret, slash, tmp_create, tmp_free; - const char *a, *b, *c; + int data_entry, ret, slash, tmp_create; + const char *a, *b; char *p, *str; - a = b = c = NULL; + a = b = NULL; data_entry = -1; - tmp_create = tmp_free = 0; + tmp_create = 0; /* - * We don't return a name when creating temporary files, just a - * file handle. Default to an error now. + * We don't return a name when creating temporary files, just a file + * handle. Default to an error now. */ - if (fhp != NULL) - F_CLR(fhp, DB_FH_VALID); + if (fhpp != NULL) + *fhpp = NULL; if (namep != NULL) *namep = NULL; /* * Absolute path names are never modified. If the file is an absolute - * path, we're done. If the directory is, simply append the file and - * return. + * path, we're done. */ if (file != NULL && __os_abspath(file)) return (__os_strdup(dbenv, file, namep)); - if (dir != NULL && __os_abspath(dir)) { - a = dir; - goto done; - } - /* - * DB_ENV DIR APPNAME RESULT - * ------------------------------------------- - * null null none <tmp>/file - * null set none DIR/file - * set null none DB_HOME/file - * set set none DB_HOME/DIR/file - * - * DB_ENV FILE APPNAME RESULT - * ------------------------------------------- - * null null DB_APP_DATA <tmp>/<create> - * null set DB_APP_DATA ./file - * set null DB_APP_DATA <tmp>/<create> - * set set DB_APP_DATA DB_HOME/DB_DATA_DIR/file - * - * DB_ENV DIR APPNAME RESULT - * ------------------------------------------- - * null null DB_APP_LOG <tmp>/file - * null set DB_APP_LOG DIR/file - * set null DB_APP_LOG DB_HOME/DB_LOG_DIR/file - * set set DB_APP_LOG DB_HOME/DB_LOG_DIR/DIR/file - * - * DB_ENV APPNAME RESULT - * ------------------------------------------- - * null DB_APP_TMP* <tmp>/<create> - * set DB_APP_TMP* DB_HOME/DB_TMP_DIR/<create> + /* Everything else is relative to the environment home. */ + if (dbenv != NULL) + a = dbenv->db_home; + +retry: /* + * DB_APP_NONE: + * DB_HOME/file + * DB_APP_DATA: + * DB_HOME/DB_DATA_DIR/file + * DB_APP_LOG: + * DB_HOME/DB_LOG_DIR/file + * DB_APP_TMP: + * DB_HOME/DB_TMP_DIR/<create> */ -retry: switch (appname) { + switch (appname) { case DB_APP_NONE: - if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_OPEN_CALLED)) { - if (dir == NULL) - goto tmp; - a = dir; - } else { - a = dbenv->db_home; - b = dir; - } break; case DB_APP_DATA: - if (dir != NULL) { - __db_err(dbenv, - "DB_APP_DATA: illegal directory specification"); - return (EINVAL); - } - - if (file == NULL) { - tmp_create = 1; - goto tmp; - } - if (dbenv != NULL && F_ISSET(dbenv, DB_ENV_OPEN_CALLED)) { - a = dbenv->db_home; - if (dbenv->db_data_dir != NULL && - (b = dbenv->db_data_dir[++data_entry]) == NULL) { - data_entry = -1; - b = dbenv->db_data_dir[0]; - } + if (dbenv != NULL && dbenv->db_data_dir != NULL && + (b = dbenv->db_data_dir[++data_entry]) == NULL) { + data_entry = -1; + b = dbenv->db_data_dir[0]; } break; case DB_APP_LOG: - if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_OPEN_CALLED)) { - if (dir == NULL) - goto tmp; - a = dir; - } else { - a = dbenv->db_home; + if (dbenv != NULL) b = dbenv->db_log_dir; - c = dir; - } break; case DB_APP_TMP: - if (dir != NULL || file != NULL) { - __db_err(dbenv, - "DB_APP_TMP: illegal directory or file specification"); - return (EINVAL); - } - - tmp_create = 1; - if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_OPEN_CALLED)) - goto tmp; - else { - a = dbenv->db_home; + if (dbenv != NULL) b = dbenv->db_tmp_dir; - } + tmp_create = 1; break; } - /* Reference a file from the appropriate temporary directory. */ - if (0) { -tmp: if (dbenv == NULL || !F_ISSET(dbenv, DB_ENV_OPEN_CALLED)) { - memset(&etmp, 0, sizeof(etmp)); - if ((ret = __os_tmpdir(&etmp, DB_USE_ENVIRON)) != 0) - return (ret); - tmp_free = 1; - a = etmp.db_tmp_dir; - } else - a = dbenv->db_tmp_dir; - } - -done: len = + len = (a == NULL ? 0 : strlen(a) + 1) + (b == NULL ? 0 : strlen(b) + 1) + - (c == NULL ? 0 : strlen(c) + 1) + (file == NULL ? 0 : strlen(file) + 1); /* @@ -705,13 +866,10 @@ done: len = * temporary space that we're going to need to create a temporary file * name. */ -#define DB_TRAIL "BDBXXXXXX" +#define DB_TRAIL "BDBXXXXX" str_len = len + sizeof(DB_TRAIL) + 10; - if ((ret = __os_malloc(dbenv, str_len, NULL, &str)) != 0) { - if (tmp_free) - __os_freestr(etmp.db_tmp_dir); + if ((ret = __os_malloc(dbenv, str_len, &str)) != 0) return (ret); - } slash = 0; p = str; @@ -720,31 +878,25 @@ done: len = DB_ADDSTR(file); *p = '\0'; - /* Discard any space allocated to find the temp directory. */ - if (tmp_free) { - __os_freestr(etmp.db_tmp_dir); - tmp_free = 0; - } - /* * If we're opening a data file, see if it exists. If it does, * return it, otherwise, try and find another one to open. */ - if (data_entry != -1 && __os_exists(str, NULL) != 0) { - __os_free(str, str_len); - a = b = c = NULL; + if (__os_exists(str, NULL) != 0 && data_entry != -1) { + __os_free(dbenv, str); + b = NULL; goto retry; } /* Create the file if so requested. */ if (tmp_create && - (ret = __db_tmp_open(dbenv, tmp_oflags, str, fhp)) != 0) { - __os_free(str, str_len); + (ret = __db_tmp_open(dbenv, tmp_oflags, str, fhpp)) != 0) { + __os_free(dbenv, str); return (ret); } if (namep == NULL) - __os_free(str, str_len); + __os_free(dbenv, str); else *namep = str; return (0); @@ -753,8 +905,10 @@ done: len = /* * __db_home -- * Find the database home. + * + * PUBLIC: int __db_home __P((DB_ENV *, const char *, u_int32_t)); */ -static int +int __db_home(dbenv, db_home, flags) DB_ENV *dbenv; const char *db_home; @@ -779,6 +933,13 @@ __db_home(dbenv, db_home, flags) return (p == NULL ? 0 : __os_strdup(dbenv, p, &dbenv->db_home)); } +#define __DB_OVFL(v, max) \ + if (v > max) { \ + __v = v; \ + __max = max; \ + goto toobig; \ + } + /* * __db_parse -- * Parse a single NAME VALUE pair. @@ -788,16 +949,18 @@ __db_parse(dbenv, s) DB_ENV *dbenv; char *s; { - u_long v1, v2, v3; + u_long __max, __v, v1, v2, v3; u_int32_t flags; char *name, *p, *value, v4; /* * !!! - * The value of 40 is hard-coded into format arguments to sscanf + * The constant 40 is hard-coded into format arguments to sscanf * below, it can't be changed here without changing it there, too. + * The additional bytes are for a trailing nul byte and because we + * are reading user input -- I don't want to risk any off-by-ones. */ - char arg[40]; + char arg[40 + 5]; /* * Name/value pairs are parsed as two white-space separated strings. @@ -831,47 +994,92 @@ illegal: __db_err(dbenv, "mis-formatted name-value pair: %s", s); if (!strcasecmp(name, "set_cachesize")) { if (sscanf(value, "%lu %lu %lu %c", &v1, &v2, &v3, &v4) != 3) goto badarg; - return (dbenv->set_cachesize(dbenv, v1, v2, v3)); + __DB_OVFL(v1, UINT32_T_MAX); + __DB_OVFL(v2, UINT32_T_MAX); + __DB_OVFL(v3, 10000); + return (__memp_set_cachesize( + dbenv, (u_int32_t)v1, (u_int32_t)v2, (int)v3)); } if (!strcasecmp(name, "set_data_dir") || !strcasecmp(name, "db_data_dir")) /* Compatibility. */ - return (dbenv->set_data_dir(dbenv, value)); + return (__dbenv_set_data_dir(dbenv, value)); if (!strcasecmp(name, "set_flags")) { if (sscanf(value, "%40s %c", arg, &v4) != 1) goto badarg; + if (!strcasecmp(value, "db_auto_commit")) + return (__dbenv_set_flags(dbenv, DB_AUTO_COMMIT, 1)); if (!strcasecmp(value, "db_cdb_alldb")) - return (dbenv->set_flags(dbenv, DB_CDB_ALLDB, 1)); + return (__dbenv_set_flags(dbenv, DB_CDB_ALLDB, 1)); + if (!strcasecmp(value, "db_direct_db")) + return (__dbenv_set_flags(dbenv, DB_DIRECT_DB, 1)); + if (!strcasecmp(value, "db_direct_log")) + return (__dbenv_set_flags(dbenv, DB_DIRECT_LOG, 1)); + if (!strcasecmp(value, "db_log_autoremove")) + return (__dbenv_set_flags(dbenv, DB_LOG_AUTOREMOVE, 1)); + if (!strcasecmp(value, "db_nolocking")) + return (__dbenv_set_flags(dbenv, DB_NOLOCKING, 1)); if (!strcasecmp(value, "db_nommap")) - return (dbenv->set_flags(dbenv, DB_NOMMAP, 1)); + return (__dbenv_set_flags(dbenv, DB_NOMMAP, 1)); + if (!strcasecmp(value, "db_nopanic")) + return (__dbenv_set_flags(dbenv, DB_NOPANIC, 1)); + if (!strcasecmp(value, "db_overwrite")) + return (__dbenv_set_flags(dbenv, DB_OVERWRITE, 1)); + if (!strcasecmp(value, "db_region_init")) + return (__dbenv_set_flags(dbenv, DB_REGION_INIT, 1)); if (!strcasecmp(value, "db_txn_nosync")) - return (dbenv->set_flags(dbenv, DB_TXN_NOSYNC, 1)); + return (__dbenv_set_flags(dbenv, DB_TXN_NOSYNC, 1)); + if (!strcasecmp(value, "db_txn_not_durable")) + return ( + __dbenv_set_flags(dbenv, DB_TXN_NOT_DURABLE, 1)); + if (!strcasecmp(value, "db_txn_write_nosync")) + return ( + __dbenv_set_flags(dbenv, DB_TXN_WRITE_NOSYNC, 1)); + if (!strcasecmp(value, "db_yieldcpu")) + return (__dbenv_set_flags(dbenv, DB_YIELDCPU, 1)); goto badarg; } if (!strcasecmp(name, "set_lg_bsize")) { if (sscanf(value, "%lu %c", &v1, &v4) != 1) goto badarg; - return (dbenv->set_lg_bsize(dbenv, v1)); + __DB_OVFL(v1, UINT32_T_MAX); + return (__log_set_lg_bsize(dbenv, (u_int32_t)v1)); } if (!strcasecmp(name, "set_lg_max")) { if (sscanf(value, "%lu %c", &v1, &v4) != 1) goto badarg; - return (dbenv->set_lg_max(dbenv, v1)); + __DB_OVFL(v1, UINT32_T_MAX); + return (__log_set_lg_max(dbenv, (u_int32_t)v1)); + } + + if (!strcasecmp(name, "set_lg_regionmax")) { + if (sscanf(value, "%lu %c", &v1, &v4) != 1) + goto badarg; + __DB_OVFL(v1, UINT32_T_MAX); + return (__log_set_lg_regionmax(dbenv, (u_int32_t)v1)); } if (!strcasecmp(name, "set_lg_dir") || !strcasecmp(name, "db_log_dir")) /* Compatibility. */ - return (dbenv->set_lg_dir(dbenv, value)); + return (__log_set_lg_dir(dbenv, value)); if (!strcasecmp(name, "set_lk_detect")) { if (sscanf(value, "%40s %c", arg, &v4) != 1) goto badarg; if (!strcasecmp(value, "db_lock_default")) flags = DB_LOCK_DEFAULT; + else if (!strcasecmp(value, "db_lock_expire")) + flags = DB_LOCK_EXPIRE; + else if (!strcasecmp(value, "db_lock_maxlocks")) + flags = DB_LOCK_MAXLOCKS; + else if (!strcasecmp(value, "db_lock_minlocks")) + flags = DB_LOCK_MINLOCKS; + else if (!strcasecmp(value, "db_lock_minwrite")) + flags = DB_LOCK_MINWRITE; else if (!strcasecmp(value, "db_lock_oldest")) flags = DB_LOCK_OLDEST; else if (!strcasecmp(value, "db_lock_random")) @@ -880,65 +1088,89 @@ illegal: __db_err(dbenv, "mis-formatted name-value pair: %s", s); flags = DB_LOCK_YOUNGEST; else goto badarg; - return (dbenv->set_lk_detect(dbenv, flags)); + return (__lock_set_lk_detect(dbenv, flags)); } if (!strcasecmp(name, "set_lk_max")) { if (sscanf(value, "%lu %c", &v1, &v4) != 1) goto badarg; - return (dbenv->set_lk_max(dbenv, v1)); + __DB_OVFL(v1, UINT32_T_MAX); + return (__lock_set_lk_max(dbenv, (u_int32_t)v1)); } if (!strcasecmp(name, "set_lk_max_locks")) { if (sscanf(value, "%lu %c", &v1, &v4) != 1) goto badarg; - return (dbenv->set_lk_max_locks(dbenv, v1)); + __DB_OVFL(v1, UINT32_T_MAX); + return (__lock_set_lk_max_locks(dbenv, (u_int32_t)v1)); } if (!strcasecmp(name, "set_lk_max_lockers")) { if (sscanf(value, "%lu %c", &v1, &v4) != 1) goto badarg; - return (dbenv->set_lk_max_lockers(dbenv, v1)); + __DB_OVFL(v1, UINT32_T_MAX); + return (__lock_set_lk_max_lockers(dbenv, (u_int32_t)v1)); } if (!strcasecmp(name, "set_lk_max_objects")) { if (sscanf(value, "%lu %c", &v1, &v4) != 1) goto badarg; - return (dbenv->set_lk_max_objects(dbenv, v1)); + __DB_OVFL(v1, UINT32_T_MAX); + return (__lock_set_lk_max_objects(dbenv, (u_int32_t)v1)); + } + + if (!strcasecmp(name, "set_lock_timeout")) { + if (sscanf(value, "%lu %c", &v1, &v4) != 1) + goto badarg; + __DB_OVFL(v1, UINT32_T_MAX); + return (__lock_set_env_timeout( + dbenv, (u_int32_t)v1, DB_SET_LOCK_TIMEOUT)); } if (!strcasecmp(name, "set_mp_mmapsize")) { if (sscanf(value, "%lu %c", &v1, &v4) != 1) goto badarg; - return (dbenv->set_mp_mmapsize(dbenv, v1)); + __DB_OVFL(v1, UINT32_T_MAX); + return (__memp_set_mp_mmapsize(dbenv, (u_int32_t)v1)); } if (!strcasecmp(name, "set_region_init")) { if (sscanf(value, "%lu %c", &v1, &v4) != 1 || v1 != 1) goto badarg; - return (db_env_set_region_init(v1)); + return (__dbenv_set_flags( + dbenv, DB_REGION_INIT, v1 == 0 ? 0 : 1)); } if (!strcasecmp(name, "set_shm_key")) { if (sscanf(value, "%lu %c", &v1, &v4) != 1) goto badarg; - return (dbenv->set_shm_key(dbenv, (long)v1)); + return (__dbenv_set_shm_key(dbenv, (long)v1)); } if (!strcasecmp(name, "set_tas_spins")) { if (sscanf(value, "%lu %c", &v1, &v4) != 1) goto badarg; - return (db_env_set_tas_spins(v1)); + __DB_OVFL(v1, UINT32_T_MAX); + return (__dbenv_set_tas_spins(dbenv, (u_int32_t)v1)); } if (!strcasecmp(name, "set_tmp_dir") || !strcasecmp(name, "db_tmp_dir")) /* Compatibility.*/ - return (dbenv->set_tmp_dir(dbenv, value)); + return (__dbenv_set_tmp_dir(dbenv, value)); if (!strcasecmp(name, "set_tx_max")) { if (sscanf(value, "%lu %c", &v1, &v4) != 1) goto badarg; - return (dbenv->set_tx_max(dbenv, v1)); + __DB_OVFL(v1, UINT32_T_MAX); + return (__txn_set_tx_max(dbenv, (u_int32_t)v1)); + } + + if (!strcasecmp(name, "set_txn_timeout")) { + if (sscanf(value, "%lu %c", &v1, &v4) != 1) + goto badarg; + __DB_OVFL(v1, UINT32_T_MAX); + return (__lock_set_env_timeout( + dbenv, (u_int32_t)v1, DB_SET_TXN_TIMEOUT)); } if (!strcasecmp(name, "set_verbose")) { @@ -951,11 +1183,13 @@ illegal: __db_err(dbenv, "mis-formatted name-value pair: %s", s); flags = DB_VERB_DEADLOCK; else if (!strcasecmp(value, "db_verb_recovery")) flags = DB_VERB_RECOVERY; + else if (!strcasecmp(value, "db_verb_replication")) + flags = DB_VERB_REPLICATION; else if (!strcasecmp(value, "db_verb_waitsfor")) flags = DB_VERB_WAITSFOR; else goto badarg; - return (dbenv->set_verbose(dbenv, flags, 1)); + return (__dbenv_set_verbose(dbenv, flags, 1)); } __db_err(dbenv, "unrecognized name-value pair: %s", s); @@ -963,6 +1197,10 @@ illegal: __db_err(dbenv, "mis-formatted name-value pair: %s", s); badarg: __db_err(dbenv, "incorrect arguments for name-value pair: %s", s); return (EINVAL); + +toobig: __db_err(dbenv, + "%s: %lu larger than maximum value %lu", s, __v, __max); + return (EINVAL); } /* @@ -970,13 +1208,13 @@ badarg: __db_err(dbenv, "incorrect arguments for name-value pair: %s", s); * Create a temporary file. */ static int -__db_tmp_open(dbenv, tmp_oflags, path, fhp) +__db_tmp_open(dbenv, tmp_oflags, path, fhpp) DB_ENV *dbenv; u_int32_t tmp_oflags; char *path; - DB_FH *fhp; + DB_FH **fhpp; { - u_long pid; + u_int32_t id; int mode, isdir, ret; const char *p; char *trv; @@ -1001,12 +1239,9 @@ __db_tmp_open(dbenv, tmp_oflags, path, fhp) for (p = DB_TRAIL; (*++trv = *p) != '\0'; ++p) ; - /* - * Replace the X's with the process ID. Pid should be a pid_t, - * but we use unsigned long for portability. - */ - for (pid = getpid(); *--trv == 'X'; pid /= 10) - switch (pid % 10) { + /* Replace the X's with the process ID. */ + for (__os_id(&id); *--trv == 'X'; id /= 10) + switch (id % 10) { case 0: *trv = '0'; break; case 1: *trv = '1'; break; case 2: *trv = '2'; break; @@ -1017,6 +1252,8 @@ __db_tmp_open(dbenv, tmp_oflags, path, fhp) case 7: *trv = '7'; break; case 8: *trv = '8'; break; case 9: *trv = '9'; break; + default: /* Impossible. */ + break; } ++trv; @@ -1026,7 +1263,8 @@ __db_tmp_open(dbenv, tmp_oflags, path, fhp) /* Loop, trying to open a file. */ for (;;) { if ((ret = __os_open(dbenv, path, - tmp_oflags | DB_OSO_CREATE | DB_OSO_EXCL, mode, fhp)) == 0) + tmp_oflags | DB_OSO_CREATE | DB_OSO_EXCL | DB_OSO_TEMP, + mode, fhpp)) == 0) return (0); /* diff --git a/db/env/env_recover.c b/db/env/env_recover.c index bc5e47605..80de4c1c7 100644 --- a/db/env/env_recover.c +++ b/db/env/env_recover.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2003 * Sleepycat Software. All rights reserved. */ @@ -9,9 +9,9 @@ #ifndef lint static const char copyright[] = - "Copyright (c) 1996-2000\nSleepycat Software Inc. All rights reserved.\n"; + "Copyright (c) 1996-2003\nSleepycat Software Inc. All rights reserved.\n"; static const char revid[] = - "$Id: env_recover.c,v 11.33 2001/01/04 22:38:42 ubell Exp $"; + "$Id: env_recover.c,v 11.112 2003/09/13 18:46:20 bostic Exp $"; #endif #ifndef NO_SYSTEM_INCLUDES @@ -32,37 +32,65 @@ static const char revid[] = #endif #include "db_int.h" -#include "db_page.h" -#include "db_dispatch.h" -#include "db_am.h" -#include "log.h" -#include "txn.h" +#include "dbinc/db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/log.h" +#include "dbinc/txn.h" +#include "dbinc/mp.h" +#include "dbinc/db_am.h" -static float __lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int)); -static int __log_earliest __P((DB_ENV *, int32_t *, DB_LSN *)); +static int __log_backup __P((DB_ENV *, DB_LOGC *, DB_LSN *, DB_LSN *)); +static int __log_earliest __P((DB_ENV *, DB_LOGC *, int32_t *, DB_LSN *)); +static double __lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int)); /* * __db_apprec -- - * Perform recovery. + * Perform recovery. If max_lsn is non-NULL, then we are trying + * to synchronize this system up with another system that has a max + * LSN of max_lsn, so we need to roll back sufficiently far for that + * to work. See __log_backup for details. * - * PUBLIC: int __db_apprec __P((DB_ENV *, u_int32_t)); + * PUBLIC: int __db_apprec __P((DB_ENV *, DB_LSN *, DB_LSN *, u_int32_t, + * PUBLIC: u_int32_t)); */ int -__db_apprec(dbenv, flags) +__db_apprec(dbenv, max_lsn, trunclsn, update, flags) DB_ENV *dbenv; - u_int32_t flags; + DB_LSN *max_lsn, *trunclsn; + u_int32_t update, flags; { DBT data; - DB_LSN ckp_lsn, first_lsn, last_lsn, lowlsn, lsn, open_lsn; + DB_LOGC *logc; + DB_LSN ckp_lsn, first_lsn, last_lsn, lowlsn, lsn, stop_lsn; + DB_REP *db_rep; DB_TXNREGION *region; + REP *rep; __txn_ckp_args *ckp_args; time_t now, tlow; - float nfiles; - int32_t low; - int is_thread, progress, ret; + int32_t log_size, low; + double nfiles; + int have_rec, is_thread, progress, ret, t_ret; + int (**dtab) __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); + u_int32_t hi_txn, txnid; + char *p, *pass, t1[60], t2[60]; void *txninfo; - COMPQUIET(nfiles, (float)0); + COMPQUIET(nfiles, (double)0); + + logc = NULL; + ckp_args = NULL; + dtab = NULL; + hi_txn = TXN_MAXIMUM; + txninfo = NULL; + pass = "initial"; + + /* + * XXX + * Get the log size. No locking required because we're single-threaded + * during recovery. + */ + log_size = + ((LOG *)(((DB_LOG *)dbenv->lg_handle)->reginfo.primary))->log_size; /* * Save the state of the thread flag -- we don't need it on at the @@ -70,60 +98,93 @@ __db_apprec(dbenv, flags) */ is_thread = F_ISSET(dbenv, DB_ENV_THREAD) ? 1 : 0; F_CLR(dbenv, DB_ENV_THREAD); + + /* + * If we need to, update the env handle timestamp. The timestamp + * field can be updated here without acquiring the rep mutex + * because recovery is single-threaded, even in the case of + * replication. + */ + if (update && (db_rep = dbenv->rep_handle) != NULL && + (rep = db_rep->region) != NULL) + (void)time(&rep->timestamp); + + /* Set in-recovery flags. */ F_SET((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER); + region = ((DB_TXNMGR *)dbenv->tx_handle)->reginfo.primary; + F_SET(region, TXN_IN_RECOVERY); + + /* Allocate a cursor for the log. */ + if ((ret = __log_cursor(dbenv, &logc)) != 0) + goto err; /* - * If the user is specifying recover to a particular point in time, - * verify that the logs present are sufficient to do this. + * If the user is specifying recovery to a particular point in time + * or to a particular LSN, find the point to start recovery from. */ ZERO_LSN(lowlsn); - if (dbenv->tx_timestamp != 0) { - if ((ret = __log_earliest(dbenv, &low, &lowlsn)) != 0) - return (ret); + if (max_lsn != NULL) { + if ((ret = __log_backup(dbenv, logc, max_lsn, &lowlsn)) != 0) + goto err; + } else if (dbenv->tx_timestamp != 0) { + if ((ret = __log_earliest(dbenv, logc, &low, &lowlsn)) != 0) + goto err; if ((int32_t)dbenv->tx_timestamp < low) { - char t1[30], t2[30]; - - strcpy(t1, ctime(&dbenv->tx_timestamp)); + (void)snprintf(t1, sizeof(t1), + "%s", ctime(&dbenv->tx_timestamp)); + if ((p = strchr(t1, '\n')) != NULL) + *p = '\0'; tlow = (time_t)low; - strcpy(t2, ctime(&tlow)); + (void)snprintf(t2, sizeof(t2), "%s", ctime(&tlow)); + if ((p = strchr(t2, '\n')) != NULL) + *p = '\0'; __db_err(dbenv, - "Invalid recovery timestamp %.*s; earliest time is %.*s", - 24, t1, 24, t2); - return (EINVAL); + "Invalid recovery timestamp %s; earliest time is %s", + t1, t2); + ret = EINVAL; + goto err; } } - /* Initialize the transaction list. */ - if ((ret = __db_txnlist_init(dbenv, &txninfo)) != 0) - return (ret); - /* * Recovery is done in three passes: * Pass #0: - * We need to find the position from which we will open files - * We need to open files beginning with the last to next - * checkpoint because we might have crashed after writing the - * last checkpoint record, but before having written out all - * the open file information. + * We need to find the position from which we will open files. + * We need to open files beginning with the earlier of the + * most recent checkpoint LSN and a checkpoint LSN before the + * recovery timestamp, if specified. We need to be before the + * most recent checkpoint LSN because we are going to collect + * information about which transactions were begun before we + * start rolling forward. Those that were should never be undone + * because queue cannot use LSNs to determine what operations can + * safely be aborted and it cannot rollback operations in + * transactions for which there may be records not processed + * during recovery. We need to consider earlier points in time + * in case we are recovering to a particular timestamp. * * Pass #1: - * Read forward through the log from the second to last checkpoint - * opening and closing files so that at the end of the log we have - * the "current" set of files open. + * Read forward through the log from the position found in pass 0 + * opening and closing files, and recording transactions for which + * we've seen their first record (the transaction's prev_lsn is + * 0,0). At the end of this pass, we know all transactions for + * which we've seen begins and we have the "current" set of files + * open. * * Pass #2: * Read backward through the log undoing any uncompleted TXNs. - * There are three cases: - * 1. If doing catastrophic recovery, we read to the beginning - * of the log + * There are four cases: + * 1. If doing catastrophic recovery, we read to the + * beginning of the log * 2. If we are doing normal reovery, then we have to roll - * back to the most recent checkpoint that occurs - * before the most recent checkpoint LSN, which is - * returned by __log_findckp(). + * back to the most recent checkpoint LSN. * 3. If we are recovering to a point in time, then we have * to roll back to the checkpoint whose ckp_lsn is earlier * than the specified time. __log_earliest will figure * this out for us. + * 4. If we are recovering back to a particular LSN, then + * we have to roll back to the checkpoint whose ckp_lsn + * is earlier than the max_lsn. __log_backup will figure + * that out for us. * In case 2, "uncompleted TXNs" include all those who commited * after the user's specified timestamp. * @@ -133,6 +194,14 @@ __db_apprec(dbenv, flags) * specified rollback point). During this pass, checkpoint * file information is ignored, and file openings and closings * are redone. + * + * ckp_lsn -- lsn of the last checkpoint or the first in the log. + * first_lsn -- the lsn where the forward passes begin. + * last_lsn -- the last lsn in the log, used for feedback + * lowlsn -- the lsn we are rolling back to, if we are recovering + * to a point in time. + * lsn -- temporary use lsn. + * stop_lsn -- the point at which forward roll should stop */ /* @@ -143,132 +212,193 @@ __db_apprec(dbenv, flags) * same amount of time (a false assumption) and then use the %-age * of the amount of log traversed to figure out how much of the * pass we've accomplished. + * + * If we can't find any log records, we're kind of done. */ +#ifdef UMRW + ZERO_LSN(last_lsn); +#endif memset(&data, 0, sizeof(data)); - if (dbenv->db_feedback != NULL && - (ret = log_get(dbenv, &last_lsn, &data, DB_LAST)) != 0) - goto out; + if ((ret = __log_c_get(logc, &last_lsn, &data, DB_LAST)) != 0) { + if (ret == DB_NOTFOUND) + ret = 0; + else + __db_err(dbenv, "Last log record not found"); + goto err; + } + + do { + /* txnid is after rectype, which is a u_int32. */ + memcpy(&txnid, + (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid)); + + if (txnid != 0) + break; + } while ((ret = __log_c_get(logc, &lsn, &data, DB_PREV)) == 0); /* - * Pass #0 - * Find the second to last checkpoint in the log. This is the point - * from which we want to begin pass #1 (the open files pass). + * There are no transactions, so there is nothing to do unless + * we're recovering to an LSN. If we are, we need to proceed since + * we'll still need to do a vtruncate based on information we haven't + * yet collected. */ - ckp_args = NULL; + if (ret == DB_NOTFOUND) + ret = 0; + else if (ret != 0) + goto err; - if (LF_ISSET(DB_RECOVER_FATAL)) { - if ((ret = log_get(dbenv, &ckp_lsn, &data, DB_FIRST)) != 0) { - if (ret == DB_NOTFOUND) - ret = 0; - else - __db_err(dbenv, "First log record not found"); - goto out; + hi_txn = txnid; + + /* + * Pass #0 + * Find the LSN from which we begin OPENFILES. + * + * If this is a catastrophic recovery, or if no checkpoint exists + * in the log, the LSN is the first LSN in the log. + * + * Otherwise, it is the minimum of (1) the LSN in the last checkpoint + * and (2) the LSN in the checkpoint before any specified recovery + * timestamp or max_lsn. + */ + /* + * Get the first LSN in the log; it's an initial default + * even if this is not a catastrophic recovery. + */ + if ((ret = __log_c_get(logc, &ckp_lsn, &data, DB_FIRST)) != 0) { + if (ret == DB_NOTFOUND) + ret = 0; + else + __db_err(dbenv, "First log record not found"); + goto err; + } + first_lsn = ckp_lsn; + have_rec = 1; + + if (!LF_ISSET(DB_RECOVER_FATAL)) { + if ((ret = __txn_getckp(dbenv, &ckp_lsn)) == 0 && + (ret = __log_c_get(logc, &ckp_lsn, &data, DB_SET)) == 0) { + /* We have a recent checkpoint. This is LSN (1). */ + if ((ret = __txn_ckp_read(dbenv, + data.data, &ckp_args)) != 0) { + __db_err(dbenv, + "Invalid checkpoint record at [%ld][%ld]", + (u_long)ckp_lsn.file, + (u_long)ckp_lsn.offset); + goto err; + } + first_lsn = ckp_args->ckp_lsn; + have_rec = 0; } - open_lsn = ckp_lsn; - } else if ((ret = - log_get(dbenv, &ckp_lsn, &data, DB_CHECKPOINT)) != 0) { + /* - * If we don't find a checkpoint, start from the beginning. - * If that fails, we're done. Note, we do not require that - * there be log records if we're performing recovery. + * If LSN (2) exists, use it if it's before LSN (1). + * (If LSN (1) doesn't exist, first_lsn is the + * beginning of the log, so will "win" this check.) + * + * XXX + * In the recovery-to-a-timestamp case, lowlsn is chosen by + * __log_earliest, and is the checkpoint LSN of the + * *earliest* checkpoint in the unreclaimed log. I + * (krinsky) believe that we could optimize this by looking + * instead for the LSN of the *latest* checkpoint before + * the timestamp of interest, but I'm not sure that this + * is worth doing right now. (We have to look for lowlsn + * and low anyway, to make sure the requested timestamp is + * somewhere in the logs we have, and all that's required + * is that we pick *some* checkpoint after the beginning of + * the logs and before the timestamp. */ -first: if ((ret = log_get(dbenv, &ckp_lsn, &data, DB_FIRST)) != 0) { - if (ret == DB_NOTFOUND) - ret = 0; - else - __db_err(dbenv, "First log record not found"); - goto out; + if ((dbenv->tx_timestamp != 0 || max_lsn != NULL) && + log_compare(&lowlsn, &first_lsn) < 0) { + DB_ASSERT(have_rec == 0); + first_lsn = lowlsn; } - open_lsn = ckp_lsn; - } else if ((ret = __txn_ckp_read(dbenv, data.data, &ckp_args)) != 0) { - __db_err(dbenv, "Invalid checkpoint record at [%ld][%ld]\n", - (u_long)ckp_lsn.file, (u_long)ckp_lsn.offset); - goto out; - } else if (IS_ZERO_LSN(ckp_args->last_ckp) || - (ret = log_get(dbenv, &ckp_args->last_ckp, &data, DB_SET)) != 0) - goto first; - else - open_lsn = ckp_args->last_ckp; + } + + /* Get the record at first_lsn if we don't have it already. */ + if (!have_rec && + (ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) != 0) { + __db_err(dbenv, "Checkpoint LSN record [%ld][%ld] not found", + (u_long)first_lsn.file, (u_long)first_lsn.offset); + goto err; + } if (dbenv->db_feedback != NULL) { - if (last_lsn.file == open_lsn.file) - nfiles = (float)(last_lsn.offset - open_lsn.offset) / - dbenv->lg_max; + if (last_lsn.file == first_lsn.file) + nfiles = (double) + (last_lsn.offset - first_lsn.offset) / log_size; else - nfiles = (float)(last_lsn.file - open_lsn.file) + - (float)(dbenv->lg_max - open_lsn.offset + - last_lsn.offset) / dbenv->lg_max; + nfiles = (double)(last_lsn.file - first_lsn.file) + + (double)(log_size - first_lsn.offset + + last_lsn.offset) / log_size; /* We are going to divide by nfiles; make sure it isn't 0. */ if (nfiles == 0) - nfiles = (float)0.001; + nfiles = (double)0.001; } + /* Find a low txnid. */ + ret = 0; + if (hi_txn != 0) do { + /* txnid is after rectype, which is a u_int32. */ + memcpy(&txnid, + (u_int8_t *)data.data + sizeof(u_int32_t), sizeof(txnid)); + + if (txnid != 0) + break; + } while ((ret = __log_c_get(logc, &lsn, &data, DB_NEXT)) == 0); + + /* + * There are no transactions and we're not recovering to an LSN (see + * above), so there is nothing to do. + */ + if (ret == DB_NOTFOUND) + ret = 0; + + /* Reset to the first lsn. */ + if (ret != 0 || + (ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) != 0) + goto err; + + /* Initialize the transaction list. */ + if ((ret = + __db_txnlist_init(dbenv, txnid, hi_txn, max_lsn, &txninfo)) != 0) + goto err; + /* * Pass #1 - * Now, ckp_lsn is either the lsn of the last checkpoint - * or the lsn of the first record in the log. Open_lsn is - * the second to last checkpoint or the beinning of the log; - * begin the open files pass from that lsn, and proceed to - * the end of the log. + * Run forward through the log starting at the first relevant lsn. */ - lsn = open_lsn; - for (;;) { - if (dbenv->db_feedback != NULL) { - progress = (int)(33 * (__lsn_diff(&open_lsn, - &last_lsn, &lsn, dbenv->lg_max, 1) / nfiles)); - dbenv->db_feedback(dbenv, DB_RECOVER, progress); - } - ret = __db_dispatch(dbenv, - &data, &lsn, DB_TXN_OPENFILES, txninfo); - if (ret != 0 && ret != DB_TXN_CKP) - goto msgerr; - if ((ret = log_get(dbenv, &lsn, &data, DB_NEXT)) != 0) { - if (ret == DB_NOTFOUND) - break; - goto out; - } - } + if ((ret = __env_openfiles(dbenv, logc, + txninfo, &data, &first_lsn, &last_lsn, nfiles, 1)) != 0) + goto err; + + /* If there were no transactions, then we can bail out early. */ + if (hi_txn == 0 && max_lsn == NULL) + goto done; /* * Pass #2. * - * Before we can begin pass #2, backward roll phase, we determine how - * far back in the log to recover. If we are doing catastrophic - * recovery, then we go as far back as we have files. If we are - * doing normal recovery, we go as back to the most recent checkpoint - * that occurs before the most recent checkpoint LSN. If we are - * recovering to a point in time, then rollback to the checkpoint whose - * ckp_lsn precedes the first log record (and then roll forward to - * the appropriate timestamp in Pass #3). + * We used first_lsn to tell us how far back we need to recover, + * use it here. */ - if (LF_ISSET(DB_RECOVER_FATAL)) { - ZERO_LSN(first_lsn); - } else if (dbenv->tx_timestamp != 0) - first_lsn = lowlsn; - else - if ((ret = __log_findckp(dbenv, &first_lsn)) == DB_NOTFOUND) { - /* - * We don't require that log files exist if recovery - * was specified. - */ - ret = 0; - goto out; - } if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) __db_err(dbenv, "Recovery starting from [%lu][%lu]", (u_long)first_lsn.file, (u_long)first_lsn.offset); - for (ret = log_get(dbenv, &lsn, &data, DB_LAST); - ret == 0 && log_compare(&lsn, &first_lsn) > 0; - ret = log_get(dbenv, &lsn, &data, DB_PREV)) { + pass = "backward"; + for (ret = __log_c_get(logc, &lsn, &data, DB_LAST); + ret == 0 && log_compare(&lsn, &first_lsn) >= 0; + ret = __log_c_get(logc, &lsn, &data, DB_PREV)) { if (dbenv->db_feedback != NULL) { - progress = 34 + (int)(33 * (__lsn_diff(&open_lsn, - &last_lsn, &lsn, dbenv->lg_max, 0) / nfiles)); + progress = 34 + (int)(33 * (__lsn_diff(&first_lsn, + &last_lsn, &lsn, log_size, 0) / nfiles)); dbenv->db_feedback(dbenv, DB_RECOVER, progress); } - ret = __db_dispatch(dbenv, - &data, &lsn, DB_TXN_BACKWARD_ROLL, txninfo); + ret = __db_dispatch(dbenv, dbenv->recover_dtab, + dbenv->recover_dtab_size, &data, &lsn, + DB_TXN_BACKWARD_ROLL, txninfo); if (ret != 0) { if (ret != DB_TXN_CKP) goto msgerr; @@ -277,85 +407,184 @@ first: if ((ret = log_get(dbenv, &ckp_lsn, &data, DB_FIRST)) != 0) { } } if (ret != 0 && ret != DB_NOTFOUND) - goto out; + goto err; /* - * Pass #3. + * Pass #3. If we are recovering to a timestamp or to an LSN, + * we need to make sure that we don't roll-forward beyond that + * point because there may be non-transactional operations (e.g., + * closes that would fail). The last_lsn variable is used for + * feedback calculations, but use it to set an initial stopping + * point for the forward pass, and then reset appropriately to + * derive a real stop_lsn that tells how far the forward pass + * should go. */ - for (ret = log_get(dbenv, &lsn, &data, DB_NEXT); - ret == 0; ret = log_get(dbenv, &lsn, &data, DB_NEXT)) { + pass = "forward"; + stop_lsn = last_lsn; + if (max_lsn != NULL || dbenv->tx_timestamp != 0) + stop_lsn = ((DB_TXNHEAD *)txninfo)->maxlsn; + + for (ret = __log_c_get(logc, &lsn, &data, DB_NEXT); + ret == 0; ret = __log_c_get(logc, &lsn, &data, DB_NEXT)) { + /* + * If we are recovering to a timestamp or an LSN, + * we need to make sure that we don't try to roll + * forward beyond the soon-to-be end of log. + */ + if (log_compare(&lsn, &stop_lsn) > 0) + break; + if (dbenv->db_feedback != NULL) { - progress = 67 + (int)(33 * (__lsn_diff(&open_lsn, - &last_lsn, &lsn, dbenv->lg_max, 1) / nfiles)); + progress = 67 + (int)(33 * (__lsn_diff(&first_lsn, + &last_lsn, &lsn, log_size, 1) / nfiles)); dbenv->db_feedback(dbenv, DB_RECOVER, progress); } - ret = __db_dispatch(dbenv, - &data, &lsn, DB_TXN_FORWARD_ROLL, txninfo); + ret = __db_dispatch(dbenv, dbenv->recover_dtab, + dbenv->recover_dtab_size, &data, &lsn, + DB_TXN_FORWARD_ROLL, txninfo); if (ret != 0) { if (ret != DB_TXN_CKP) goto msgerr; else ret = 0; } + } - if (ret != DB_NOTFOUND) - goto out; + if (ret != 0 && ret != DB_NOTFOUND) + goto err; /* - * Process any pages that were on the limbo list - * and move them to the free list. Do this - * before checkpointing the database. + * Process any pages that were on the limbo list and move them to + * the free list. Do this before checkpointing the database. */ - if ((ret = __db_do_the_limbo(dbenv, txninfo)) != 0) - goto out; + if ((ret = __db_do_the_limbo(dbenv, NULL, NULL, txninfo, + dbenv->tx_timestamp != 0 ? LIMBO_TIMESTAMP : LIMBO_RECOVER)) != 0) + goto err; - /* - * Now set the last checkpoint lsn and the current time, - * take a checkpoint, and reset the txnid. - */ - (void)time(&now); - region = ((DB_TXNMGR *)dbenv->tx_handle)->reginfo.primary; - region->last_txnid = ((DB_TXNHEAD *)txninfo)->maxid; - region->last_ckp = ckp_lsn; - region->time_ckp = (u_int32_t)now; + if (max_lsn == NULL) + region->last_txnid = ((DB_TXNHEAD *)txninfo)->maxid; - /* - * Take two checkpoints so that we don't re-recover any of the - * work we've already done. - */ - if ((ret = txn_checkpoint(dbenv, 0, 0, DB_FORCE)) != 0) - goto out; + if (dbenv->tx_timestamp != 0) { + /* We are going to truncate, so we'd best close the cursor. */ + if (logc != NULL && (ret = __log_c_close(logc)) != 0) + goto err; + logc = NULL; + /* Flush everything to disk, we are losing the log. */ + if ((ret = __memp_sync(dbenv, NULL)) != 0) + goto err; + region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn; + __log_vtruncate(dbenv, &((DB_TXNHEAD *)txninfo)->maxlsn, + &((DB_TXNHEAD *)txninfo)->ckplsn, trunclsn); + /* + * Generate logging compensation records. + * If we crash during/after vtruncate we may have + * pages missing from the free list since they + * if we roll things further back from here. + * These pages are only known in memory at this pont. + */ + if ((ret = __db_do_the_limbo(dbenv, + NULL, NULL, txninfo, LIMBO_COMPENSATE)) != 0) + goto err; + } - /* Now close all the db files that are open. */ - __log_close_files(dbenv); + /* Take a checkpoint here to force any dirty data pages to disk. */ + if ((ret = __txn_checkpoint(dbenv, 0, 0, DB_FORCE)) != 0) + goto err; - if ((ret = txn_checkpoint(dbenv, 0, 0, DB_FORCE)) != 0) - goto out; - region->last_txnid = TXN_MINIMUM; + /* Close all the db files that are open. */ + if ((ret = __dbreg_close_files(dbenv)) != 0) + goto err; + +done: + if (max_lsn != NULL) { + region->last_ckp = ((DB_TXNHEAD *)txninfo)->ckplsn; + + /* We are going to truncate, so we'd best close the cursor. */ + if (logc != NULL && (ret = __log_c_close(logc)) != 0) + goto err; + __log_vtruncate(dbenv, + max_lsn, &((DB_TXNHEAD *)txninfo)->ckplsn, trunclsn); + + /* + * Now we need to open files that should be open in order for + * client processing to continue. However, since we've + * truncated the log, we need to recompute from where the + * openfiles pass should begin. + */ + if ((ret = __log_cursor(dbenv, &logc)) != 0) + goto err; + if ((ret = + __log_c_get(logc, &first_lsn, &data, DB_FIRST)) != 0) { + if (ret == DB_NOTFOUND) + ret = 0; + else + __db_err(dbenv, "First log record not found"); + goto err; + } + if ((ret = __txn_getckp(dbenv, &first_lsn)) == 0 && + (ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) == 0) { + /* We have a recent checkpoint. This is LSN (1). */ + if ((ret = __txn_ckp_read(dbenv, + data.data, &ckp_args)) != 0) { + __db_err(dbenv, + "Invalid checkpoint record at [%ld][%ld]", + (u_long)first_lsn.file, + (u_long)first_lsn.offset); + goto err; + } + first_lsn = ckp_args->ckp_lsn; + } + if ((ret = __log_c_get(logc, &first_lsn, &data, DB_SET)) != 0) + goto err; + if ((ret = __env_openfiles(dbenv, logc, + txninfo, &data, &first_lsn, NULL, nfiles, 1)) != 0) + goto err; + } else if (region->stat.st_nrestores == 0) + /* + * If there are no prepared transactions that need resolution, + * we need to reset the transaction ID space and log this fact. + */ + if ((ret = __txn_reset(dbenv)) != 0) + goto err; if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) { + (void)time(&now); __db_err(dbenv, "Recovery complete at %.24s", ctime(&now)); __db_err(dbenv, "%s %lx %s [%lu][%lu]", "Maximum transaction ID", - ((DB_TXNHEAD *)txninfo)->maxid, + (u_long)(txninfo == NULL ? + TXN_MINIMUM : ((DB_TXNHEAD *)txninfo)->maxid), "Recovery checkpoint", (u_long)region->last_ckp.file, (u_long)region->last_ckp.offset); } if (0) { -msgerr: __db_err(dbenv, "Recovery function for LSN %lu %lu failed", - (u_long)lsn.file, (u_long)lsn.offset); +msgerr: __db_err(dbenv, + "Recovery function for LSN %lu %lu failed on %s pass", + (u_long)lsn.file, (u_long)lsn.offset, pass); } -out: if (is_thread) - F_SET(dbenv, DB_ENV_THREAD); - __db_txnlist_end(dbenv, txninfo); +err: if (logc != NULL && (t_ret = __log_c_close(logc)) != 0 && ret == 0) + ret = t_ret; + + if (txninfo != NULL) + __db_txnlist_end(dbenv, txninfo); + + if (dtab != NULL) + __os_free(dbenv, dtab); + if (ckp_args != NULL) - __os_free(ckp_args, sizeof(*ckp_args)); - F_CLR((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER); + __os_free(dbenv, ckp_args); dbenv->tx_timestamp = 0; + + /* Restore the state of the thread flag, clear in-recovery flags. */ + if (is_thread) + F_SET(dbenv, DB_ENV_THREAD); + F_CLR((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER); + F_CLR(region, TXN_IN_RECOVERY); + return (ret); } @@ -365,13 +594,13 @@ out: if (is_thread) * we are moving backward, we are computing high - current. max is * the number of bytes per logfile. */ -static float +static double __lsn_diff(low, high, current, max, is_forward) DB_LSN *low, *high, *current; u_int32_t max; int is_forward; { - float nf; + double nf; /* * There are three cases in each direction. If you are in the @@ -382,27 +611,78 @@ __lsn_diff(low, high, current, max, is_forward) */ if (is_forward) { if (current->file == low->file) - nf = (float)(current->offset - low->offset) / max; + nf = (double)(current->offset - low->offset) / max; else if (current->offset < low->offset) - nf = (float)(current->file - low->file - 1) + - (float)(max - low->offset + current->offset) / max; + nf = (double)(current->file - low->file - 1) + + (double)(max - low->offset + current->offset) / max; else - nf = (float)(current->file - low->file) + - (float)(current->offset - low->offset) / max; + nf = (double)(current->file - low->file) + + (double)(current->offset - low->offset) / max; } else { if (current->file == high->file) - nf = (float)(high->offset - current->offset) / max; + nf = (double)(high->offset - current->offset) / max; else if (current->offset > high->offset) - nf = (float)(high->file - current->file - 1) + - (float)(max - current->offset + high->offset) / max; + nf = (double)(high->file - current->file - 1) + + (double) + (max - current->offset + high->offset) / max; else - nf = (float)(high->file - current->file) + - (float)(high->offset - current->offset) / max; + nf = (double)(high->file - current->file) + + (double)(high->offset - current->offset) / max; } return (nf); } /* + * __log_backup -- + * + * This is used to find the earliest log record to process when a client + * is trying to sync up with a master whose max LSN is less than this + * client's max lsn; we want to roll back everything after that + * + * Find the latest checkpoint whose ckp_lsn is less than the max lsn. + */ +static int +__log_backup(dbenv, logc, max_lsn, start_lsn) + DB_ENV *dbenv; + DB_LOGC *logc; + DB_LSN *max_lsn, *start_lsn; +{ + DB_LSN lsn; + DBT data; + __txn_ckp_args *ckp_args; + int ret; + + memset(&data, 0, sizeof(data)); + ckp_args = NULL; + + /* + * Follow checkpoints through the log until we find one with + * a ckp_lsn less than max_lsn. + */ + if ((ret = __txn_getckp(dbenv, &lsn)) != 0) + goto err; + while ((ret = __log_c_get(logc, &lsn, &data, DB_SET)) == 0) { + if ((ret = __txn_ckp_read(dbenv, data.data, &ckp_args)) != 0) + return (ret); + if (log_compare(&ckp_args->ckp_lsn, max_lsn) <= 0) { + *start_lsn = ckp_args->ckp_lsn; + break; + } + + lsn = ckp_args->last_ckp; + if (IS_ZERO_LSN(lsn)) + break; + __os_free(dbenv, ckp_args); + } + + if (ckp_args != NULL) + __os_free(dbenv, ckp_args); +err: if (IS_ZERO_LSN(*start_lsn) && (ret == 0 || ret == DB_NOTFOUND)) + ret = __log_c_get(logc, start_lsn, &data, DB_FIRST); + return (ret); +} + +/* * __log_earliest -- * * Return the earliest recovery point for the log files present. The @@ -410,8 +690,9 @@ __lsn_diff(low, high, current, max, is_forward) * whose checkpoint LSN is greater than the first LSN we process. */ static int -__log_earliest(dbenv, lowtime, lowlsn) +__log_earliest(dbenv, logc, lowtime, lowlsn) DB_ENV *dbenv; + DB_LOGC *logc; int32_t *lowtime; DB_LSN *lowlsn; { @@ -427,19 +708,17 @@ __log_earliest(dbenv, lowtime, lowlsn) * record whose ckp_lsn is greater than first_lsn. */ - for (ret = log_get(dbenv, &first_lsn, &data, DB_FIRST); - ret == 0; ret = log_get(dbenv, &lsn, &data, DB_NEXT)) { - if (ret != 0) - break; + for (ret = __log_c_get(logc, &first_lsn, &data, DB_FIRST); + ret == 0; ret = __log_c_get(logc, &lsn, &data, DB_NEXT)) { memcpy(&rectype, data.data, sizeof(rectype)); - if (rectype != DB_txn_ckp) + if (rectype != DB___txn_ckp) continue; if ((ret = __txn_ckp_read(dbenv, data.data, &ckpargs)) == 0) { cmp = log_compare(&ckpargs->ckp_lsn, &first_lsn); *lowlsn = ckpargs->ckp_lsn; *lowtime = ckpargs->timestamp; - __os_free(ckpargs, 0); + __os_free(dbenv, ckpargs); if (cmp >= 0) break; } @@ -447,3 +726,70 @@ __log_earliest(dbenv, lowtime, lowlsn) return (ret); } + +/* + * __env_openfiles -- + * Perform the pass of recovery that opens files. This is used + * both during regular recovery and an initial call to txn_recover (since + * we need files open in order to abort prepared, but not yet committed + * transactions). + * + * See the comments in db_apprec for a detailed description of the + * various recovery passes. + * + * If we are not doing feedback processing (i.e., we are doing txn_recover + * processing and in_recovery is zero), then last_lsn can be NULL. + * + * PUBLIC: int __env_openfiles __P((DB_ENV *, DB_LOGC *, + * PUBLIC: void *, DBT *, DB_LSN *, DB_LSN *, double, int)); + */ +int +__env_openfiles(dbenv, logc, txninfo, + data, open_lsn, last_lsn, nfiles, in_recovery) + DB_ENV *dbenv; + DB_LOGC *logc; + void *txninfo; + DBT *data; + DB_LSN *open_lsn, *last_lsn; + int in_recovery; + double nfiles; +{ + DB_LSN lsn; + u_int32_t log_size; + int progress, ret; + + /* + * XXX + * Get the log size. No locking required because we're single-threaded + * during recovery. + */ + log_size = + ((LOG *)(((DB_LOG *)dbenv->lg_handle)->reginfo.primary))->log_size; + + lsn = *open_lsn; + for (;;) { + if (in_recovery && dbenv->db_feedback != NULL) { + DB_ASSERT(last_lsn != NULL); + progress = (int)(33 * (__lsn_diff(open_lsn, + last_lsn, &lsn, log_size, 1) / nfiles)); + dbenv->db_feedback(dbenv, DB_RECOVER, progress); + } + ret = __db_dispatch(dbenv, + dbenv->recover_dtab, dbenv->recover_dtab_size, data, &lsn, + in_recovery ? DB_TXN_OPENFILES : DB_TXN_POPENFILES, + txninfo); + if (ret != 0 && ret != DB_TXN_CKP) { + __db_err(dbenv, + "Recovery function for LSN %lu %lu failed", + (u_long)lsn.file, (u_long)lsn.offset); + break; + } + if ((ret = __log_c_get(logc, &lsn, data, DB_NEXT)) != 0) { + if (ret == DB_NOTFOUND) + ret = 0; + break; + } + } + + return (ret); +} diff --git a/db/env/env_region.c b/db/env/env_region.c index f3df4bac1..2306407e7 100644 --- a/db/env/env_region.c +++ b/db/env/env_region.c @@ -1,39 +1,33 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Copyright (c) 1996-2003 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "$Id: env_region.c,v 11.28 2000/12/12 17:36:10 bostic Exp $"; +static const char revid[] = "$Id: env_region.c,v 11.79 2003/10/31 01:56:10 bostic Exp $"; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES #include <sys/types.h> -#include <ctype.h> #include <string.h> -#include <unistd.h> #endif #include "db_int.h" -#include "db_shash.h" -#include "lock.h" -#include "lock_ext.h" -#include "log.h" -#include "log_ext.h" -#include "mp.h" -#include "mp_ext.h" -#include "txn.h" -#include "txn_ext.h" - -static int __db_des_destroy __P((DB_ENV *, REGION *)); +#include "dbinc/db_shash.h" +#include "dbinc/lock.h" +#include "dbinc/log.h" +#include "dbinc/mp.h" +#include "dbinc/txn.h" + +static int __db_des_destroy __P((DB_ENV *, REGION *, int)); static int __db_des_get __P((DB_ENV *, REGINFO *, REGINFO *, REGION **)); static int __db_e_remfile __P((DB_ENV *)); -static int __db_faultmem __P((void *, size_t, int)); +static int __db_faultmem __P((DB_ENV *, void *, size_t, int)); static void __db_region_destroy __P((DB_ENV *, REGINFO *)); /* @@ -110,13 +104,17 @@ loop: renv = NULL; * If this is a public environment, we use the filesystem to ensure * the creation of the environment file is single-threaded. */ - if (F_ISSET(dbenv, DB_ENV_PRIVATE)) + if (F_ISSET(dbenv, DB_ENV_PRIVATE)) { + if ((ret = __os_strdup(dbenv, + "process-private", &infop->name)) != 0) + goto err; goto creation; + } /* Build the region name. */ (void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV); if ((ret = __db_appname(dbenv, - DB_APP_NONE, NULL, buf, 0, NULL, &infop->name)) != 0) + DB_APP_NONE, buf, 0, NULL, &infop->name)) != 0) goto err; /* @@ -128,9 +126,9 @@ loop: renv = NULL; * errno return value -- I sure hope they're right. */ if (F_ISSET(dbenv, DB_ENV_CREATE)) { - if ((ret = __os_open(dbenv, - infop->name, DB_OSO_REGION | DB_OSO_CREATE | DB_OSO_EXCL, - dbenv->db_mode, dbenv->lockfhp)) == 0) + if ((ret = __os_open(dbenv, infop->name, + DB_OSO_CREATE | DB_OSO_DIRECT | DB_OSO_EXCL | DB_OSO_REGION, + dbenv->db_mode, &dbenv->lockfhp)) == 0) goto creation; if (ret != EEXIST) { __db_err(dbenv, @@ -143,10 +141,13 @@ loop: renv = NULL; * If we couldn't create the file, try and open it. (If that fails, * we're done.) */ - if ((ret = __os_open(dbenv, infop->name, - DB_OSO_REGION, dbenv->db_mode, dbenv->lockfhp)) != 0) + if ((ret = __os_open(dbenv, infop->name, DB_OSO_REGION | DB_OSO_DIRECT, + dbenv->db_mode, &dbenv->lockfhp)) != 0) goto err; + /* The region exists, it's not okay to recreate it. */ + F_CLR(infop, REGION_CREATE_OK); + /* * !!! * The region may be in system memory not backed by the filesystem @@ -230,12 +231,13 @@ loop: renv = NULL; * the better. */ #ifdef HAVE_MUTEX_THREADS - __os_closehandle(dbenv->lockfhp); + (void)__os_closehandle(dbenv, dbenv->lockfhp); + dbenv->lockfhp = NULL; #endif /* Call the region join routine to acquire the region. */ memset(&tregion, 0, sizeof(tregion)); - tregion.size = size; + tregion.size = (roff_t)size; tregion.segid = segid; if ((ret = __os_r_attach(dbenv, infop, &tregion)) != 0) goto err; @@ -249,6 +251,19 @@ loop: renv = NULL; */ infop->primary = R_ADDR(infop, 0); infop->addr = (u_int8_t *)infop->addr + sizeof(REGENV); + renv = infop->primary; + + /* Make sure the region matches our build. */ + if (renv->majver != DB_VERSION_MAJOR || + renv->minver != DB_VERSION_MINOR) { + __db_err(dbenv, + "Program version %d.%d doesn't match environment version", + DB_VERSION_MAJOR, DB_VERSION_MINOR); +#ifndef DIAGNOSTIC + ret = EINVAL; + goto err; +#endif + } /* * Check if the environment has had a catastrophic failure. @@ -264,36 +279,21 @@ loop: renv = NULL; * I'd rather play permissions games using the underlying file, but I * can't because Windows/NT filesystems won't open files mode 0. */ - renv = infop->primary; - if (renv->panic) { + if (renv->envpanic && !F_ISSET(dbenv, DB_ENV_NOPANIC)) { ret = __db_panic_msg(dbenv); goto err; } if (renv->magic != DB_REGION_MAGIC) goto retry; - /* Make sure the region matches our build. */ - if (renv->majver != DB_VERSION_MAJOR || - renv->minver != DB_VERSION_MINOR || - renv->patch != DB_VERSION_PATCH) { - __db_err(dbenv, - "Program version %d.%d.%d doesn't match environment version %d.%d.%d", - DB_VERSION_MAJOR, DB_VERSION_MINOR, DB_VERSION_PATCH, - renv->majver, renv->minver, renv->patch); -#ifndef DIAGNOSTIC - ret = EINVAL; - goto err; -#endif - } - /* Lock the environment. */ - MUTEX_LOCK(dbenv, &renv->mutex, dbenv->lockfhp); + MUTEX_LOCK(dbenv, &renv->mutex); /* * Finally! We own the environment now. Repeat the panic check, it's * possible that it was set while we waited for the lock. */ - if (renv->panic) { + if (renv->envpanic && !F_ISSET(dbenv, DB_ENV_NOPANIC)) { ret = __db_panic_msg(dbenv); goto err_unlock; } @@ -338,7 +338,7 @@ err_unlock: MUTEX_UNLOCK(dbenv, &renv->mutex); * Fault the pages into memory. Note, do this AFTER releasing the * lock, because we're only reading the pages, not writing them. */ - (void)__db_faultmem(infop->primary, rp->size, 0); + (void)__db_faultmem(dbenv, infop->primary, rp->size, 0); /* Everything looks good, we're done. */ dbenv->reginfo = infop; @@ -352,9 +352,12 @@ creation: * Allocate room for 50 REGION structures plus overhead (we're going * to use this space for last-ditch allocation requests), although we * should never need anything close to that. + * + * Encryption passwds are stored in the env region. Add that in too. */ memset(&tregion, 0, sizeof(tregion)); - tregion.size = 50 * sizeof(REGION) + 50 * sizeof(MUTEX) + 2048; + tregion.size = (roff_t)(50 * sizeof(REGION) + + dbenv->passwd_len + 2048); tregion.segid = INVALID_REGION_SEGID; if ((ret = __os_r_attach(dbenv, infop, &tregion)) != 0) goto err; @@ -363,7 +366,7 @@ creation: * Fault the pages into memory. Note, do this BEFORE we initialize * anything, because we're writing the pages, not just reading them. */ - (void)__db_faultmem(infop->addr, tregion.size, 1); + (void)__db_faultmem(dbenv, infop->addr, tregion.size, 1); /* * The first object in the region is the REGENV structure. This is @@ -392,10 +395,12 @@ creation: * number which validates the file/environment. */ renv = infop->primary; - renv->panic = 0; + renv->envpanic = 0; db_version(&renv->majver, &renv->minver, &renv->patch); SH_LIST_INIT(&renv->regionq); renv->refcnt = 1; + renv->cipher_off = INVALID_ROFF; + renv->rep_off = INVALID_ROFF; /* * Initialize init_flags to store the flags that any other environment @@ -412,15 +417,15 @@ creation: * filesystem as the database home. But you knew that, I'm sure -- it * probably wasn't even worth mentioning.) */ - if ((ret = - __db_mutex_init(dbenv, &renv->mutex, DB_FCNTL_OFF_GEN, 0)) != 0) { + if ((ret = __db_mutex_setup(dbenv, infop, &renv->mutex, + MUTEX_NO_RECORD | MUTEX_NO_RLOCK)) != 0) { __db_err(dbenv, "%s: unable to initialize environment lock: %s", infop->name, db_strerror(ret)); goto err; } if (!F_ISSET(&renv->mutex, MUTEX_IGNORE) && - (ret = __db_mutex_lock(dbenv, &renv->mutex, dbenv->lockfhp)) != 0) { + (ret = __db_mutex_lock(dbenv, &renv->mutex)) != 0) { __db_err(dbenv, "%s: unable to acquire environment lock: %s", infop->name, db_strerror(ret)); goto err; @@ -459,8 +464,8 @@ find_err: __db_err(dbenv, if (tregion.segid != INVALID_REGION_SEGID) { ref.size = tregion.size; ref.segid = tregion.segid; - if ((ret = __os_write(dbenv, dbenv->lockfhp, - &ref, sizeof(ref), &nrw)) != 0 || nrw != sizeof(ref)) { + if ((ret = __os_write( + dbenv, dbenv->lockfhp, &ref, sizeof(ref), &nrw)) != 0) { __db_err(dbenv, "%s: unable to write out public environment ID: %s", infop->name, db_strerror(ret)); @@ -475,8 +480,10 @@ find_err: __db_err(dbenv, * the better. */ #if defined(HAVE_MUTEX_THREADS) - if (F_ISSET(dbenv->lockfhp, DB_FH_VALID)) - __os_closehandle(dbenv->lockfhp); + if (dbenv->lockfhp != NULL) { + (void)__os_closehandle(dbenv, dbenv->lockfhp); + dbenv->lockfhp = NULL; + } #endif /* Validate the file. */ @@ -491,8 +498,10 @@ find_err: __db_err(dbenv, err: retry: /* Close any open file handle. */ - if (F_ISSET(dbenv->lockfhp, DB_FH_VALID)) - (void)__os_closehandle(dbenv->lockfhp); + if (dbenv->lockfhp != NULL) { + (void)__os_closehandle(dbenv, dbenv->lockfhp); + dbenv->lockfhp = NULL; + } /* * If we joined or created the region, detach from it. If we created @@ -513,8 +522,8 @@ retry: /* Close any open file handle. */ /* Free the allocated name and/or REGINFO structure. */ if (infop->name != NULL) - __os_freestr(infop->name); - __os_free(infop, sizeof(REGINFO)); + __os_free(dbenv, infop->name); + __os_free(dbenv, infop); /* If we had a temporary error, wait awhile and try again. */ if (ret == 0) { @@ -522,7 +531,7 @@ retry: /* Close any open file handle. */ __db_err(dbenv, "unable to join the environment"); ret = EAGAIN; } else { - __os_sleep(dbenv, retry_cnt * 3, 0); + (void)__os_sleep(dbenv, retry_cnt * 3, 0); goto loop; } } @@ -547,14 +556,16 @@ __db_e_detach(dbenv, destroy) infop = dbenv->reginfo; renv = infop->primary; + if (F_ISSET(dbenv, DB_ENV_PRIVATE)) + destroy = 1; /* Lock the environment. */ - MUTEX_LOCK(dbenv, &renv->mutex, dbenv->lockfhp); + MUTEX_LOCK(dbenv, &renv->mutex); /* Decrement the reference count. */ if (renv->refcnt == 0) { __db_err(dbenv, "region %lu (environment): reference count went negative", - infop->rp->id); + (u_long)infop->rp->id); } else --renv->refcnt; @@ -562,34 +573,42 @@ __db_e_detach(dbenv, destroy) MUTEX_UNLOCK(dbenv, &renv->mutex); /* Close the locking file handle. */ - if (F_ISSET(dbenv->lockfhp, DB_FH_VALID)) - (void)__os_closehandle(dbenv->lockfhp); + if (dbenv->lockfhp != NULL) { + (void)__os_closehandle(dbenv, dbenv->lockfhp); + dbenv->lockfhp = NULL; + } /* Reset the addr value that we "corrected" above. */ infop->addr = infop->primary; /* * If we are destroying the environment, we need to - * destroy any system resources backing the mutex. - * Do that now before we free the memory in __os_r_detach. + * destroy any system resources backing the mutex, as well + * as any system resources that the replication system may have + * acquired and put in the main region. + * + * Do these now before we free the memory in __os_r_detach. */ - if (destroy) + if (destroy) { + (void)__rep_region_destroy(dbenv); __db_mutex_destroy(&renv->mutex); + __db_mutex_destroy(&infop->rp->mutex); + } /* * Release the region, and kill our reference. * - * We set the DBENV->reginfo field to NULL here and discard its memory. - * DBENV->remove calls __dbenv_remove to do the region remove, and + * We set the DB_ENV->reginfo field to NULL here and discard its memory. + * DB_ENV->remove calls __dbenv_remove to do the region remove, and * __dbenv_remove attached and then detaches from the region. We don't - * want to return to DBENV->remove with a non-NULL DBENV->reginfo field - * because it will attempt to detach again as part of its cleanup. + * want to return to DB_ENV->remove with a non-NULL DB_ENV->reginfo + * field because it will attempt to detach again as part of its cleanup. */ (void)__os_r_detach(dbenv, infop, destroy); if (infop->name != NULL) - __os_free(infop->name, 0); - __os_free(dbenv->reginfo, sizeof(REGINFO)); + __os_free(dbenv, infop->name); + __os_free(dbenv, dbenv->reginfo); dbenv->reginfo = NULL; return (0); @@ -599,18 +618,20 @@ __db_e_detach(dbenv, destroy) * __db_e_remove -- * Discard an environment if it's not in use. * - * PUBLIC: int __db_e_remove __P((DB_ENV *, int)); + * PUBLIC: int __db_e_remove __P((DB_ENV *, u_int32_t)); */ int -__db_e_remove(dbenv, force) +__db_e_remove(dbenv, flags) DB_ENV *dbenv; - int force; + u_int32_t flags; { REGENV *renv; REGINFO *infop, reginfo; REGION *rp; - int ret; + u_int32_t db_env_reset; + int force, ret; + force = LF_ISSET(DB_FORCE) ? 1 : 0; /* * This routine has to walk a nasty line between not looking into * the environment (which may be corrupted after an app or system @@ -632,8 +653,10 @@ __db_e_remove(dbenv, force) * If the force flag is set, we do not acquire any locks during this * process. */ + db_env_reset = F_ISSET(dbenv, DB_ENV_NOLOCKING | DB_ENV_NOPANIC); if (force) - dbenv->db_mutexlocks = 0; + F_SET(dbenv, DB_ENV_NOLOCKING); + F_SET(dbenv, DB_ENV_NOPANIC); /* Join the environment. */ if ((ret = __db_e_attach(dbenv, NULL)) != 0) { @@ -645,17 +668,21 @@ __db_e_remove(dbenv, force) ret = 0; if (force) goto remfiles; - goto err; + goto done; } infop = dbenv->reginfo; renv = infop->primary; /* Lock the environment. */ - MUTEX_LOCK(dbenv, &renv->mutex, dbenv->lockfhp); + MUTEX_LOCK(dbenv, &renv->mutex); - /* If it's in use, we're done. */ - if (renv->refcnt == 1 || force) { + /* + * If it's in use, we're done unless we're forcing the issue or the + * environment has panic'd. (Presumably, if the environment panic'd, + * the thread holding the reference count may not have cleaned up.) + */ + if (renv->refcnt == 1 || renv->envpanic == 1 || force) { /* * Set the panic flag and overwrite the magic number. * @@ -663,7 +690,7 @@ __db_e_remove(dbenv, force) * From this point on, there's no going back, we pretty * much ignore errors, and just whack on whatever we can. */ - renv->panic = 1; + renv->envpanic = 1; renv->magic = 0; /* @@ -683,37 +710,28 @@ __db_e_remove(dbenv, force) * callers be prepared to create the region in order to join it. */ memset(®info, 0, sizeof(reginfo)); -restart: for (rp = SH_LIST_FIRST(&renv->regionq, __db_region); + for (rp = SH_LIST_FIRST(&renv->regionq, __db_region); rp != NULL; rp = SH_LIST_NEXT(rp, q, __db_region)) { if (rp->type == REGION_TYPE_ENV) continue; + /* + * If we get here and can't attach and/or detach to the + * region, it's a mess. Ignore errors, there's nothing + * we can do about them. + */ reginfo.id = rp->id; reginfo.flags = REGION_CREATE_OK; - if ((ret = __db_r_attach(dbenv, ®info, 0)) != 0) { - __db_err(dbenv, - "region %s attach: %s", db_strerror(ret)); - continue; - } - R_UNLOCK(dbenv, ®info); - if ((ret = __db_r_detach(dbenv, ®info, 1)) != 0) { - __db_err(dbenv, - "region detach: %s", db_strerror(ret)); - continue; + if (__db_r_attach(dbenv, ®info, 0) == 0) { + R_UNLOCK(dbenv, ®info); + (void)__db_r_detach(dbenv, ®info, 1); } - /* - * If we have an error, we continue so we eventually - * reach the end of the list. If we succeed, restart - * the list because it was relinked when we destroyed - * the entry. - */ - goto restart; } /* Destroy the environment's region. */ (void)__db_e_detach(dbenv, 1); - /* Discard the physical files. */ + /* Discard any remaining physical files. */ remfiles: (void)__db_e_remfile(dbenv); } else { /* Unlock the environment. */ @@ -725,7 +743,9 @@ remfiles: (void)__db_e_remfile(dbenv); ret = EBUSY; } -err: +done: F_CLR(dbenv, DB_ENV_NOLOCKING | DB_ENV_NOPANIC); + F_SET(dbenv, db_env_reset); + return (ret); } @@ -737,13 +757,6 @@ static int __db_e_remfile(dbenv) DB_ENV *dbenv; { - static char *old_region_names[] = { - "__db_lock.share", - "__db_log.share", - "__db_mpool.share", - "__db_txn.share", - NULL, - }; int cnt, fcnt, lastrm, ret; u_int8_t saved_byte; const char *dir; @@ -751,8 +764,7 @@ __db_e_remfile(dbenv) /* Get the full path of a file in the environment. */ (void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV); - if ((ret = - __db_appname(dbenv, DB_APP_NONE, NULL, buf, 0, NULL, &path)) != 0) + if ((ret = __db_appname(dbenv, DB_APP_NONE, buf, 0, NULL, &path)) != 0) return (ret); /* Get the parent directory for the environment. */ @@ -769,61 +781,67 @@ __db_e_remfile(dbenv) } /* Get the list of file names. */ - ret = __os_dirlist(dbenv, dir, &names, &fcnt); + if ((ret = __os_dirlist(dbenv, dir, &names, &fcnt)) != 0) + __db_err(dbenv, "%s: %s", dir, db_strerror(ret)); /* Restore the path, and free it. */ *p = saved_byte; - __os_freestr(path); + __os_free(dbenv, path); - if (ret != 0) { - __db_err(dbenv, "%s: %s", dir, db_strerror(ret)); + if (ret != 0) return (ret); - } /* - * Search for valid region names, and remove them. We remove the - * environment region last, because it's the key to this whole mess. + * Remove files from the region directory. */ for (lastrm = -1, cnt = fcnt; --cnt >= 0;) { - if (strlen(names[cnt]) != DB_REGION_NAME_LENGTH || - memcmp(names[cnt], DB_REGION_FMT, DB_REGION_NAME_NUM) != 0) + /* Skip anything outside our name space. */ + if (strncmp(names[cnt], + DB_REGION_PREFIX, sizeof(DB_REGION_PREFIX) - 1)) + continue; + + /* Skip queue extent files. */ + if (strncmp(names[cnt], "__dbq.", 6) == 0) continue; + + /* + * Remove the primary environment region last, because it's + * the key to this whole mess. + */ if (strcmp(names[cnt], DB_REGION_ENV) == 0) { lastrm = cnt; continue; } - for (p = names[cnt] + DB_REGION_NAME_NUM; - *p != '\0' && isdigit((int)*p); ++p) - ; - if (*p != '\0') - continue; + /* Remove the file. */ if (__db_appname(dbenv, - DB_APP_NONE, NULL, names[cnt], 0, NULL, &path) == 0) { + DB_APP_NONE, names[cnt], 0, NULL, &path) == 0) { + /* + * Overwrite region files. Temporary files would have + * been maintained in encrypted format, so there's no + * reason to overwrite them. This is not an exact + * check on the file being a region file, but it's + * not likely to be wrong, and the worst thing that can + * happen is we overwrite a file that didn't need to be + * overwritten. + */ + if (F_ISSET(dbenv, DB_ENV_OVERWRITE) && + strlen(names[cnt]) == DB_REGION_NAME_LENGTH) + (void)__db_overwrite(dbenv, path); (void)__os_unlink(dbenv, path); - __os_freestr(path); + __os_free(dbenv, path); } } if (lastrm != -1) if (__db_appname(dbenv, - DB_APP_NONE, NULL, names[lastrm], 0, NULL, &path) == 0) { + DB_APP_NONE, names[lastrm], 0, NULL, &path) == 0) { + if (F_ISSET(dbenv, DB_ENV_OVERWRITE)) + (void)__db_overwrite(dbenv, path); (void)__os_unlink(dbenv, path); - __os_freestr(path); - } - __os_dirfree(names, fcnt); - - /* - * !!! - * Backward compatibility -- remove region files from releases - * before 2.8.XX. - */ - for (names = (char **)old_region_names; *names != NULL; ++names) - if (__db_appname(dbenv, - DB_APP_NONE, NULL, *names, 0, NULL, &path) == 0) { - (void)__os_unlink(dbenv, path); - __os_freestr(path); + __os_free(dbenv, path); } + __os_dirfree(dbenv, names, fcnt); return (0); } @@ -832,33 +850,47 @@ __db_e_remfile(dbenv) * __db_e_stat * Statistics for the environment. * - * PUBLIC: int __db_e_stat __P((DB_ENV *, REGENV *, REGION *, int *)); + * PUBLIC: int __db_e_stat __P((DB_ENV *, + * PUBLIC: REGENV *, REGION *, int *, u_int32_t)); */ int -__db_e_stat(dbenv, arg_renv, arg_regions, arg_regions_cnt) +__db_e_stat(dbenv, arg_renv, arg_regions, arg_regions_cnt, flags) DB_ENV *dbenv; REGENV *arg_renv; REGION *arg_regions; int *arg_regions_cnt; + u_int32_t flags; { REGENV *renv; REGINFO *infop; REGION *rp; - int n; + int n, ret; infop = dbenv->reginfo; renv = infop->primary; rp = infop->rp; + if ((ret = __db_fchk(dbenv, + "DB_ENV->stat", flags, DB_STAT_CLEAR)) != 0) + return (ret); /* Lock the environment. */ - MUTEX_LOCK(dbenv, &rp->mutex, dbenv->lockfhp); + MUTEX_LOCK(dbenv, &rp->mutex); *arg_renv = *renv; + if (LF_ISSET(DB_STAT_CLEAR)) { + renv->mutex.mutex_set_nowait = 0; + renv->mutex.mutex_set_wait = 0; + } for (n = 0, rp = SH_LIST_FIRST(&renv->regionq, __db_region); n < *arg_regions_cnt && rp != NULL; - ++n, rp = SH_LIST_NEXT(rp, q, __db_region)) + ++n, rp = SH_LIST_NEXT(rp, q, __db_region)) { arg_regions[n] = *rp; + if (LF_ISSET(DB_STAT_CLEAR)) { + rp->mutex.mutex_set_nowait = 0; + rp->mutex.mutex_set_wait = 0; + } + } /* Release the lock. */ rp = infop->rp; @@ -887,12 +919,15 @@ __db_r_attach(dbenv, infop, size) char buf[sizeof(DB_REGION_FMT) + 20]; renv = ((REGINFO *)dbenv->reginfo)->primary; - F_CLR(infop, REGION_CREATE); /* Lock the environment. */ - MUTEX_LOCK(dbenv, &renv->mutex, dbenv->lockfhp); + MUTEX_LOCK(dbenv, &renv->mutex); - /* Find or create a REGION structure for this region. */ + /* + * Find or create a REGION structure for this region. If we create + * it, the REGION_CREATE flag will be set in the infop structure. + */ + F_CLR(infop, REGION_CREATE); if ((ret = __db_des_get(dbenv, dbenv->reginfo, infop, &rp)) != 0) { MUTEX_UNLOCK(dbenv, &renv->mutex); return (ret); @@ -903,12 +938,12 @@ __db_r_attach(dbenv, infop, size) /* If we're creating the region, set the desired size. */ if (F_ISSET(infop, REGION_CREATE)) - rp->size = size; + rp->size = (roff_t)size; /* Join/create the underlying region. */ (void)snprintf(buf, sizeof(buf), DB_REGION_FMT, infop->id); if ((ret = __db_appname(dbenv, - DB_APP_NONE, NULL, buf, 0, NULL, &infop->name)) != 0) + DB_APP_NONE, buf, 0, NULL, &infop->name)) != 0) goto err; if ((ret = __os_r_attach(dbenv, infop, rp)) != 0) goto err; @@ -918,8 +953,8 @@ __db_r_attach(dbenv, infop, size) * anything because we're writing pages in created regions, not just * reading them. */ - (void)__db_faultmem(infop->addr, - rp->size, F_ISSET(infop, REGION_CREATE)); + (void)__db_faultmem(dbenv, + infop->addr, rp->size, F_ISSET(infop, REGION_CREATE)); /* * !!! @@ -929,18 +964,15 @@ __db_r_attach(dbenv, infop, size) * * If we created the region, initialize it for allocation. */ - if (F_ISSET(infop, REGION_CREATE)) { - ((REGION *)(infop->addr))->magic = DB_REGION_MAGIC; - + if (F_ISSET(infop, REGION_CREATE)) (void)__db_shalloc_init(infop->addr, rp->size); - } /* * If the underlying REGION isn't the environment, acquire a lock * for it and release our lock on the environment. */ if (infop->type != REGION_TYPE_ENV) { - MUTEX_LOCK(dbenv, &rp->mutex, dbenv->lockfhp); + MUTEX_LOCK(dbenv, &rp->mutex); MUTEX_UNLOCK(dbenv, &renv->mutex); } @@ -954,8 +986,10 @@ err: if (infop->addr != NULL) infop->id = INVALID_REGION_ID; /* Discard the REGION structure if we created it. */ - if (F_ISSET(infop, REGION_CREATE)) - (void)__db_des_destroy(dbenv, rp); + if (F_ISSET(infop, REGION_CREATE)) { + (void)__db_des_destroy(dbenv, rp, 1); + F_CLR(infop, REGION_CREATE); + } /* Release the environment lock. */ MUTEX_UNLOCK(dbenv, &renv->mutex); @@ -981,12 +1015,14 @@ __db_r_detach(dbenv, infop, destroy) renv = ((REGINFO *)dbenv->reginfo)->primary; rp = infop->rp; + if (F_ISSET(dbenv, DB_ENV_PRIVATE)) + destroy = 1; /* Lock the environment. */ - MUTEX_LOCK(dbenv, &renv->mutex, dbenv->lockfhp); + MUTEX_LOCK(dbenv, &renv->mutex); /* Acquire the lock for the REGION. */ - MUTEX_LOCK(dbenv, &rp->mutex, dbenv->lockfhp); + MUTEX_LOCK(dbenv, &rp->mutex); /* * We need to call destroy on per-subsystem info before @@ -1001,9 +1037,15 @@ __db_r_detach(dbenv, infop, destroy) /* Release the REGION lock. */ MUTEX_UNLOCK(dbenv, &rp->mutex); - /* If we destroyed the region, discard the REGION structure. */ + /* + * If we destroyed the region, discard the REGION structure. The only + * time this routine is called with the destroy flag set is when the + * environment is being removed, and it's likely that the only reason + * the environment is being removed is because we crashed. Don't do + * any unnecessary shared memory manipulation. + */ if (destroy && - ((t_ret = __db_des_destroy(dbenv, rp)) != 0) && ret == 0) + ((t_ret = __db_des_destroy(dbenv, rp, 0)) != 0) && ret == 0) ret = t_ret; /* Release the environment lock. */ @@ -1011,7 +1053,7 @@ __db_r_detach(dbenv, infop, destroy) /* Destroy the structure. */ if (infop->name != NULL) - __os_freestr(infop->name); + __os_free(dbenv, infop->name); return (ret); } @@ -1089,9 +1131,8 @@ __db_des_get(dbenv, env_infop, infop, rpp) /* Initialize the region. */ memset(rp, 0, sizeof(*rp)); - if ((ret = __db_mutex_init(dbenv, &rp->mutex, - R_OFFSET(env_infop, &rp->mutex) + DB_FCNTL_OFF_GEN, - 0)) != 0) { + if ((ret = __db_mutex_setup(dbenv, env_infop, &rp->mutex, + MUTEX_NO_RECORD | MUTEX_NO_RLOCK)) != 0) { __db_shalloc_free(env_infop->addr, rp); return (ret); } @@ -1117,9 +1158,10 @@ __db_des_get(dbenv, env_infop, infop, rpp) * Destroy a reference to a REGION. */ static int -__db_des_destroy(dbenv, rp) +__db_des_destroy(dbenv, rp, shmem_safe) DB_ENV *dbenv; REGION *rp; + int shmem_safe; { REGINFO *infop; @@ -1129,9 +1171,22 @@ __db_des_destroy(dbenv, rp) */ infop = dbenv->reginfo; - SH_LIST_REMOVE(rp, q, __db_region); + /* + * If we're calling during recovery, it may not be safe to access the + * shared memory, as the shared memory may have been corrupted during + * the crash. If the shared memory is safe, remove the REGION entry + * from its linked list, destroy the mutex, and free the allocated + * memory. On systems that require system mutex support, we don't + * have a choice -- safe or not, we have to destroy the mutex or we'll + * leak memory. + */ + if (shmem_safe) + SH_LIST_REMOVE(rp, q, __db_region); + __db_mutex_destroy(&rp->mutex); - __db_shalloc_free(infop->addr, rp); + + if (shmem_safe) + __db_shalloc_free(infop->addr, rp); return (0); } @@ -1141,7 +1196,8 @@ __db_des_destroy(dbenv, rp) * Fault the region into memory. */ static int -__db_faultmem(addr, size, created) +__db_faultmem(dbenv, addr, size, created) + DB_ENV *dbenv; void *addr; size_t size; int created; @@ -1162,7 +1218,7 @@ __db_faultmem(addr, size, created) * that it doesn't figure out that we're never really using it. */ ret = 0; - if (DB_GLOBAL(db_region_init)) { + if (F_ISSET(dbenv, DB_ENV_REGION_INIT)) { if (created) for (p = addr, t = (u_int8_t *)addr + size; p < t; p += OS_VMPAGESIZE) @@ -1190,13 +1246,17 @@ __db_region_destroy(dbenv, infop) case REGION_TYPE_LOCK: __lock_region_destroy(dbenv, infop); break; + case REGION_TYPE_LOG: + __log_region_destroy(dbenv, infop); + break; case REGION_TYPE_MPOOL: __mpool_region_destroy(dbenv, infop); break; + case REGION_TYPE_TXN: + __txn_region_destroy(dbenv, infop); + break; case REGION_TYPE_ENV: - case REGION_TYPE_LOG: case REGION_TYPE_MUTEX: - case REGION_TYPE_TXN: break; default: DB_ASSERT(0); |