diff options
author | Panu Matilainen <pmatilai@redhat.com> | 2007-07-16 16:48:14 +0300 |
---|---|---|
committer | Panu Matilainen <pmatilai@redhat.com> | 2007-07-16 16:48:14 +0300 |
commit | 2cfd3012bfcb5c5c61bbaf662ef084e0ab789d79 (patch) | |
tree | e12ee52087506ac8c7a5eee83b17497d98df2d40 /db/fileops | |
parent | b754fe19fd387ca5fe8e7c00ddaa25c898fa192f (diff) | |
download | rpm-2cfd3012bfcb5c5c61bbaf662ef084e0ab789d79.tar.gz rpm-2cfd3012bfcb5c5c61bbaf662ef084e0ab789d79.tar.bz2 rpm-2cfd3012bfcb5c5c61bbaf662ef084e0ab789d79.zip |
Update internal BDB to version 4.5.20
Diffstat (limited to 'db/fileops')
-rw-r--r-- | db/fileops/fileops.src | 24 | ||||
-rw-r--r-- | db/fileops/fileops_auto.c | 238 | ||||
-rw-r--r-- | db/fileops/fileops_autop.c | 68 | ||||
-rw-r--r-- | db/fileops/fop_basic.c | 90 | ||||
-rw-r--r-- | db/fileops/fop_rec.c | 76 | ||||
-rw-r--r-- | db/fileops/fop_util.c | 1081 |
6 files changed, 1066 insertions, 511 deletions
diff --git a/db/fileops/fileops.src b/db/fileops/fileops.src index a77b5d5c4..ce81e1513 100644 --- a/db/fileops/fileops.src +++ b/db/fileops/fileops.src @@ -1,26 +1,18 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2001-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 2001-2006 + * Oracle Corporation. All rights reserved. * - * $Id: fileops.src,v 1.13 2004/06/17 17:35:20 bostic Exp $ + * $Id: fileops.src,v 12.6 2006/08/24 14:46:03 bostic Exp $ */ PREFIX __fop DBPRIVATE -INCLUDE #ifndef NO_SYSTEM_INCLUDES -INCLUDE #include <sys/types.h> -INCLUDE -INCLUDE #include <ctype.h> -INCLUDE #include <string.h> -INCLUDE #endif -INCLUDE INCLUDE #include "db_int.h" INCLUDE #include "dbinc/crypto.h" INCLUDE #include "dbinc/db_page.h" -INCLUDE #include "dbinc/db_dispatch.h" INCLUDE #include "dbinc/db_am.h" INCLUDE #include "dbinc/log.h" INCLUDE #include "dbinc/txn.h" @@ -34,7 +26,7 @@ INCLUDE * appname: indicates if the name needs to go through __db_appname * mode: file system mode */ -BEGIN create 143 +BEGIN create 42 143 DBT name DBT s ARG appname u_int32_t lu ARG mode u_int32_t o @@ -46,7 +38,7 @@ END * name: name in the file system * appname: indicates if the name needs to go through __db_appname */ -BEGIN remove 144 +BEGIN remove 42 144 DBT name DBT s DBT fid DBT s ARG appname u_int32_t lu @@ -64,7 +56,7 @@ END * flag: non-0 indicates that this is a tempfile, so we needn't undo * these modifications (we'll toss the file). */ -BEGIN write 145 +BEGIN write 42 145 DBT name DBT s ARG appname u_int32_t lu ARG pgsize u_int32_t lu @@ -82,7 +74,7 @@ END * DB fileid of the file being renamed. We need to check it on recovery * so that we don't inadvertently overwrite good files. */ -BEGIN rename 146 +BEGIN rename 42 146 DBT oldname DBT s DBT newname DBT s DBT fileid DBT s @@ -103,7 +95,7 @@ END * child: The transaction that removed or renamed the file. */ */ -BEGIN file_remove 141 +BEGIN file_remove 42 141 DBT real_fid DBT s DBT tmp_fid DBT s DBT name DBT s diff --git a/db/fileops/fileops_auto.c b/db/fileops/fileops_auto.c index 333e37755..0da353b2b 100644 --- a/db/fileops/fileops_auto.c +++ b/db/fileops/fileops_auto.c @@ -2,17 +2,9 @@ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <ctype.h> -#include <string.h> -#endif - #include "db_int.h" #include "dbinc/crypto.h" #include "dbinc/db_page.h" -#include "dbinc/db_dispatch.h" #include "dbinc/db_am.h" #include "dbinc/log.h" #include "dbinc/txn.h" @@ -23,10 +15,10 @@ * PUBLIC: u_int32_t, const DBT *, u_int32_t, u_int32_t)); */ int -__fop_create_log(dbenv, txnid, ret_lsnp, flags, +__fop_create_log(dbenv, txnp, ret_lsnp, flags, name, appname, mode) DB_ENV *dbenv; - DB_TXN *txnid; + DB_TXN *txnp; DB_LSN *ret_lsnp; u_int32_t flags; const DBT *name; @@ -50,29 +42,30 @@ __fop_create_log(dbenv, txnid, ret_lsnp, flags, ret = 0; if (LF_ISSET(DB_LOG_NOT_DURABLE)) { - if (txnid == NULL) + if (txnp == NULL) + return (0); + if (txnp == NULL) return (0); is_durable = 0; } else is_durable = 1; - if (txnid == NULL) { + if (txnp == NULL) { txn_num = 0; lsnp = &null_lsn; null_lsn.file = null_lsn.offset = 0; } else { - if (TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + if (TAILQ_FIRST(&txnp->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) return (ret); /* * We need to assign begin_lsn while holding region mutex. * That assignment is done inside the DbEnv->log_put call, * so pass in the appropriate memory location to be filled * in by the log_put code. - */ - DB_SET_BEGIN_LSNP(txnid, &rlsnp); - txn_num = txnid->txnid; - lsnp = &txnid->last_lsn; + */ + DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); + txn_num = txnp->txnid; } logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) @@ -85,7 +78,7 @@ __fop_create_log(dbenv, txnid, ret_lsnp, flags, logrec.size += npad; } - if (is_durable || txnid == NULL) { + if (is_durable || txnp == NULL) { if ((ret = __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) return (ret); @@ -136,12 +129,13 @@ __fop_create_log(dbenv, txnid, ret_lsnp, flags, memcpy(bp, &uinttmp, sizeof(uinttmp)); bp += sizeof(uinttmp); - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + DB_ASSERT(dbenv, + (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - if (is_durable || txnid == NULL) { + if (is_durable || txnp == NULL) { if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnid != NULL) { - txnid->last_lsn = *rlsnp; + flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { + *lsnp = *rlsnp; if (rlsnp != ret_lsnp) *ret_lsnp = *rlsnp; } @@ -160,20 +154,21 @@ __fop_create_log(dbenv, txnid, ret_lsnp, flags, #else ret = 0; #endif - STAILQ_INSERT_HEAD(&txnid->logs, lr, links); + STAILQ_INSERT_HEAD(&txnp->logs, lr, links); + F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); LSN_NOT_LOGGED(*ret_lsnp); } #ifdef LOG_DIAGNOSTIC if (ret != 0) (void)__fop_create_print(dbenv, - (DBT *)&logrec, ret_lsnp, NULL, NULL); + (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); #endif #ifdef DIAGNOSTIC __os_free(dbenv, logrec.data); #else - if (is_durable || txnid == NULL) + if (is_durable || txnp == NULL) __os_free(dbenv, logrec.data); #endif return (ret); @@ -197,13 +192,14 @@ __fop_create_read(dbenv, recbuf, argpp) sizeof(__fop_create_args) + sizeof(DB_TXN), &argp)) != 0) return (ret); bp = recbuf; - argp->txnid = (DB_TXN *)&argp[1]; + argp->txnp = (DB_TXN *)&argp[1]; + memset(argp->txnp, 0, sizeof(DB_TXN)); memcpy(&argp->type, bp, sizeof(argp->type)); bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); + memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); + bp += sizeof(argp->txnp->txnid); memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); @@ -231,10 +227,10 @@ __fop_create_read(dbenv, recbuf, argpp) * PUBLIC: u_int32_t, const DBT *, const DBT *, u_int32_t)); */ int -__fop_remove_log(dbenv, txnid, ret_lsnp, flags, +__fop_remove_log(dbenv, txnp, ret_lsnp, flags, name, fid, appname) DB_ENV *dbenv; - DB_TXN *txnid; + DB_TXN *txnp; DB_LSN *ret_lsnp; u_int32_t flags; const DBT *name; @@ -258,29 +254,30 @@ __fop_remove_log(dbenv, txnid, ret_lsnp, flags, ret = 0; if (LF_ISSET(DB_LOG_NOT_DURABLE)) { - if (txnid == NULL) + if (txnp == NULL) + return (0); + if (txnp == NULL) return (0); is_durable = 0; } else is_durable = 1; - if (txnid == NULL) { + if (txnp == NULL) { txn_num = 0; lsnp = &null_lsn; null_lsn.file = null_lsn.offset = 0; } else { - if (TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + if (TAILQ_FIRST(&txnp->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) return (ret); /* * We need to assign begin_lsn while holding region mutex. * That assignment is done inside the DbEnv->log_put call, * so pass in the appropriate memory location to be filled * in by the log_put code. - */ - DB_SET_BEGIN_LSNP(txnid, &rlsnp); - txn_num = txnid->txnid; - lsnp = &txnid->last_lsn; + */ + DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); + txn_num = txnp->txnid; } logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) @@ -293,7 +290,7 @@ __fop_remove_log(dbenv, txnid, ret_lsnp, flags, logrec.size += npad; } - if (is_durable || txnid == NULL) { + if (is_durable || txnp == NULL) { if ((ret = __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) return (ret); @@ -351,12 +348,13 @@ __fop_remove_log(dbenv, txnid, ret_lsnp, flags, memcpy(bp, &uinttmp, sizeof(uinttmp)); bp += sizeof(uinttmp); - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + DB_ASSERT(dbenv, + (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - if (is_durable || txnid == NULL) { + if (is_durable || txnp == NULL) { if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnid != NULL) { - txnid->last_lsn = *rlsnp; + flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { + *lsnp = *rlsnp; if (rlsnp != ret_lsnp) *ret_lsnp = *rlsnp; } @@ -375,20 +373,21 @@ __fop_remove_log(dbenv, txnid, ret_lsnp, flags, #else ret = 0; #endif - STAILQ_INSERT_HEAD(&txnid->logs, lr, links); + STAILQ_INSERT_HEAD(&txnp->logs, lr, links); + F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); LSN_NOT_LOGGED(*ret_lsnp); } #ifdef LOG_DIAGNOSTIC if (ret != 0) (void)__fop_remove_print(dbenv, - (DBT *)&logrec, ret_lsnp, NULL, NULL); + (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); #endif #ifdef DIAGNOSTIC __os_free(dbenv, logrec.data); #else - if (is_durable || txnid == NULL) + if (is_durable || txnp == NULL) __os_free(dbenv, logrec.data); #endif return (ret); @@ -412,13 +411,14 @@ __fop_remove_read(dbenv, recbuf, argpp) sizeof(__fop_remove_args) + sizeof(DB_TXN), &argp)) != 0) return (ret); bp = recbuf; - argp->txnid = (DB_TXN *)&argp[1]; + argp->txnp = (DB_TXN *)&argp[1]; + memset(argp->txnp, 0, sizeof(DB_TXN)); memcpy(&argp->type, bp, sizeof(argp->type)); bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); + memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); + bp += sizeof(argp->txnp->txnid); memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); @@ -449,11 +449,11 @@ __fop_remove_read(dbenv, recbuf, argpp) * PUBLIC: u_int32_t, const DBT *, u_int32_t)); */ int -__fop_write_log(dbenv, txnid, ret_lsnp, flags, +__fop_write_log(dbenv, txnp, ret_lsnp, flags, name, appname, pgsize, pageno, offset, page, flag) DB_ENV *dbenv; - DB_TXN *txnid; + DB_TXN *txnp; DB_LSN *ret_lsnp; u_int32_t flags; const DBT *name; @@ -481,29 +481,30 @@ __fop_write_log(dbenv, txnid, ret_lsnp, flags, ret = 0; if (LF_ISSET(DB_LOG_NOT_DURABLE)) { - if (txnid == NULL) + if (txnp == NULL) + return (0); + if (txnp == NULL) return (0); is_durable = 0; } else is_durable = 1; - if (txnid == NULL) { + if (txnp == NULL) { txn_num = 0; lsnp = &null_lsn; null_lsn.file = null_lsn.offset = 0; } else { - if (TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + if (TAILQ_FIRST(&txnp->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) return (ret); /* * We need to assign begin_lsn while holding region mutex. * That assignment is done inside the DbEnv->log_put call, * so pass in the appropriate memory location to be filled * in by the log_put code. - */ - DB_SET_BEGIN_LSNP(txnid, &rlsnp); - txn_num = txnid->txnid; - lsnp = &txnid->last_lsn; + */ + DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); + txn_num = txnp->txnid; } logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) @@ -520,7 +521,7 @@ __fop_write_log(dbenv, txnid, ret_lsnp, flags, logrec.size += npad; } - if (is_durable || txnid == NULL) { + if (is_durable || txnp == NULL) { if ((ret = __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) return (ret); @@ -594,12 +595,13 @@ __fop_write_log(dbenv, txnid, ret_lsnp, flags, memcpy(bp, &uinttmp, sizeof(uinttmp)); bp += sizeof(uinttmp); - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + DB_ASSERT(dbenv, + (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - if (is_durable || txnid == NULL) { + if (is_durable || txnp == NULL) { if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnid != NULL) { - txnid->last_lsn = *rlsnp; + flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { + *lsnp = *rlsnp; if (rlsnp != ret_lsnp) *ret_lsnp = *rlsnp; } @@ -618,20 +620,21 @@ __fop_write_log(dbenv, txnid, ret_lsnp, flags, #else ret = 0; #endif - STAILQ_INSERT_HEAD(&txnid->logs, lr, links); + STAILQ_INSERT_HEAD(&txnp->logs, lr, links); + F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); LSN_NOT_LOGGED(*ret_lsnp); } #ifdef LOG_DIAGNOSTIC if (ret != 0) (void)__fop_write_print(dbenv, - (DBT *)&logrec, ret_lsnp, NULL, NULL); + (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); #endif #ifdef DIAGNOSTIC __os_free(dbenv, logrec.data); #else - if (is_durable || txnid == NULL) + if (is_durable || txnp == NULL) __os_free(dbenv, logrec.data); #endif return (ret); @@ -655,13 +658,14 @@ __fop_write_read(dbenv, recbuf, argpp) sizeof(__fop_write_args) + sizeof(DB_TXN), &argp)) != 0) return (ret); bp = recbuf; - argp->txnid = (DB_TXN *)&argp[1]; + argp->txnp = (DB_TXN *)&argp[1]; + memset(argp->txnp, 0, sizeof(DB_TXN)); memcpy(&argp->type, bp, sizeof(argp->type)); bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); + memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); + bp += sizeof(argp->txnp->txnid); memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); @@ -707,10 +711,10 @@ __fop_write_read(dbenv, recbuf, argpp) * PUBLIC: u_int32_t, const DBT *, const DBT *, const DBT *, u_int32_t)); */ int -__fop_rename_log(dbenv, txnid, ret_lsnp, flags, +__fop_rename_log(dbenv, txnp, ret_lsnp, flags, oldname, newname, fileid, appname) DB_ENV *dbenv; - DB_TXN *txnid; + DB_TXN *txnp; DB_LSN *ret_lsnp; u_int32_t flags; const DBT *oldname; @@ -735,29 +739,30 @@ __fop_rename_log(dbenv, txnid, ret_lsnp, flags, ret = 0; if (LF_ISSET(DB_LOG_NOT_DURABLE)) { - if (txnid == NULL) + if (txnp == NULL) + return (0); + if (txnp == NULL) return (0); is_durable = 0; } else is_durable = 1; - if (txnid == NULL) { + if (txnp == NULL) { txn_num = 0; lsnp = &null_lsn; null_lsn.file = null_lsn.offset = 0; } else { - if (TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + if (TAILQ_FIRST(&txnp->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) return (ret); /* * We need to assign begin_lsn while holding region mutex. * That assignment is done inside the DbEnv->log_put call, * so pass in the appropriate memory location to be filled * in by the log_put code. - */ - DB_SET_BEGIN_LSNP(txnid, &rlsnp); - txn_num = txnid->txnid; - lsnp = &txnid->last_lsn; + */ + DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); + txn_num = txnp->txnid; } logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) @@ -771,7 +776,7 @@ __fop_rename_log(dbenv, txnid, ret_lsnp, flags, logrec.size += npad; } - if (is_durable || txnid == NULL) { + if (is_durable || txnp == NULL) { if ((ret = __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) return (ret); @@ -840,12 +845,13 @@ __fop_rename_log(dbenv, txnid, ret_lsnp, flags, memcpy(bp, &uinttmp, sizeof(uinttmp)); bp += sizeof(uinttmp); - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + DB_ASSERT(dbenv, + (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - if (is_durable || txnid == NULL) { + if (is_durable || txnp == NULL) { if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnid != NULL) { - txnid->last_lsn = *rlsnp; + flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { + *lsnp = *rlsnp; if (rlsnp != ret_lsnp) *ret_lsnp = *rlsnp; } @@ -864,20 +870,21 @@ __fop_rename_log(dbenv, txnid, ret_lsnp, flags, #else ret = 0; #endif - STAILQ_INSERT_HEAD(&txnid->logs, lr, links); + STAILQ_INSERT_HEAD(&txnp->logs, lr, links); + F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); LSN_NOT_LOGGED(*ret_lsnp); } #ifdef LOG_DIAGNOSTIC if (ret != 0) (void)__fop_rename_print(dbenv, - (DBT *)&logrec, ret_lsnp, NULL, NULL); + (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); #endif #ifdef DIAGNOSTIC __os_free(dbenv, logrec.data); #else - if (is_durable || txnid == NULL) + if (is_durable || txnp == NULL) __os_free(dbenv, logrec.data); #endif return (ret); @@ -901,13 +908,14 @@ __fop_rename_read(dbenv, recbuf, argpp) sizeof(__fop_rename_args) + sizeof(DB_TXN), &argp)) != 0) return (ret); bp = recbuf; - argp->txnid = (DB_TXN *)&argp[1]; + argp->txnp = (DB_TXN *)&argp[1]; + memset(argp->txnp, 0, sizeof(DB_TXN)); memcpy(&argp->type, bp, sizeof(argp->type)); bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); + memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); + bp += sizeof(argp->txnp->txnid); memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); @@ -944,10 +952,10 @@ __fop_rename_read(dbenv, recbuf, argpp) * PUBLIC: u_int32_t, u_int32_t)); */ int -__fop_file_remove_log(dbenv, txnid, ret_lsnp, flags, +__fop_file_remove_log(dbenv, txnp, ret_lsnp, flags, real_fid, tmp_fid, name, appname, child) DB_ENV *dbenv; - DB_TXN *txnid; + DB_TXN *txnp; DB_LSN *ret_lsnp; u_int32_t flags; const DBT *real_fid; @@ -973,29 +981,30 @@ __fop_file_remove_log(dbenv, txnid, ret_lsnp, flags, ret = 0; if (LF_ISSET(DB_LOG_NOT_DURABLE)) { - if (txnid == NULL) + if (txnp == NULL) + return (0); + if (txnp == NULL) return (0); is_durable = 0; } else is_durable = 1; - if (txnid == NULL) { + if (txnp == NULL) { txn_num = 0; lsnp = &null_lsn; null_lsn.file = null_lsn.offset = 0; } else { - if (TAILQ_FIRST(&txnid->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnid)) != 0) + if (TAILQ_FIRST(&txnp->kids) != NULL && + (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) return (ret); /* * We need to assign begin_lsn while holding region mutex. * That assignment is done inside the DbEnv->log_put call, * so pass in the appropriate memory location to be filled * in by the log_put code. - */ - DB_SET_BEGIN_LSNP(txnid, &rlsnp); - txn_num = txnid->txnid; - lsnp = &txnid->last_lsn; + */ + DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); + txn_num = txnp->txnid; } logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) @@ -1010,7 +1019,7 @@ __fop_file_remove_log(dbenv, txnid, ret_lsnp, flags, logrec.size += npad; } - if (is_durable || txnid == NULL) { + if (is_durable || txnp == NULL) { if ((ret = __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) return (ret); @@ -1083,12 +1092,13 @@ __fop_file_remove_log(dbenv, txnid, ret_lsnp, flags, memcpy(bp, &uinttmp, sizeof(uinttmp)); bp += sizeof(uinttmp); - DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); + DB_ASSERT(dbenv, + (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - if (is_durable || txnid == NULL) { + if (is_durable || txnp == NULL) { if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnid != NULL) { - txnid->last_lsn = *rlsnp; + flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { + *lsnp = *rlsnp; if (rlsnp != ret_lsnp) *ret_lsnp = *rlsnp; } @@ -1107,20 +1117,21 @@ __fop_file_remove_log(dbenv, txnid, ret_lsnp, flags, #else ret = 0; #endif - STAILQ_INSERT_HEAD(&txnid->logs, lr, links); + STAILQ_INSERT_HEAD(&txnp->logs, lr, links); + F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); LSN_NOT_LOGGED(*ret_lsnp); } #ifdef LOG_DIAGNOSTIC if (ret != 0) (void)__fop_file_remove_print(dbenv, - (DBT *)&logrec, ret_lsnp, NULL, NULL); + (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); #endif #ifdef DIAGNOSTIC __os_free(dbenv, logrec.data); #else - if (is_durable || txnid == NULL) + if (is_durable || txnp == NULL) __os_free(dbenv, logrec.data); #endif return (ret); @@ -1145,13 +1156,14 @@ __fop_file_remove_read(dbenv, recbuf, argpp) sizeof(__fop_file_remove_args) + sizeof(DB_TXN), &argp)) != 0) return (ret); bp = recbuf; - argp->txnid = (DB_TXN *)&argp[1]; + argp->txnp = (DB_TXN *)&argp[1]; + memset(argp->txnp, 0, sizeof(DB_TXN)); memcpy(&argp->type, bp, sizeof(argp->type)); bp += sizeof(argp->type); - memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid)); - bp += sizeof(argp->txnid->txnid); + memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); + bp += sizeof(argp->txnp->txnid); memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); bp += sizeof(DB_LSN); diff --git a/db/fileops/fileops_autop.c b/db/fileops/fileops_autop.c index 970b0c63b..e19167691 100644 --- a/db/fileops/fileops_autop.c +++ b/db/fileops/fileops_autop.c @@ -2,17 +2,9 @@ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <ctype.h> -#include <string.h> -#endif - #include "db_int.h" #include "dbinc/crypto.h" #include "dbinc/db_page.h" -#include "dbinc/db_dispatch.h" #include "dbinc/db_am.h" #include "dbinc/log.h" #include "dbinc/txn.h" @@ -35,20 +27,18 @@ __fop_create_print(dbenv, dbtp, lsnp, notused2, notused3) int ch; int ret; - notused2 = DB_TXN_ABORT; + notused2 = DB_TXN_PRINT; notused3 = NULL; if ((ret = __fop_create_read(dbenv, dbtp->data, &argp)) != 0) return (ret); (void)printf( - "[%lu][%lu]__fop_create%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, + "[%lu][%lu]__fop_create%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, (u_long)lsnp->offset, (argp->type & DB_debug_FLAG) ? "_debug" : "", (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); + (u_long)argp->txnp->txnid, + (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); (void)printf("\tname: "); for (i = 0; i < argp->name.size; i++) { ch = ((u_int8_t *)argp->name.data)[i]; @@ -79,20 +69,18 @@ __fop_remove_print(dbenv, dbtp, lsnp, notused2, notused3) int ch; int ret; - notused2 = DB_TXN_ABORT; + notused2 = DB_TXN_PRINT; notused3 = NULL; if ((ret = __fop_remove_read(dbenv, dbtp->data, &argp)) != 0) return (ret); (void)printf( - "[%lu][%lu]__fop_remove%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, + "[%lu][%lu]__fop_remove%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, (u_long)lsnp->offset, (argp->type & DB_debug_FLAG) ? "_debug" : "", (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); + (u_long)argp->txnp->txnid, + (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); (void)printf("\tname: "); for (i = 0; i < argp->name.size; i++) { ch = ((u_int8_t *)argp->name.data)[i]; @@ -128,20 +116,18 @@ __fop_write_print(dbenv, dbtp, lsnp, notused2, notused3) int ch; int ret; - notused2 = DB_TXN_ABORT; + notused2 = DB_TXN_PRINT; notused3 = NULL; if ((ret = __fop_write_read(dbenv, dbtp->data, &argp)) != 0) return (ret); (void)printf( - "[%lu][%lu]__fop_write%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, + "[%lu][%lu]__fop_write%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, (u_long)lsnp->offset, (argp->type & DB_debug_FLAG) ? "_debug" : "", (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); + (u_long)argp->txnp->txnid, + (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); (void)printf("\tname: "); for (i = 0; i < argp->name.size; i++) { ch = ((u_int8_t *)argp->name.data)[i]; @@ -181,20 +167,18 @@ __fop_rename_print(dbenv, dbtp, lsnp, notused2, notused3) int ch; int ret; - notused2 = DB_TXN_ABORT; + notused2 = DB_TXN_PRINT; notused3 = NULL; if ((ret = __fop_rename_read(dbenv, dbtp->data, &argp)) != 0) return (ret); (void)printf( - "[%lu][%lu]__fop_rename%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, + "[%lu][%lu]__fop_rename%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, (u_long)lsnp->offset, (argp->type & DB_debug_FLAG) ? "_debug" : "", (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); + (u_long)argp->txnp->txnid, + (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); (void)printf("\toldname: "); for (i = 0; i < argp->oldname.size; i++) { ch = ((u_int8_t *)argp->oldname.data)[i]; @@ -236,20 +220,18 @@ __fop_file_remove_print(dbenv, dbtp, lsnp, notused2, notused3) int ch; int ret; - notused2 = DB_TXN_ABORT; + notused2 = DB_TXN_PRINT; notused3 = NULL; if ((ret = __fop_file_remove_read(dbenv, dbtp->data, &argp)) != 0) return (ret); (void)printf( - "[%lu][%lu]__fop_file_remove%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, - (u_long)lsnp->offset, + "[%lu][%lu]__fop_file_remove%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", + (u_long)lsnp->file, (u_long)lsnp->offset, (argp->type & DB_debug_FLAG) ? "_debug" : "", (u_long)argp->type, - (u_long)argp->txnid->txnid, - (u_long)argp->prev_lsn.file, - (u_long)argp->prev_lsn.offset); + (u_long)argp->txnp->txnid, + (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); (void)printf("\treal_fid: "); for (i = 0; i < argp->real_fid.size; i++) { ch = ((u_int8_t *)argp->real_fid.data)[i]; diff --git a/db/fileops/fop_basic.c b/db/fileops/fop_basic.c index 36a958e95..9563ddbc1 100644 --- a/db/fileops/fop_basic.c +++ b/db/fileops/fop_basic.c @@ -1,22 +1,16 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2001-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 2001-2006 + * Oracle Corporation. All rights reserved. * - * $Id: fop_basic.c,v 1.32 2004/11/15 20:04:50 bostic Exp $ + * $Id: fop_basic.c,v 12.19 2006/09/19 15:06:59 bostic Exp $ */ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <string.h> -#include <sys/types.h> -#endif - #include "db_int.h" #include "dbinc/db_page.h" -#include "dbinc/db_shash.h" #include "dbinc/fop.h" #include "dbinc/log.h" #include "dbinc/mp.h" @@ -24,9 +18,33 @@ #include "dbinc/db_am.h" /* - * This file implements the basic file-level operations. This code - * ought to be fairly independent of DB, other than through its - * error-reporting mechanism. + * The transactional guarantees Berkeley DB provides for file + * system level operations (database physical file create, delete, + * rename) are based on our understanding of current file system + * semantics; a system that does not provide these semantics and + * guarantees could be in danger. + * + * First, as in standard database changes, fsync and fdatasync must + * work: when applied to the log file, the records written into the + * log must be transferred to stable storage. + * + * Second, it must not be possible for the log file to be removed + * without previous file system level operations being flushed to + * stable storage. Berkeley DB applications write log records + * describing file system operations into the log, then perform the + * file system operation, then commit the enclosing transaction + * (which flushes the log file to stable storage). Subsequently, + * a database environment checkpoint may make it possible for the + * application to remove the log file containing the record of the + * file system operation. DB's transactional guarantees for file + * system operations require the log file removal not succeed until + * all previous filesystem operations have been flushed to stable + * storage. In other words, the flush of the log file, or the + * removal of the log file, must block until all previous + * filesystem operations have been flushed to stable storage. This + * semantic is not, as far as we know, required by any existing + * standards document, but we have never seen a filesystem where + * it does not apply. */ /* @@ -55,20 +73,20 @@ __fop_create(dbenv, txn, fhpp, name, appname, mode, flags) char *real_name; real_name = NULL; + fhp = NULL; if ((ret = __db_appname(dbenv, appname, name, 0, NULL, &real_name)) != 0) return (ret); if (mode == 0) - mode = __db_omode("rw----"); + mode = __db_omode(OWNER_RW); if (DBENV_LOGGING(dbenv)) { - memset(&data, 0, sizeof(data)); - data.data = (void *)name; - data.size = (u_int32_t)strlen(name) + 1; + DB_INIT_DBT(data, name, strlen(name) + 1); if ((ret = __fop_create_log(dbenv, txn, &lsn, - flags | DB_FLUSH, &data, (u_int32_t)appname, mode)) != 0) + flags | DB_FLUSH, + &data, (u_int32_t)appname, (u_int32_t)mode)) != 0) goto err; } @@ -115,23 +133,21 @@ __fop_remove(dbenv, txn, fileid, name, appname, flags) __db_appname(dbenv, appname, name, 0, NULL, &real_name)) != 0) goto err; - if (txn == NULL) { + if (!IS_REAL_TXN(txn)) { if (fileid != NULL && (ret = __memp_nameop( - dbenv, fileid, NULL, real_name, NULL)) != 0) + dbenv, fileid, NULL, real_name, NULL, 0)) != 0) goto err; } else { if (DBENV_LOGGING(dbenv)) { memset(&fdbt, 0, sizeof(ndbt)); fdbt.data = fileid; fdbt.size = fileid == NULL ? 0 : DB_FILE_ID_LEN; - memset(&ndbt, 0, sizeof(ndbt)); - ndbt.data = (void *)name; - ndbt.size = (u_int32_t)strlen(name) + 1; - if ((ret = __fop_remove_log(dbenv, - txn, &lsn, flags, &ndbt, &fdbt, appname)) != 0) + DB_INIT_DBT(ndbt, name, strlen(name) + 1); + if ((ret = __fop_remove_log(dbenv, txn, &lsn, + flags, &ndbt, &fdbt, (u_int32_t)appname)) != 0) goto err; } - ret = __txn_remevent(dbenv, txn, real_name, fileid); + ret = __txn_remevent(dbenv, txn, real_name, fileid, 0); } err: if (real_name != NULL) @@ -176,7 +192,7 @@ __fop_write(dbenv, int local_open, ret, t_ret; char *real_name; - DB_ASSERT(istmp != 0); + DB_ASSERT(dbenv, istmp != 0); ret = local_open = 0; real_name = NULL; @@ -189,11 +205,10 @@ __fop_write(dbenv, memset(&data, 0, sizeof(data)); data.data = buf; data.size = size; - memset(&namedbt, 0, sizeof(namedbt)); - namedbt.data = (void *)name; - namedbt.size = (u_int32_t)strlen(name) + 1; - if ((ret = __fop_write_log(dbenv, txn, &lsn, flags, - &namedbt, appname, pgsize, pageno, off, &data, istmp)) != 0) + DB_INIT_DBT(namedbt, name, strlen(name) + 1); + if ((ret = __fop_write_log(dbenv, txn, + &lsn, flags, &namedbt, (u_int32_t)appname, + pgsize, pageno, off, &data, istmp)) != 0) goto err; } @@ -205,8 +220,7 @@ __fop_write(dbenv, } /* Seek to offset. */ - if ((ret = __os_seek(dbenv, - fhp, pgsize, pageno, off, 0, DB_OS_SEEK_SET)) != 0) + if ((ret = __os_seek(dbenv, fhp, pageno, pgsize, off)) != 0) goto err; /* Now do the write. */ @@ -251,13 +265,9 @@ __fop_rename(dbenv, txn, oldname, newname, fid, appname, flags) goto err; if (DBENV_LOGGING(dbenv)) { - memset(&old, 0, sizeof(old)); - memset(&new, 0, sizeof(new)); + DB_INIT_DBT(old, oldname, strlen(oldname) + 1); + DB_INIT_DBT(new, newname, strlen(newname) + 1); memset(&fiddbt, 0, sizeof(fiddbt)); - old.data = (void *)oldname; - old.size = (u_int32_t)strlen(oldname) + 1; - new.data = (void *)newname; - new.size = (u_int32_t)strlen(newname) + 1; fiddbt.data = fid; fiddbt.size = DB_FILE_ID_LEN; if ((ret = __fop_rename_log(dbenv, txn, &lsn, flags | DB_FLUSH, @@ -265,7 +275,7 @@ __fop_rename(dbenv, txn, oldname, newname, fid, appname, flags) goto err; } - ret = __memp_nameop(dbenv, fid, newname, o, n); + ret = __memp_nameop(dbenv, fid, newname, o, n, 0); err: if (o != NULL) __os_free(dbenv, o); diff --git a/db/fileops/fop_rec.c b/db/fileops/fop_rec.c index a9326d532..eced8fd39 100644 --- a/db/fileops/fop_rec.c +++ b/db/fileops/fop_rec.c @@ -1,29 +1,52 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2001-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 2001-2006 + * Oracle Corporation. All rights reserved. * - * $Id: fop_rec.c,v 1.31 2004/09/22 03:45:25 bostic Exp $ + * $Id: fop_rec.c,v 12.12 2006/08/24 14:46:03 bostic Exp $ */ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <string.h> -#endif - #include "db_int.h" #include "dbinc/db_page.h" -#include "dbinc/db_shash.h" #include "dbinc/fop.h" #include "dbinc/db_am.h" #include "dbinc/mp.h" #include "dbinc/txn.h" /* + * The transactional guarantees Berkeley DB provides for file + * system level operations (database physical file create, delete, + * rename) are based on our understanding of current file system + * semantics; a system that does not provide these semantics and + * guarantees could be in danger. + * + * First, as in standard database changes, fsync and fdatasync must + * work: when applied to the log file, the records written into the + * log must be transferred to stable storage. + * + * Second, it must not be possible for the log file to be removed + * without previous file system level operations being flushed to + * stable storage. Berkeley DB applications write log records + * describing file system operations into the log, then perform the + * file system operation, then commit the enclosing transaction + * (which flushes the log file to stable storage). Subsequently, + * a database environment checkpoint may make it possible for the + * application to remove the log file containing the record of the + * file system operation. DB's transactional guarantees for file + * system operations require the log file removal not succeed until + * all previous filesystem operations have been flushed to stable + * storage. In other words, the flush of the log file, or the + * removal of the log file, must block until all previous + * filesystem operations have been flushed to stable storage. This + * semantic is not, as far as we know, required by any existing + * standards document, but we have never seen a filesystem where + * it does not apply. + */ + +/* * __fop_create_recover -- * Recovery function for create. * @@ -56,7 +79,7 @@ __fop_create_recover(dbenv, dbtp, lsnp, op, info) (void)__os_unlink(dbenv, real_name); else if (DB_REDO(op)) { if ((ret = __os_open(dbenv, real_name, - DB_OSO_CREATE | DB_OSO_EXCL, argp->mode, &fhp)) == 0) + DB_OSO_CREATE | DB_OSO_EXCL, (int)argp->mode, &fhp)) == 0) (void)__os_closehandle(dbenv, fhp); else goto out; @@ -101,7 +124,7 @@ __fop_remove_recover(dbenv, dbtp, lsnp, op, info) /* Its ok if the file is not there. */ if (DB_REDO(op)) (void)__memp_nameop(dbenv, - (u_int8_t *)argp->fid.data, NULL, real_name, NULL); + (u_int8_t *)argp->fid.data, NULL, real_name, NULL, 0); *lsnp = argp->prev_lsn; out: if (real_name != NULL) @@ -133,10 +156,10 @@ __fop_write_recover(dbenv, dbtp, lsnp, op, info) ret = 0; if (DB_UNDO(op)) - DB_ASSERT(argp->flag != 0); + DB_ASSERT(dbenv, argp->flag != 0); else if (DB_REDO(op)) ret = __fop_write(dbenv, - argp->txnid, argp->name.data, argp->appname, + argp->txnp, argp->name.data, (APPNAME)argp->appname, NULL, argp->pgsize, argp->pageno, argp->offset, argp->page.data, argp->page.size, argp->flag, 0); @@ -209,14 +232,33 @@ __fop_rename_recover(dbenv, dbtp, lsnp, op, info) goto done; (void)__os_closehandle(dbenv, fhp); fhp = NULL; + if (DB_REDO(op)) { + /* + * Check to see if the target file exists. If it + * does and it does not have the proper id then + * it is a later version. We just remove the source + * file since the state of the world is beyond this + * point. + */ + if (__os_open(dbenv, real_new, 0, 0, &fhp) == 0 && + __fop_read_meta(dbenv, src, mbuf, + DBMETASIZE, fhp, 1, NULL) == 0 && + __db_chk_meta(dbenv, NULL, meta, 1) == 0 && + memcmp(argp->fileid.data, + meta->uid, DB_FILE_ID_LEN) != 0) { + (void)__memp_nameop(dbenv, + fileid, NULL, real_old, NULL, 0); + goto done; + } + } } if (DB_UNDO(op)) (void)__memp_nameop(dbenv, fileid, - (const char *)argp->oldname.data, real_new, real_old); + (const char *)argp->oldname.data, real_new, real_old, 0); if (DB_REDO(op)) (void)__memp_nameop(dbenv, fileid, - (const char *)argp->newname.data, real_old, real_new); + (const char *)argp->newname.data, real_old, real_new, 0); done: *lsnp = argp->prev_lsn; out: if (real_new != NULL) @@ -327,7 +369,7 @@ __fop_file_remove_recover(dbenv, dbtp, lsnp, op, info) if (cstat == TXN_COMMIT) (void)__memp_nameop(dbenv, is_real ? argp->real_fid.data : argp->tmp_fid.data, - NULL, real_name, NULL); + NULL, real_name, NULL, 0); } done: *lsnp = argp->prev_lsn; diff --git a/db/fileops/fop_util.c b/db/fileops/fop_util.c index 564dc4a36..9da9d4a43 100644 --- a/db/fileops/fop_util.c +++ b/db/fileops/fop_util.c @@ -1,25 +1,18 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2001-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 2001-2006 + * Oracle Corporation. All rights reserved. * - * $Id: fop_util.c,v 1.104 2004/09/24 00:43:18 bostic Exp $ + * $Id: fop_util.c,v 12.36 2006/09/19 15:06:59 bostic Exp $ */ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <stdlib.h> -#include <string.h> -#endif - #include "db_int.h" #include "dbinc/db_page.h" -#include "dbinc/db_shash.h" #include "dbinc/db_am.h" +#include "dbinc/hash.h" #include "dbinc/fop.h" #include "dbinc/lock.h" #include "dbinc/mp.h" @@ -27,6 +20,15 @@ #include "dbinc/txn.h" static int __fop_set_pgsize __P((DB *, DB_FH *, const char *)); +static int __fop_inmem_create __P((DB *, const char *, DB_TXN *, u_int32_t)); +static int __fop_inmem_dummy __P((DB *, DB_TXN *, const char *, u_int8_t *)); +static int __fop_inmem_read_meta __P((DB *, DB_TXN *, const char *, u_int32_t)); +static int __fop_inmem_swap __P((DB *, DB *, DB_TXN *, + const char *, const char *, const char *, u_int32_t)); +static int __fop_ondisk_dummy __P((DB *, + DB_TXN *, const char *, u_int8_t *, u_int32_t)); +static int __fop_ondisk_swap __P((DB *, DB *, DB_TXN *, + const char *, const char *, const char *, u_int32_t, u_int32_t)); /* * Acquire the environment meta-data lock. The parameters are the @@ -56,6 +58,14 @@ static int __fop_set_pgsize __P((DB *, DB_FH *, const char *)); } while (0) #endif +#define RESET_MPF(D, F) do { \ + (void)__memp_fclose((D)->mpf, (F)); \ + (D)->mpf = NULL; \ + F_CLR((D), DB_AM_OPEN_CALLED); \ + if ((ret = __memp_fcreate((D)->dbenv, &(D)->mpf)) != 0) \ + goto err; \ +} while (0) + /* * If we open a file handle and our caller is doing fcntl(2) locking, * we can't close the handle because that would discard the caller's @@ -109,7 +119,7 @@ __fop_lock_handle(dbenv, dbp, locker, mode, elockp, flags) * doing is on the global environment. */ if (IS_RECOVERING(dbenv)) - return (elockp == NULL ? 0 : __ENV_LPUT(dbenv, *elockp, 0)); + return (elockp == NULL ? 0 : __ENV_LPUT(dbenv, *elockp)); memcpy(lock_desc.fileid, dbp->fileid, DB_FILE_ID_LEN); lock_desc.pgno = dbp->meta_pgno; @@ -199,24 +209,28 @@ __fop_file_setup(dbp, txn, name, mode, flags, retidp) DB_FH *fhp; DB_LOCK elock; DB_TXN *stxn; + DBTYPE save_type; size_t len; u_int32_t dflags, locker, oflags; u_int8_t mbuf[DBMETASIZE]; - int created_locker, ret, retries, t_ret, tmp_created, truncating; + int created_locker, create_ok, ret, retries, t_ret, tmp_created; + int truncating, was_inval; char *real_name, *real_tmpname, *tmpname; - DB_ASSERT(name != NULL); - *retidp = TXN_INVALID; dbenv = dbp->dbenv; fhp = NULL; LOCK_INIT(elock); stxn = NULL; - created_locker = tmp_created = truncating = 0; + created_locker = tmp_created = truncating = was_inval = 0; real_name = real_tmpname = tmpname = NULL; dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0; + ret = 0; + retries = 0; + save_type = dbp->type; + /* * Get a lockerid for this handle. There are paths through queue * rename and remove where this dbp already has a locker, so make @@ -226,7 +240,7 @@ __fop_file_setup(dbp, txn, name, mode, flags, retidp) !F_ISSET(dbp, DB_AM_COMPENSATE) && !F_ISSET(dbp, DB_AM_RECOVER) && dbp->lid == DB_LOCK_INVALIDID) { - if ((ret = __lock_id(dbenv, &dbp->lid)) != 0) + if ((ret = __lock_id(dbenv, &dbp->lid, NULL)) != 0) goto err; created_locker = 1; } @@ -234,21 +248,29 @@ __fop_file_setup(dbp, txn, name, mode, flags, retidp) locker = txn == NULL ? dbp->lid : txn->txnid; - /* Get the real backing file name. */ - if ((ret = __db_appname(dbenv, - DB_APP_DATA, name, 0, NULL, &real_name)) != 0) - goto err; + oflags = 0; + if (F_ISSET(dbp, DB_AM_INMEM)) + real_name = (char *)name; + else { + /* Get the real backing file name. */ + if ((ret = __db_appname(dbenv, + DB_APP_DATA, name, 0, NULL, &real_name)) != 0) + goto err; - /* Fill in the default file mode. */ - if (mode == 0) - mode = __db_omode("rwrw--"); + /* Fill in the default file mode. */ + if (mode == 0) + mode = __db_omode("rw-rw----"); + + if (LF_ISSET(DB_RDONLY)) + oflags |= DB_OSO_RDONLY; + if (LF_ISSET(DB_TRUNCATE)) + oflags |= DB_OSO_TRUNC; + } - oflags = 0; - if (LF_ISSET(DB_RDONLY)) - oflags |= DB_OSO_RDONLY; - if (LF_ISSET(DB_TRUNCATE)) - oflags |= DB_OSO_TRUNC; retries = 0; + create_ok = LF_ISSET(DB_CREATE); + LF_CLR(DB_CREATE); + retry: /* * If we cannot create the file, only retry a few times. We @@ -257,13 +279,36 @@ retry: * a previous crash). */ if (++retries > DB_RETRY) { - __db_err(dbenv, "__fop_file_setup: Retry limit (%d) exceeded", + __db_errx(dbenv, "__fop_file_setup: Retry limit (%d) exceeded", DB_RETRY); goto err; } if (!F_ISSET(dbp, DB_AM_COMPENSATE) && !F_ISSET(dbp, DB_AM_RECOVER)) GET_ENVLOCK(dbenv, locker, &elock); - if ((ret = __os_exists(real_name, NULL)) == 0) { + if (name == NULL) + ret = ENOENT; + else if (F_ISSET(dbp, DB_AM_INMEM)) { + ret = __db_dbenv_mpool(dbp, name, flags); + /* + * We are using __db_dbenv_open as a check for existence. + * However, db_dbenv_mpool does an actual open and there + * are scenarios where the object exists, but cannot be + * opened, because our settings don't match those internally. + * We need to check for that explicitly. We'll need the + * mpool open to read the meta-data page, so we're going to + * have to temporarily turn this dbp into an UNKNOWN one. + */ + if (ret == EINVAL) { + was_inval = 1; + save_type = dbp->type; + dbp->type = DB_UNKNOWN; + ret = __db_dbenv_mpool(dbp, name, flags); + dbp->type = save_type; + } + } else + ret = __os_exists(dbenv, real_name, NULL); + + if (ret == 0) { /* * If the file exists, there are 5 possible cases: * 1. DB_EXCL was specified so this is an error, unless @@ -275,12 +320,14 @@ retry: * of file it is, we should open/create it. * 3. It is 0-length, we are not doing transactions (i.e., * we are sendmail), we should open/create into it. + * -- on-disk files only! * 4. Is it a Berkeley DB file and we should simply open it. * 5. It is not a BDB file and we should return an error. */ - /* We have to open the file. */ -reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0) + /* Open file (if there is one). */ +reopen: if (!F_ISSET(dbp, DB_AM_INMEM) && + (ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0) goto err; /* Case 2: DB_TRUNCATE: we must do the creation in place. */ @@ -295,33 +342,43 @@ reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0) } /* Cases 1,3-5: we need to read the meta-data page. */ - ret = __fop_read_meta(dbenv, real_name, mbuf, sizeof(mbuf), fhp, - LF_ISSET(DB_FCNTL_LOCKING) && txn == NULL ? 1 : 0, &len); - - /* Case 3: 0-length, no txns. */ - if (ret != 0 && len == 0 && txn == NULL) { - if (LF_ISSET(DB_EXCL)) { - /* Case 1b: DB_EXCL and 0-lenth file exists. */ - ret = EEXIST; - goto err; + if (F_ISSET(dbp, DB_AM_INMEM)) + ret = __fop_inmem_read_meta(dbp, txn, name, flags); + else { + ret = __fop_read_meta(dbenv, real_name, mbuf, + sizeof(mbuf), fhp, + LF_ISSET(DB_FCNTL_LOCKING) && txn == NULL ? 1 : 0, + &len); + + /* Case 3: 0-length, no txns. */ + if (ret != 0 && len == 0 && txn == NULL) { + if (LF_ISSET(DB_EXCL)) { + /* + * Case 1b: DB_EXCL and + * 0-lenth file exists. + */ + ret = EEXIST; + goto err; + } + tmpname = (char *)name; + goto creat2; } - tmpname = (char *)name; - goto creat2; + + /* Case 4: This is a valid file. */ + if (ret == 0) + ret = __db_meta_setup(dbenv, dbp, + real_name, (DBMETA *)mbuf, flags, 1); + } /* Case 5: Invalid file. */ if (ret != 0) goto err; - /* Case 4: This is a valid file. */ - if ((ret = __db_meta_setup(dbenv, - dbp, real_name, (DBMETA *)mbuf, flags, 1)) != 0) - goto err; - /* Now, get our handle lock. */ if ((ret = __fop_lock_handle(dbenv, dbp, locker, DB_LOCK_READ, NULL, DB_LOCK_NOWAIT)) == 0) { - if ((ret = __ENV_LPUT(dbenv, elock, 0)) != 0) + if ((ret = __ENV_LPUT(dbenv, elock)) != 0) goto err; } else if (ret != DB_LOCK_NOTGRANTED || (txn != NULL && F_ISSET(txn, TXN_NOWAIT))) @@ -341,28 +398,66 @@ reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0) * We assert it here to make sure we aren't destroying * any application level FCNTL semantics. */ - DB_ASSERT(!LF_ISSET(DB_FCNTL_LOCKING)); - if ((ret = __os_closehandle(dbenv, fhp)) != 0) - goto err; - fhp = NULL; - ret = __fop_lock_handle(dbenv, - dbp, locker, DB_LOCK_READ, &elock, 0); - if (ret == DB_LOCK_NOTEXIST) - goto retry; - if (ret != 0) + DB_ASSERT(dbenv, !LF_ISSET(DB_FCNTL_LOCKING)); + if (!F_ISSET(dbp, DB_AM_INMEM)) { + if ((ret = __os_closehandle(dbenv, fhp)) != 0) + goto err; + fhp = NULL; + } + if ((ret = __fop_lock_handle(dbenv, + dbp, locker, DB_LOCK_READ, &elock, 0)) != 0) { + if (F_ISSET(dbp, DB_AM_INMEM)) + RESET_MPF(dbp, 0); goto err; + } + /* - * XXX - * I need to convince myself that I don't need to - * re-read the metadata page here. If you do need - * to re-read it you'd better decrypt it too... + * It's possible that our DBP was initialized + * with a different file last time we opened it. + * Therefore, we need to reset the DBP type and then + * re-read the meta-data page and reset any other + * fields that __db_meta_setup initializes. We + * need to shut down this dbp and reopen for in-memory + * named databases. Unfortunately __db_refresh is + * pretty aggressive at the shutting down, so we need + * to do a bunch of restoration. + * XXX it would be nice to pull refresh apart into + * the stuff you need to do to call __db_env_mpool + * and the stuff you can really throw away. */ - if ((ret = - __os_open(dbenv, real_name, 0, 0, &fhp)) != 0) + if (F_ISSET(dbp, DB_AM_INMEM)) { + if ((ret = __db_refresh(dbp, + txn, DB_NOSYNC, NULL, 1)) != 0) + goto err; + ret = __db_dbenv_mpool(dbp, name, flags); + } else + ret = __os_open(dbenv, real_name, 0, 0, &fhp); + + if (ret != 0) { + if ((ret = + __ENV_LPUT(dbenv, dbp->handle_lock)) != 0) { + LOCK_INIT(dbp->handle_lock); + goto err; + } + goto retry; + } + + dbp->type = save_type; + if (F_ISSET(dbp, DB_AM_INMEM)) + ret = __fop_inmem_read_meta(dbp, + txn, name, flags); + else if ((ret = + __fop_read_meta(dbenv, real_name, mbuf, + sizeof(mbuf), fhp, + LF_ISSET(DB_FCNTL_LOCKING) && txn == NULL ? 1 : 0, + &len)) != 0 || + (ret = __db_meta_setup(dbenv, dbp, real_name, + (DBMETA *)mbuf, flags, 1)) != 0) goto err; + } - /* If we got here, then we now have the handle lock. */ + /* If we got here, then we have the handle lock. */ /* * Check for a file in the midst of a rename. If we find that @@ -370,12 +465,18 @@ reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0) * that it is in our current transaction (else we would still * be blocking), so we can continue along and create a new file * with the same name. In that case, we have to close the file - * handle because we reuse it below. + * handle because we reuse it below. This is a case where + * a 'was_inval' above is OK. */ if (F_ISSET(dbp, DB_AM_IN_RENAME)) { - if (LF_ISSET(DB_CREATE)) { - if ((ret = __os_closehandle(dbenv, fhp)) != 0) + was_inval = 0; + if (create_ok) { + if (F_ISSET(dbp, DB_AM_INMEM)) { + RESET_MPF(dbp, DB_MPOOL_DISCARD); + } else if ((ret = + __os_closehandle(dbenv, fhp)) != 0) goto err; + LF_SET(DB_CREATE); goto create; } else { ret = ENOENT; @@ -383,6 +484,12 @@ reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0) } } + /* If we get here, a was_inval is bad. */ + if (was_inval) { + ret = EINVAL; + goto err; + } + /* * Now, case 1: check for DB_EXCL, because the file that exists * is not in the middle of a rename, so we have an error. This @@ -391,7 +498,7 @@ reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0) * should not have been allowed to open it. */ if (LF_ISSET(DB_EXCL)) { - ret = __ENV_LPUT(dbenv, dbp->handle_lock, 0); + ret = __ENV_LPUT(dbenv, dbp->handle_lock); LOCK_INIT(dbp->handle_lock); if (ret == 0) ret = EEXIST; @@ -401,59 +508,82 @@ reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0) } /* File does not exist. */ - if (!LF_ISSET(DB_CREATE)) +#ifdef HAVE_VXWORKS + /* + * VxWorks can return file-system specific error codes if the + * file does not exist, not ENOENT. + */ + if (!create_ok) +#else + if (!create_ok || ret != ENOENT) +#endif goto err; + LF_SET(DB_CREATE); ret = 0; /* * We need to create file, which means that we need to set up the file, * the fileid and the locks. Then we need to call the appropriate - * routines to create meta-data pages. + * routines to create meta-data pages. For in-memory files, we retain + * the environment lock, while for on-disk files, we drop the env lock + * and create into a temporary. */ - if ((ret = __ENV_LPUT(dbenv, elock, 0)) != 0) + if (!F_ISSET(dbp, DB_AM_INMEM) && + (ret = __ENV_LPUT(dbenv, elock)) != 0) goto err; create: if (txn != NULL && IS_REP_CLIENT(dbenv)) { - __db_err(dbenv, + __db_errx(dbenv, "Transactional create on replication client disallowed"); ret = EINVAL; goto err; } - if ((ret = __db_backup_name(dbenv, name, txn, &tmpname)) != 0) - goto err; - if (TXN_ON(dbenv) && txn != NULL && - (ret = __txn_begin(dbenv, txn, &stxn, 0)) != 0) - goto err; - if ((ret = __fop_create(dbenv, - stxn, &fhp, tmpname, DB_APP_DATA, mode, dflags)) != 0) { - /* - * If we don't have transactions there is a race on - * creating the temp file. - */ - if (!TXN_ON(dbenv) && ret == EEXIST) { - __os_free(dbenv, tmpname); - tmpname = NULL; - __os_yield(dbenv, 1); - goto retry; + + if (F_ISSET(dbp, DB_AM_INMEM)) + ret = __fop_inmem_create(dbp, name, txn, flags); + else { + if ((ret = __db_backup_name(dbenv, name, txn, &tmpname)) != 0) + goto err; + if (TXN_ON(dbenv) && txn != NULL && + (ret = __txn_begin(dbenv, txn, &stxn, 0)) != 0) + goto err; + if ((ret = __fop_create(dbenv, + stxn, &fhp, tmpname, DB_APP_DATA, mode, dflags)) != 0) { + /* + * If no transactions, there is a race on creating the + * backup file, as the backup file name is the same for + * all processes. Wait for the other process to finish + * with the name. + */ + if (!TXN_ON(dbenv) && ret == EEXIST) { + __os_free(dbenv, tmpname); + tmpname = NULL; + __os_sleep(dbenv, 1, 0); + goto retry; + } + goto err; } - goto err; + tmp_created = 1; } - tmp_created = 1; -creat2: if ((ret = __db_appname(dbenv, - DB_APP_DATA, tmpname, 0, NULL, &real_tmpname)) != 0) - goto err; +creat2: if (!F_ISSET(dbp, DB_AM_INMEM)) { + if ((ret = __db_appname(dbenv, + DB_APP_DATA, tmpname, 0, NULL, &real_tmpname)) != 0) + goto err; - /* Set the pagesize if it isn't yet set. */ - if (dbp->pgsize == 0 && - (ret = __fop_set_pgsize(dbp, fhp, real_tmpname)) != 0) - goto errmsg; + /* Set the pagesize if it isn't yet set. */ + if (dbp->pgsize == 0 && + (ret = __fop_set_pgsize(dbp, fhp, real_tmpname)) != 0) + goto errmsg; - /* Construct a file_id. */ - if ((ret = __os_fileid(dbenv, real_tmpname, 1, dbp->fileid)) != 0) - goto errmsg; + /* Construct a file_id. */ + if ((ret = + __os_fileid(dbenv, real_tmpname, 1, dbp->fileid)) != 0) + goto errmsg; + } - if ((ret = __db_new_file(dbp, stxn, fhp, tmpname)) != 0) + if ((ret = __db_new_file(dbp, + F_ISSET(dbp, DB_AM_INMEM) ? txn : stxn, fhp, tmpname)) != 0) goto err; /* @@ -464,9 +594,12 @@ creat2: if ((ret = __db_appname(dbenv, /* * Now move the file into place unless we are creating in place (because - * we created a database in a file that started out 0-length). + * we created a database in a file that started out 0-length). If + * this is an in-memory file, we may or may not hold the environment + * lock depending on how we got here. */ - if (!F_ISSET(dbp, DB_AM_COMPENSATE) && !F_ISSET(dbp, DB_AM_RECOVER)) + if (!F_ISSET(dbp, DB_AM_COMPENSATE) && + !F_ISSET(dbp, DB_AM_RECOVER) && !LOCK_ISSET(elock)) GET_ENVLOCK(dbenv, locker, &elock); if (F_ISSET(dbp, DB_AM_IN_RENAME)) { @@ -474,14 +607,15 @@ creat2: if ((ret = __db_appname(dbenv, __txn_remrem(dbenv, txn, real_name); } else if (name == tmpname) { /* We created it in place. */ - } else if (__os_exists(real_name, NULL) == 0) { + } else if (!F_ISSET(dbp, DB_AM_INMEM) && + __os_exists(dbenv, real_name, NULL) == 0) { /* * Someone managed to create the file; remove our temp * and try to open the file that now exists. */ (void)__fop_remove(dbenv, NULL, dbp->fileid, tmpname, DB_APP_DATA, dflags); - (void)__ENV_LPUT(dbenv, dbp->handle_lock, 0); + (void)__ENV_LPUT(dbenv, dbp->handle_lock); LOCK_INIT(dbp->handle_lock); if (stxn != NULL) { @@ -493,10 +627,10 @@ creat2: if ((ret = __db_appname(dbenv, goto reopen; } - if ((ret = __fop_lock_handle(dbenv, + if (name != NULL && (ret = __fop_lock_handle(dbenv, dbp, locker, DB_LOCK_WRITE, &elock, NOWAIT_FLAG(txn))) != 0) goto err; - if (tmpname != name && (ret = __fop_rename(dbenv, + if (tmpname != NULL && tmpname != name && (ret = __fop_rename(dbenv, stxn, tmpname, name, dbp->fileid, DB_APP_DATA, dflags)) != 0) goto err; @@ -513,7 +647,7 @@ creat2: if ((ret = __db_appname(dbenv, F_SET(dbp, DB_AM_CREATED); if (0) { -errmsg: __db_err(dbenv, "%s: %s", name, db_strerror(ret)); +errmsg: __db_err(dbenv, ret, "%s", name); err: CLOSE_HANDLE(dbp, fhp); if (stxn != NULL) @@ -522,8 +656,8 @@ err: CLOSE_HANDLE(dbp, fhp); (void)__fop_remove(dbenv, NULL, NULL, tmpname, DB_APP_DATA, dflags); if (txn == NULL) - (void)__ENV_LPUT(dbenv, dbp->handle_lock, 0); - (void)__ENV_LPUT(dbenv, elock, 0); + (void)__ENV_LPUT(dbenv, dbp->handle_lock); + (void)__ENV_LPUT(dbenv, elock); if (created_locker) { (void)__lock_id_free(dbenv, dbp->lid); dbp->lid = DB_LOCK_INVALIDID; @@ -537,7 +671,7 @@ done: /* */ if (!truncating && tmpname != NULL && tmpname != name) __os_free(dbenv, tmpname); - if (real_name != NULL) + if (real_name != name && real_name != NULL) __os_free(dbenv, real_name); if (real_tmpname != NULL) __os_free(dbenv, real_tmpname); @@ -569,7 +703,7 @@ __fop_set_pgsize(dbp, fhp, name) * default pagesize to 16K. */ if ((ret = __os_ioinfo(dbenv, name, fhp, NULL, NULL, &iopsize)) != 0) { - __db_err(dbenv, "%s: %s", name, db_strerror(ret)); + __db_err(dbenv, ret, "%s", name); return (ret); } if (iopsize < 512) @@ -681,7 +815,7 @@ __fop_subdb_setup(dbp, txn, mname, name, mode, flags) * If there was no transaction and we created this database, * then we need to undo the update of the master database. */ - if (F_ISSET(dbp, DB_AM_CREATED) && txn != NULL) + if (F_ISSET(dbp, DB_AM_CREATED) && txn == NULL) (void)__db_master_update(mdbp, dbp, txn, name, dbp->type, MU_REMOVE, NULL, 0); F_CLR(dbp, DB_AM_CREATED); @@ -721,7 +855,7 @@ __fop_subdb_setup(dbp, txn, mname, name, mode, flags) err: DB_TEST_RECOVERY_LABEL if (txn == NULL) - (void)__ENV_LPUT(dbenv, dbp->handle_lock, 0); + (void)__ENV_LPUT(dbenv, dbp->handle_lock); } /* @@ -734,7 +868,7 @@ DB_TEST_RECOVERY_LABEL * before we register this event, we'd better remove any * events that we've already registered for the master. */ - if (!F_ISSET(dbp, DB_AM_RECOVER) && txn != NULL) { + if (!F_ISSET(dbp, DB_AM_RECOVER) && IS_REAL_TXN(txn)) { /* Unregister old master events. */ __txn_remlock(dbenv, txn, &mdbp->handle_lock, DB_LOCK_INVALIDID); @@ -777,7 +911,6 @@ __fop_remove_setup(dbp, txn, name, flags) DB_ENV *dbenv; DB_FH *fhp; DB_LOCK elock; - u_int32_t refcnt; u_int8_t mbuf[DBMETASIZE]; int ret; @@ -786,13 +919,14 @@ __fop_remove_setup(dbp, txn, name, flags) PANIC_CHECK(dbenv); LOCK_INIT(elock); fhp = NULL; + ret = 0; /* Create locker if necessary. */ retry: if (LOCKING_ON(dbenv)) { if (txn != NULL) dbp->lid = txn->txnid; else if (dbp->lid == DB_LOCK_INVALIDID) { - if ((ret = __lock_id(dbenv, &dbp->lid)) != 0) + if ((ret = __lock_id(dbenv, &dbp->lid, NULL)) != 0) goto err; } } @@ -808,7 +942,7 @@ retry: if (LOCKING_ON(dbenv)) { * that we shouldn't close the handle. */ fhp = dbp->saved_open_fhp; - DB_ASSERT(LF_ISSET(DB_FCNTL_LOCKING) || fhp == NULL); + DB_ASSERT(dbenv, LF_ISSET(DB_FCNTL_LOCKING) || fhp == NULL); /* * Lock environment to protect file open. That will enable us to @@ -816,15 +950,24 @@ retry: if (LOCKING_ON(dbenv)) { * the handle. */ GET_ENVLOCK(dbenv, dbp->lid, &elock); - if (fhp == NULL && - (ret = __os_open(dbenv, name, DB_OSO_RDONLY, 0, &fhp)) != 0) - goto err; - if ((ret = __fop_read_meta(dbenv, - name, mbuf, sizeof(mbuf), fhp, 0, NULL)) != 0) + + /* Open database. */ + if (F_ISSET(dbp, DB_AM_INMEM)) { + if ((ret = __db_dbenv_mpool(dbp, name, flags)) == 0) + ret = __os_strdup(dbenv, name, &dbp->dname); + } else if (fhp == NULL) + ret = __os_open(dbenv, name, DB_OSO_RDONLY, 0, &fhp); + if (ret != 0) goto err; - if ((ret = - __db_meta_setup(dbenv, dbp, name, (DBMETA *)mbuf, flags, 1)) != 0) + /* Get meta-data */ + if (F_ISSET(dbp, DB_AM_INMEM)) + ret = __fop_inmem_read_meta(dbp, txn, name, flags); + else if ((ret = __fop_read_meta(dbenv, + name, mbuf, sizeof(mbuf), fhp, 0, NULL)) == 0) + ret = __db_meta_setup(dbenv, + dbp, name, (DBMETA *)mbuf, flags, 1); + if (ret != 0) goto err; /* @@ -839,55 +982,45 @@ retry: if (LOCKING_ON(dbenv)) { * Close the file, block on the lock, clean up the dbp, and * then start all over again. */ - if (!LF_ISSET(DB_FCNTL_LOCKING)) { + if (!F_ISSET(dbp, DB_AM_INMEM) && !LF_ISSET(DB_FCNTL_LOCKING)) { (void)__os_closehandle(dbenv, fhp); fhp = NULL; } - if (ret == DB_LOCK_NOTEXIST) { - if ((ret = __ENV_LPUT(dbenv, elock, 0)) != 0) - goto err; - } else if (ret != DB_LOCK_NOTGRANTED || + if (ret != DB_LOCK_NOTGRANTED || (txn != NULL && F_ISSET(txn, TXN_NOWAIT))) goto err; else if ((ret = __fop_lock_handle(dbenv, - dbp, dbp->lid, DB_LOCK_WRITE, &elock, 0)) != 0 && - ret != DB_LOCK_NOTEXIST) + dbp, dbp->lid, DB_LOCK_WRITE, &elock, 0)) != 0) goto err; - if (txn != NULL) - dbp->lid = DB_LOCK_INVALIDID; - (void)__db_refresh(dbp, txn, DB_NOSYNC, NULL); + if (F_ISSET(dbp, DB_AM_INMEM)) { + (void)__lock_put(dbenv, &dbp->handle_lock); + (void)__db_refresh(dbp, txn, DB_NOSYNC, NULL, 1); + } else { + if (txn != NULL) + dbp->lid = DB_LOCK_INVALIDID; + (void)__db_refresh(dbp, txn, DB_NOSYNC, NULL, 0); + } goto retry; - } else if ((ret = __ENV_LPUT(dbenv, elock, 0)) != 0) - goto err; - - /* Check if the file is already open. */ - if ((ret = __memp_get_refcnt(dbenv, dbp->fileid, &refcnt)) != 0) + } else if ((ret = __ENV_LPUT(dbenv, elock)) != 0) goto err; - /* - * Now, error check. If the file is already open (refcnt != 0), then - * we must have it open (since we got the lock) and we need to panic, - * because this is a self deadlock and the application has a bug. - * If the file isn't open, but it's in the midst of a rename then - * this file doesn't really exist. - */ - if (refcnt != 0) { - __db_err(dbenv, -"Attempting to remove file open in current transaction causing self-deadlock"); - ret = __db_panic(dbenv, DB_LOCK_DEADLOCK); - } else if (F_ISSET(dbp, DB_AM_IN_RENAME)) + else if (F_ISSET(dbp, DB_AM_IN_RENAME)) ret = ENOENT; if (0) { -err: (void)__ENV_LPUT(dbenv, elock, 0); +err: (void)__ENV_LPUT(dbenv, elock); } if (fhp != NULL && !LF_ISSET(DB_FCNTL_LOCKING)) (void)__os_closehandle(dbenv, fhp); /* - * If we are going to proceed with the removal, then we need to make - * sure that we don't leave any pages around in the mpool. + * If this is a real file and we are going to proceed with the removal, + * then we need to make sure that we don't leave any pages around in the + * mpool since the file is closed and will be reopened again before + * access. However, this might be an in-memory file, in which case + * we will handle the discard from the mpool later as it's the "real" + * removal of the database. */ - if (ret == 0) + if (ret == 0 && !F_ISSET(dbp, DB_AM_INMEM)) F_SET(dbp, DB_AM_DISCARD); return (ret); } @@ -926,13 +1059,13 @@ __fop_read_meta(dbenv, name, buf, size, fhp, errok, nbytesp) if (ret != 0) { if (!errok) - __db_err(dbenv, "%s: %s", name, db_strerror(ret)); + __db_err(dbenv, ret, "%s", name); goto err; } if (nr != size) { if (!errok) - __db_err(dbenv, + __db_errx(dbenv, "%s: unexpected file type or format", name); ret = EINVAL; } @@ -957,100 +1090,418 @@ __fop_dummy(dbp, txn, old, new, flags) const char *old, *new; u_int32_t flags; { - DB *tmpdbp, *t2dbp; + DB *tmpdbp; DB_ENV *dbenv; - DB_FH *fhp; - DB_LOCK elock; - DB_LSN lsn; - DBT fiddbt, namedbt, tmpdbt; DB_TXN *stxn; char *back; - char *realback, *realnew, *realold; int ret, t_ret; - size_t len; u_int8_t mbuf[DBMETASIZE]; - u_int32_t dflags, locker, stxnid; + u_int32_t locker; dbenv = dbp->dbenv; - LOCK_INIT(elock); - realback = NULL; - realnew = NULL; - realold = NULL; back = NULL; stxn = NULL; - tmpdbp = t2dbp = NULL; - fhp = NULL; - dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0; + tmpdbp = NULL; - DB_ASSERT(txn != NULL); + DB_ASSERT(dbenv, txn != NULL); locker = txn->txnid; - /* Begin sub transaction to encapsulate the rename. */ + /* + * Begin sub transaction to encapsulate the rename. Note that we + * expect the inmem_swap calls to complete the sub-transaction, + * aborting on error and committing on success. + */ if (TXN_ON(dbenv) && (ret = __txn_begin(dbenv, txn, &stxn, 0)) != 0) goto err; /* We need to create a dummy file as a place holder. */ if ((ret = __db_backup_name(dbenv, new, stxn, &back)) != 0) goto err; + /* Create a dummy dbp handle. */ + if ((ret = db_create(&tmpdbp, dbenv, 0)) != 0) + goto err; + + memset(mbuf, 0, sizeof(mbuf)); + ret = F_ISSET(dbp, DB_AM_INMEM) ? + __fop_inmem_dummy(tmpdbp, stxn, back, mbuf) : + __fop_ondisk_dummy(tmpdbp, stxn, back, mbuf, flags); + + if (ret != 0) + goto err; + + ret = F_ISSET(dbp, DB_AM_INMEM) ? + __fop_inmem_swap(dbp, tmpdbp, stxn, old, new, back, locker) : + __fop_ondisk_swap(dbp, tmpdbp, stxn, old, new, back, locker, flags); + stxn = NULL; + if (ret != 0) + goto err; + +err: if (stxn != NULL) + (void)__txn_abort(stxn); + if (tmpdbp != NULL && + (t_ret = __db_close(tmpdbp, NULL, 0)) != 0 && ret == 0) + ret = t_ret; + if (back != NULL) + __os_free(dbenv, back); + return (ret); +} + +/* + * __fop_dbrename -- + * Do the appropriate file locking and file system operations + * to effect a dbrename in the absence of transactions (__fop_dummy + * and the subsequent calls in __db_rename do the work for the + * transactional case). + * + * PUBLIC: int __fop_dbrename __P((DB *, const char *, const char *)); + */ +int +__fop_dbrename(dbp, old, new) + DB *dbp; + const char *old, *new; +{ + DB_ENV *dbenv; + DB_LOCK elock; + char *real_new, *real_old; + int ret, t_ret; + + dbenv = dbp->dbenv; + real_new = NULL; + real_old = NULL; + LOCK_INIT(elock); + + if (F_ISSET(dbp, DB_AM_INMEM)) { + real_new = (char *)new; + real_old = (char *)old; + } else { + /* Get full names. */ + if ((ret = __db_appname(dbenv, + DB_APP_DATA, new, 0, NULL, &real_new)) != 0) + goto err; + + if ((ret = __db_appname(dbenv, + DB_APP_DATA, old, 0, NULL, &real_old)) != 0) + goto err; + + } + + /* + * It is an error to rename a file over one that already exists, + * as that wouldn't be transaction-safe. We check explicitly + * for ondisk files, but it's done memp_nameop for in-memory ones. + */ + GET_ENVLOCK(dbenv, dbp->lid, &elock); + ret = F_ISSET(dbp, DB_AM_INMEM) ? ENOENT : + __os_exists(dbenv, real_new, NULL); + + if (ret == 0) { + ret = EEXIST; + __db_errx(dbenv, "rename: file %s exists", real_new); + goto err; + } + + ret = __memp_nameop(dbenv, + dbp->fileid, new, real_old, real_new, F_ISSET(dbp, DB_AM_INMEM)); + +err: if ((t_ret = __ENV_LPUT(dbenv, elock)) != 0 && ret == 0) + ret = t_ret; + if (!F_ISSET(dbp, DB_AM_INMEM) && real_old != NULL) + __os_free(dbenv, real_old); + if (!F_ISSET(dbp, DB_AM_INMEM) && real_new != NULL) + __os_free(dbenv, real_new); + return (ret); +} + +static int +__fop_inmem_create(dbp, name, txn, flags) + DB *dbp; + const char *name; + DB_TXN *txn; + u_int32_t flags; +{ + DB_ENV *dbenv; + DB_LSN lsn; + DBT fid_dbt, name_dbt; + int ret; + int32_t lfid; + u_int32_t *p32; + + dbenv = dbp->dbenv; + + MAKE_INMEM(dbp); + + /* Set the pagesize if it isn't yet set. */ + if (dbp->pgsize == 0) + dbp->pgsize = DB_DEF_IOSIZE; + + /* + * Construct a file_id. + * + * If this file has no name, then we only need a fileid for locking. + * If this file has a name, we need the fileid both for locking and + * matching in the memory pool. So, with unnamed in-memory databases, + * use a lock_id. For named in-memory files, we need to find a value + * that we can use to uniquely identify a name/fid pair. We use a + * combination of a unique id (__os_unique_id) and a hash of the + * original name. + */ + if (name == NULL) { + if (LOCKING_ON(dbenv) && (ret = + __lock_id(dbenv, (u_int32_t *)dbp->fileid, NULL)) != 0) + goto err; + } else { + p32 = (u_int32_t *)(&dbp->fileid[0]); + __os_unique_id(dbenv, p32); + p32++; + (void)strncpy( + (char *)p32, name, DB_FILE_ID_LEN - sizeof(u_int32_t)); + dbp->preserve_fid = 1; + + if (DBENV_LOGGING(dbenv) && dbp->log_filename != NULL) + memcpy(dbp->log_filename->ufid, + dbp->fileid, DB_FILE_ID_LEN); + } + + /* Now, set the fileid. */ + if ((ret = __memp_set_fileid(dbp->mpf, dbp->fileid)) != 0) + goto err; + + if ((ret = __db_dbenv_mpool(dbp, name, flags)) != 0) + goto err; + + if (name != NULL && DBENV_LOGGING(dbenv)) { + DB_INIT_DBT(name_dbt, name, strlen(name) + 1); + memset(&fid_dbt, 0, sizeof(fid_dbt)); + fid_dbt.data = dbp->fileid; + fid_dbt.size = DB_FILE_ID_LEN; + lfid = dbp->log_filename == NULL ? + DB_LOGFILEID_INVALID : dbp->log_filename->id; + if ((ret = __crdel_inmem_create_log(dbenv, txn, + &lsn, 0, lfid, &name_dbt, &fid_dbt, dbp->pgsize)) != 0) + goto err; + } + + F_SET(dbp, DB_AM_CREATED); + +err: + return (ret); +} + +static int +__fop_inmem_read_meta(dbp, txn, name, flags) + DB *dbp; + DB_TXN *txn; + const char *name; + u_int32_t flags; +{ + DBMETA *metap; + db_pgno_t pgno; + int ret, t_ret; + + pgno = PGNO_BASE_MD; + if ((ret = __memp_fget(dbp->mpf, &pgno, txn, 0, &metap)) != 0) + return (ret); + ret = __db_meta_setup(dbp->dbenv, dbp, name, metap, flags, 1); + + if ((t_ret = __memp_fput(dbp->mpf, metap, 0)) && ret == 0) + ret = t_ret; + + return (ret); +} + +static int +__fop_ondisk_dummy(dbp, txn, name, mbuf, flags) + DB *dbp; + DB_TXN *txn; + const char *name; + u_int8_t *mbuf; + u_int32_t flags; +{ + DB_ENV *dbenv; + int ret; + char *realname; + u_int32_t dflags; + + realname = NULL; + dbenv = dbp->dbenv; + dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0; + if ((ret = __db_appname(dbenv, - DB_APP_DATA, back, flags, NULL, &realback)) != 0) + DB_APP_DATA, name, flags, NULL, &realname)) != 0) goto err; + if ((ret = __fop_create(dbenv, - stxn, NULL, back, DB_APP_DATA, 0, dflags)) != 0) + txn, NULL, name, DB_APP_DATA, 0, dflags)) != 0) goto err; - memset(mbuf, 0, sizeof(mbuf)); if ((ret = - __os_fileid(dbenv, realback, 1, ((DBMETA *)mbuf)->uid)) != 0) + __os_fileid(dbenv, realname, 1, ((DBMETA *)mbuf)->uid)) != 0) goto err; + ((DBMETA *)mbuf)->magic = DB_RENAMEMAGIC; - if ((ret = __fop_write(dbenv, stxn, back, + if ((ret = __fop_write(dbenv, txn, name, DB_APP_DATA, NULL, 0, 0, 0, mbuf, DBMETASIZE, 1, dflags)) != 0) goto err; - /* Create a dummy dbp handle. */ - if ((ret = db_create(&tmpdbp, dbenv, 0)) != 0) + memcpy(dbp->fileid, ((DBMETA *)mbuf)->uid, DB_FILE_ID_LEN); + +err: if (realname != NULL) + __os_free(dbenv, realname); + + return (ret); +} + +static int +__fop_inmem_dummy(dbp, txn, name, mbuf) + DB *dbp; + DB_TXN *txn; + const char *name; + u_int8_t *mbuf; +{ + DBMETA *metap; + db_pgno_t pgno; + int ret, t_ret; + + if ((ret = __fop_inmem_create(dbp, name, txn, DB_CREATE)) != 0) + return (ret); + + pgno = PGNO_BASE_MD; + if ((ret = __memp_fget(dbp->mpf, &pgno, txn, + DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &metap)) != 0) + return (ret); + /* Check file existed. */ + if (metap->magic != 0) + ret = EEXIST; + else + metap->magic = DB_RENAMEMAGIC; + + /* Copy the fileid onto the meta-data page. */ + memcpy(metap->uid, dbp->fileid, DB_FILE_ID_LEN); + + if ((t_ret = __memp_fput(dbp->mpf, metap, + ret == 0 ? 0 : DB_MPOOL_DISCARD)) != 0 && ret == 0) + ret = t_ret; + + if (ret != 0) goto err; - memcpy(tmpdbp->fileid, ((DBMETA *)mbuf)->uid, DB_FILE_ID_LEN); - /* Now, lock the name space while we initialize this file. */ - if ((ret = __db_appname(dbenv, - DB_APP_DATA, new, 0, NULL, &realnew)) != 0) + ((DBMETA *)mbuf)->magic = DB_RENAMEMAGIC; + +err: return (ret); +} + +static int +__fop_ondisk_swap(dbp, tmpdbp, txn, old, new, back, locker, flags) + DB *dbp, *tmpdbp; + DB_TXN *txn; + const char *old, *new, *back; + u_int32_t locker, flags; +{ + DB_ENV *dbenv; + DB_FH *fhp; + DB_LOCK elock; + DB_LSN lsn; + DBT fiddbt, namedbt, tmpdbt; + DB_TXN *parent; + char *realold, *realnew; + int ret, t_ret; + u_int8_t mbuf[DBMETASIZE]; + u_int32_t child_txnid, dflags; + + dbenv = dbp->dbenv; + DB_ASSERT(dbenv, txn != NULL); + DB_ASSERT(dbenv, old != NULL); + + realold = realnew = NULL; + LOCK_INIT(elock); + fhp = NULL; + dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0; + + if ((ret = + __db_appname(dbenv, DB_APP_DATA, new, 0, NULL, &realnew)) != 0) goto err; - GET_ENVLOCK(dbenv, locker, &elock); - if (__os_exists(realnew, NULL) == 0) { + + /* Now, lock the name space while we initialize this file. */ +retry: GET_ENVLOCK(dbenv, locker, &elock); + if (__os_exists(dbenv, realnew, NULL) == 0) { /* * It is possible that the only reason this file exists is * because we've done a previous rename of it and we have * left a placeholder here. We need to check for that case * and allow this rename to succeed if that's the case. */ - if ((ret = db_create(&t2dbp, dbenv, 0)) != 0) - goto err; if ((ret = __os_open(dbenv, realnew, 0, 0, &fhp)) != 0) goto err; if ((ret = __fop_read_meta(dbenv, - realnew, mbuf, sizeof(mbuf), fhp, 0, &len)) != 0 || + realnew, mbuf, sizeof(mbuf), fhp, 0, NULL)) != 0 || (ret = __db_meta_setup(dbenv, - t2dbp, realnew, (DBMETA *)mbuf, 0, 1)) != 0) { + tmpdbp, realnew, (DBMETA *)mbuf, 0, 1)) != 0) { ret = EEXIST; goto err; } /* - * Now, try to acquire the handle lock. If it's from our txn, - * then we'll get the lock. If it's not, then someone else has - * it locked, and we need to report this as an error. If we - * know we can get the lock, we can immediately release it, - * which we need to do since this is a temporary handle. + * Now, try to acquire the handle lock. If the handle is locked + * by our current, transaction, then we'll get it and life is + * good. + * + * Alternately, it's not locked at all, we'll get the lock, but + * we will realize it exists and consider this an error. + * + * However, if it's held by another transaction, then there + * could be two different scenarios: 1) the file is in the + * midst of being created or deleted and when that transaction + * is over, we might be able to proceed. 2) the file is open + * and exists and we should report an error. In order to + * distinguish these two cases, we do the following. First, we + * try to acquire a READLOCK. If the handle is in the midst of + * being created, then we'll block because a writelock is held. + * In that case, we should request a blocking write, and when we + * get the lock, we should then go back and check to see if the + * object exists and start all over again. + * + * If we got the READLOCK, then either no one is holding the + * lock or someone has an open handle and the fact that the file + * exists is problematic. So, in this case, we request the + * WRITELOCK non-blocking -- if it succeeds, we're golden. If + * it fails, then the file exists and we return EEXIST. */ if ((ret = __fop_lock_handle(dbenv, - t2dbp, locker, DB_LOCK_WRITE, NULL, DB_LOCK_NOWAIT)) != 0) - ret = EEXIST; - else { - (void)__lock_put(dbenv, &t2dbp->handle_lock, 0); - if (!F_ISSET(t2dbp, DB_AM_IN_RENAME)) + tmpdbp, locker, DB_LOCK_READ, NULL, DB_LOCK_NOWAIT)) != 0) { + /* + * Someone holds a writelock. Try for the WRITELOCK + * and after we get it, retry. + */ + if ((ret = __fop_lock_handle(dbenv, tmpdbp, + locker, DB_LOCK_WRITE, &elock, 0)) != 0) + goto err; + + /* + * We now have the write lock; release it and start + * over. + */ + (void)__lock_put(dbenv, &tmpdbp->handle_lock); + (void)__db_refresh(tmpdbp, NULL, 0, NULL, 0); + goto retry; + } else { + /* We got the read lock; try to upgrade it. */ + ret = __fop_lock_handle(dbenv, + tmpdbp, locker, DB_LOCK_WRITE, + NULL, DB_LOCK_UPGRADE | DB_LOCK_NOWAIT); + if (ret != 0) { + /* + * We did not get the writelock, so someone + * has the handle open. This is an error. + */ + (void)__lock_put(dbenv, &tmpdbp->handle_lock); + ret = EEXIST; + } else if (F_ISSET(tmpdbp, DB_AM_IN_RENAME)) + /* We got the lock and are renaming it. */ + ret = 0; + else { /* We got the lock, but the file exists. */ + (void)__lock_put(dbenv, &tmpdbp->handle_lock); ret = EEXIST; + } } if ((t_ret = __os_closehandle(dbenv, fhp)) != 0 && ret == 0) ret = t_ret; @@ -1064,10 +1515,10 @@ __fop_dummy(dbp, txn, old, new, flags) * swap for the handle lock. */ if ((ret = __fop_rename(dbenv, - stxn, old, new, dbp->fileid, DB_APP_DATA, dflags)) != 0) + txn, old, new, dbp->fileid, DB_APP_DATA, dflags)) != 0) goto err; if ((ret = __fop_rename(dbenv, - stxn, back, old, tmpdbp->fileid, DB_APP_DATA, dflags)) != 0) + txn, back, old, tmpdbp->fileid, DB_APP_DATA, dflags)) != 0) goto err; if ((ret = __fop_lock_handle(dbenv, tmpdbp, locker, DB_LOCK_WRITE, &elock, NOWAIT_FLAG(txn))) != 0) @@ -1080,108 +1531,174 @@ __fop_dummy(dbp, txn, old, new, flags) */ LOCK_INIT(tmpdbp->handle_lock); - if (stxn != NULL) { - /* Commit the child. */ - stxnid = stxn->txnid; - ret = __txn_commit(stxn, 0); - stxn = NULL; - - /* Now log the child information in the parent. */ - memset(&fiddbt, 0, sizeof(fiddbt)); - memset(&tmpdbt, 0, sizeof(fiddbt)); - memset(&namedbt, 0, sizeof(namedbt)); - fiddbt.data = dbp->fileid; - fiddbt.size = DB_FILE_ID_LEN; - tmpdbt.data = tmpdbp->fileid; - tmpdbt.size = DB_FILE_ID_LEN; - namedbt.data = (void *)old; - namedbt.size = (u_int32_t)strlen(old) + 1; - if ((t_ret = - __fop_file_remove_log(dbenv, txn, &lsn, 0, &fiddbt, - &tmpdbt, &namedbt, DB_APP_DATA, stxnid)) != 0 && ret == 0) - ret = t_ret; - } + /* Commit the child. */ + child_txnid = txn->txnid; + parent = txn->parent; + ret = __txn_commit(txn, 0); + txn = NULL; + + /* Now log the child information in the parent. */ + memset(&fiddbt, 0, sizeof(fiddbt)); + fiddbt.data = dbp->fileid; + fiddbt.size = DB_FILE_ID_LEN; + memset(&tmpdbt, 0, sizeof(fiddbt)); + tmpdbt.data = tmpdbp->fileid; + tmpdbt.size = DB_FILE_ID_LEN; + DB_INIT_DBT(namedbt, old, strlen(old) + 1); + if ((t_ret = __fop_file_remove_log(dbenv, + parent, &lsn, 0, &fiddbt, &tmpdbt, &namedbt, + (u_int32_t)DB_APP_DATA, child_txnid)) != 0 && ret == 0) + ret = t_ret; /* This is a delayed delete of the dummy file. */ if ((ret = __db_appname(dbenv, DB_APP_DATA, old, flags, NULL, &realold)) != 0) goto err; - if ((ret = __txn_remevent(dbenv, txn, realold, NULL)) != 0) + + if ((ret = __txn_remevent(dbenv, parent, realold, NULL, 0)) != 0) goto err; -err: (void)__ENV_LPUT(dbenv, elock, 0); - if (stxn != NULL) - (void)__txn_abort(stxn); - if (tmpdbp != NULL && - (t_ret = __db_close(tmpdbp, NULL, 0)) != 0 && ret == 0) - ret = t_ret; - if (t2dbp != NULL && - (t_ret = __db_close(t2dbp, NULL, 0)) != 0 && ret == 0) - ret = t_ret; - if (fhp != NULL) - (void)__os_closehandle(dbenv, fhp); - if (realold != NULL) - __os_free(dbenv, realold); +err: if (txn != NULL) /* Ret must already be set, so void abort. */ + (void)__txn_abort(txn); + + (void)__ENV_LPUT(dbenv, elock); if (realnew != NULL) __os_free(dbenv, realnew); - if (realback != NULL) - __os_free(dbenv, realback); - if (back != NULL) - __os_free(dbenv, back); + if (realold != NULL) + __os_free(dbenv, realold); return (ret); } -/* - * __fop_dbrename -- - * Do the appropriate file locking and file system operations - * to effect a dbrename in the absence of transactions (__fop_dummy - * and the subsequent calls in __db_rename do the work for the - * transactional case). - * - * PUBLIC: int __fop_dbrename __P((DB *, const char *, const char *)); - */ -int -__fop_dbrename(dbp, old, new) - DB *dbp; - const char *old, *new; +static int +__fop_inmem_swap(olddbp, backdbp, txn, old, new, back, locker) + DB *olddbp, *backdbp; + DB_TXN *txn; + const char *old, *new, *back; + u_int32_t locker; { DB_ENV *dbenv; DB_LOCK elock; - char *real_new, *real_old; + DB_LSN lsn; + DB_TXN *parent; + DBT fid_dbt, n1_dbt, n2_dbt; + DB *tmpdbp; int ret, t_ret; - dbenv = dbp->dbenv; - real_new = NULL; - real_old = NULL; - LOCK_INIT(elock); + dbenv = olddbp->dbenv; + parent = txn->parent; +retry: LOCK_INIT(elock); + if ((ret = db_create(&tmpdbp, dbenv, 0)) != 0) + return (ret); + MAKE_INMEM(tmpdbp); - /* Find the real newname of the file. */ - if ((ret = __db_appname(dbenv, - DB_APP_DATA, new, 0, NULL, &real_new)) != 0) - goto err; + GET_ENVLOCK(dbenv, locker, &elock); + if ((ret = __db_dbenv_mpool(tmpdbp, new, 0)) == 0) { + /* + * It is possible that the only reason this database exists is + * because we've done a previous rename of it and we have + * left a placeholder here. We need to check for that case + * and allow this rename to succeed if that's the case. + */ + + if ((ret = __fop_inmem_read_meta(tmpdbp, txn, new, 0)) != 0) { + ret = EEXIST; + goto err; + } + + /* + * Now, try to acquire the handle lock. If it's from our txn, + * then we'll get the lock. If it's not, then someone else has + * it locked. See the comments in __fop_ondisk_swap for + * details. + */ + if ((ret = __fop_lock_handle(dbenv, + tmpdbp, locker, DB_LOCK_READ, NULL, DB_LOCK_NOWAIT)) != 0) { + /* + * Someone holds a writelock. Try for the WRITELOCK + * and after we get it, retry. + */ + if ((ret = __fop_lock_handle(dbenv, tmpdbp, + locker, DB_LOCK_WRITE, &elock, 0)) != 0) + goto err; + + /* We have the write lock; release it and start over. */ + (void)__lock_put(dbenv, &tmpdbp->handle_lock); + (void)__db_close(tmpdbp, NULL, DB_NOSYNC); + (void)__ENV_LPUT(dbenv, elock); + goto retry; + } else { + (void)__lock_put(dbenv, &tmpdbp->handle_lock); + if (!F_ISSET(tmpdbp, DB_AM_IN_RENAME)) + ret = EEXIST; + } + if (ret != 0) + goto err; + } + + /* Log the renames. */ + if (LOGGING_ON(dbenv)) { + /* Rename old to new. */ + DB_INIT_DBT(fid_dbt, olddbp->fileid, DB_FILE_ID_LEN); + DB_INIT_DBT(n1_dbt, old, strlen(old) + 1); + DB_INIT_DBT(n2_dbt, new, strlen(new) + 1); + if ((ret = __crdel_inmem_rename_log(dbenv, txn, &lsn, 0, + &n1_dbt, &n2_dbt, &fid_dbt)) != 0) + goto err; + + /* Rename back to old */ + fid_dbt.data = backdbp->fileid; + DB_SET_DBT(n2_dbt, back, strlen(back) + 1); + if ((ret = __crdel_inmem_rename_log(dbenv, txn, &lsn, 0, + &n2_dbt, &n1_dbt, &fid_dbt)) != 0) + goto err; + } /* - * It is an error to rename a file over one that already exists, - * as that wouldn't be transaction-safe. + * While we have the namespace locked, do the renames and then + * swap for the handle lock. If we ran into a file in the midst + * of rename, then we need to delete it first, else nameop is + * going to consider it an error. */ - GET_ENVLOCK(dbenv, dbp->lid, &elock); - if (__os_exists(real_new, NULL) == 0) { - ret = EEXIST; - __db_err(dbenv, "rename: file %s exists", real_new); - goto err; + if (F_ISSET(tmpdbp, DB_AM_IN_RENAME)) { + if ((ret = __memp_nameop(dbenv, + tmpdbp->fileid, NULL, new, NULL, 1)) != 0) + goto err; + __txn_remrem(dbenv, parent, new); } - if ((ret = __db_appname(dbenv, - DB_APP_DATA, old, 0, NULL, &real_old)) != 0) + if ((ret = __memp_nameop(dbenv, olddbp->fileid, new, old, new, 1)) != 0) + goto err; + if ((ret = + __memp_nameop(dbenv, backdbp->fileid, old, back, old, 1)) != 0) goto err; - ret = __memp_nameop(dbenv, dbp->fileid, new, real_old, real_new); + if ((ret = __fop_lock_handle(dbenv, + tmpdbp, locker, DB_LOCK_WRITE, &elock, 0)) != 0) + goto err; -err: if ((t_ret = __ENV_LPUT(dbenv, elock, 0)) != 0 && ret == 0) + /* + * We just acquired a transactional lock on the tmp handle. + * We need to null out the tmp handle's lock so that it + * doesn't create problems for us in the close path. + */ + LOCK_INIT(tmpdbp->handle_lock); + + DB_ASSERT(dbenv, txn != NULL); + + /* Commit the child. */ + ret = __txn_commit(txn, 0); + txn = NULL; + + if ((ret = __db_inmem_remove(backdbp, parent, old)) != 0) + goto err; + +err: (void)__ENV_LPUT(dbenv, elock); + + if (txn != NULL) + (void)__txn_abort(txn); + + if ((t_ret = __db_close(tmpdbp, NULL, 0)) != 0 && ret == 0) ret = t_ret; - if (real_old != NULL) - __os_free(dbenv, real_old); - if (real_new != NULL) - __os_free(dbenv, real_new); + return (ret); } |