summary refs log tree commit diff
path: root/db/fileops
diff options
context:
space:
mode:
author Panu Matilainen <pmatilai@redhat.com> 2007-07-16 16:48:14 +0300
committer Panu Matilainen <pmatilai@redhat.com> 2007-07-16 16:48:14 +0300
commit 2cfd3012bfcb5c5c61bbaf662ef084e0ab789d79 (patch)
tree e12ee52087506ac8c7a5eee83b17497d98df2d40 /db/fileops
parent b754fe19fd387ca5fe8e7c00ddaa25c898fa192f (diff)
download rpm-2cfd3012bfcb5c5c61bbaf662ef084e0ab789d79.tar.gz
rpm-2cfd3012bfcb5c5c61bbaf662ef084e0ab789d79.tar.bz2
rpm-2cfd3012bfcb5c5c61bbaf662ef084e0ab789d79.zip
Update internal BDB to version 4.5.20
Diffstat (limited to 'db/fileops')
-rw-r--r-- db/fileops/fileops.src 24
-rw-r--r-- db/fileops/fileops_auto.c 238
-rw-r--r-- db/fileops/fileops_autop.c 68
-rw-r--r-- db/fileops/fop_basic.c 90
-rw-r--r-- db/fileops/fop_rec.c 76
-rw-r--r-- db/fileops/fop_util.c 1081
6 files changed, 1066 insertions, 511 deletions
diff --git a/db/fileops/fileops.src b/db/fileops/fileops.src
index a77b5d5c4..ce81e1513 100644
--- a/db/fileops/fileops.src
+++ b/db/fileops/fileops.src
@@ -1,26 +1,18 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2001-2004
- * Sleepycat Software. All rights reserved.
+ * Copyright (c) 2001-2006
+ * Oracle Corporation. All rights reserved.
*
- * $Id: fileops.src,v 1.13 2004/06/17 17:35:20 bostic Exp $
+ * $Id: fileops.src,v 12.6 2006/08/24 14:46:03 bostic Exp $
*/
PREFIX __fop
DBPRIVATE
-INCLUDE #ifndef NO_SYSTEM_INCLUDES
-INCLUDE #include <sys/types.h>
-INCLUDE
-INCLUDE #include <ctype.h>
-INCLUDE #include <string.h>
-INCLUDE #endif
-INCLUDE
INCLUDE #include "db_int.h"
INCLUDE #include "dbinc/crypto.h"
INCLUDE #include "dbinc/db_page.h"
-INCLUDE #include "dbinc/db_dispatch.h"
INCLUDE #include "dbinc/db_am.h"
INCLUDE #include "dbinc/log.h"
INCLUDE #include "dbinc/txn.h"
@@ -34,7 +26,7 @@ INCLUDE
* appname: indicates if the name needs to go through __db_appname
* mode: file system mode
*/
-BEGIN create 143
+BEGIN create 42 143
DBT name DBT s
ARG appname u_int32_t lu
ARG mode u_int32_t o
@@ -46,7 +38,7 @@ END
* name: name in the file system
* appname: indicates if the name needs to go through __db_appname
*/
-BEGIN remove 144
+BEGIN remove 42 144
DBT name DBT s
DBT fid DBT s
ARG appname u_int32_t lu
@@ -64,7 +56,7 @@ END
* flag: non-0 indicates that this is a tempfile, so we needn't undo
* these modifications (we'll toss the file).
*/
-BEGIN write 145
+BEGIN write 42 145
DBT name DBT s
ARG appname u_int32_t lu
ARG pgsize u_int32_t lu
@@ -82,7 +74,7 @@ END
* DB fileid of the file being renamed. We need to check it on recovery
* so that we don't inadvertently overwrite good files.
*/
-BEGIN rename 146
+BEGIN rename 42 146
DBT oldname DBT s
DBT newname DBT s
DBT fileid DBT s
@@ -103,7 +95,7 @@ END
* child: The transaction that removed or renamed the file.
*/
*/
-BEGIN file_remove 141
+BEGIN file_remove 42 141
DBT real_fid DBT s
DBT tmp_fid DBT s
DBT name DBT s
diff --git a/db/fileops/fileops_auto.c b/db/fileops/fileops_auto.c
index 333e37755..0da353b2b 100644
--- a/db/fileops/fileops_auto.c
+++ b/db/fileops/fileops_auto.c
@@ -2,17 +2,9 @@
#include "db_config.h"
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <ctype.h>
-#include <string.h>
-#endif
-
#include "db_int.h"
#include "dbinc/crypto.h"
#include "dbinc/db_page.h"
-#include "dbinc/db_dispatch.h"
#include "dbinc/db_am.h"
#include "dbinc/log.h"
#include "dbinc/txn.h"
@@ -23,10 +15,10 @@
* PUBLIC: u_int32_t, const DBT *, u_int32_t, u_int32_t));
*/
int
-__fop_create_log(dbenv, txnid, ret_lsnp, flags,
+__fop_create_log(dbenv, txnp, ret_lsnp, flags,
name, appname, mode)
DB_ENV *dbenv;
- DB_TXN *txnid;
+ DB_TXN *txnp;
DB_LSN *ret_lsnp;
u_int32_t flags;
const DBT *name;
@@ -50,29 +42,30 @@ __fop_create_log(dbenv, txnid, ret_lsnp, flags,
ret = 0;
if (LF_ISSET(DB_LOG_NOT_DURABLE)) {
- if (txnid == NULL)
+ if (txnp == NULL)
+ return (0);
+ if (txnp == NULL)
return (0);
is_durable = 0;
} else
is_durable = 1;
- if (txnid == NULL) {
+ if (txnp == NULL) {
txn_num = 0;
lsnp = &null_lsn;
null_lsn.file = null_lsn.offset = 0;
} else {
- if (TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ if (TAILQ_FIRST(&txnp->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnp)) != 0)
return (ret);
/*
* We need to assign begin_lsn while holding region mutex.
* That assignment is done inside the DbEnv->log_put call,
* so pass in the appropriate memory location to be filled
* in by the log_put code.
- */
- DB_SET_BEGIN_LSNP(txnid, &rlsnp);
- txn_num = txnid->txnid;
- lsnp = &txnid->last_lsn;
+ */
+ DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
+ txn_num = txnp->txnid;
}
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
@@ -85,7 +78,7 @@ __fop_create_log(dbenv, txnid, ret_lsnp, flags,
logrec.size += npad;
}
- if (is_durable || txnid == NULL) {
+ if (is_durable || txnp == NULL) {
if ((ret =
__os_malloc(dbenv, logrec.size, &logrec.data)) != 0)
return (ret);
@@ -136,12 +129,13 @@ __fop_create_log(dbenv, txnid, ret_lsnp, flags,
memcpy(bp, &uinttmp, sizeof(uinttmp));
bp += sizeof(uinttmp);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+ DB_ASSERT(dbenv,
+ (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
- if (is_durable || txnid == NULL) {
+ if (is_durable || txnp == NULL) {
if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnid != NULL) {
- txnid->last_lsn = *rlsnp;
+ flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
+ *lsnp = *rlsnp;
if (rlsnp != ret_lsnp)
*ret_lsnp = *rlsnp;
}
@@ -160,20 +154,21 @@ __fop_create_log(dbenv, txnid, ret_lsnp, flags,
#else
ret = 0;
#endif
- STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+ STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
+ F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
LSN_NOT_LOGGED(*ret_lsnp);
}
#ifdef LOG_DIAGNOSTIC
if (ret != 0)
(void)__fop_create_print(dbenv,
- (DBT *)&logrec, ret_lsnp, NULL, NULL);
+ (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
#endif
#ifdef DIAGNOSTIC
__os_free(dbenv, logrec.data);
#else
- if (is_durable || txnid == NULL)
+ if (is_durable || txnp == NULL)
__os_free(dbenv, logrec.data);
#endif
return (ret);
@@ -197,13 +192,14 @@ __fop_create_read(dbenv, recbuf, argpp)
sizeof(__fop_create_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
bp = recbuf;
- argp->txnid = (DB_TXN *)&argp[1];
+ argp->txnp = (DB_TXN *)&argp[1];
+ memset(argp->txnp, 0, sizeof(DB_TXN));
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid));
+ bp += sizeof(argp->txnp->txnid);
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
@@ -231,10 +227,10 @@ __fop_create_read(dbenv, recbuf, argpp)
* PUBLIC: u_int32_t, const DBT *, const DBT *, u_int32_t));
*/
int
-__fop_remove_log(dbenv, txnid, ret_lsnp, flags,
+__fop_remove_log(dbenv, txnp, ret_lsnp, flags,
name, fid, appname)
DB_ENV *dbenv;
- DB_TXN *txnid;
+ DB_TXN *txnp;
DB_LSN *ret_lsnp;
u_int32_t flags;
const DBT *name;
@@ -258,29 +254,30 @@ __fop_remove_log(dbenv, txnid, ret_lsnp, flags,
ret = 0;
if (LF_ISSET(DB_LOG_NOT_DURABLE)) {
- if (txnid == NULL)
+ if (txnp == NULL)
+ return (0);
+ if (txnp == NULL)
return (0);
is_durable = 0;
} else
is_durable = 1;
- if (txnid == NULL) {
+ if (txnp == NULL) {
txn_num = 0;
lsnp = &null_lsn;
null_lsn.file = null_lsn.offset = 0;
} else {
- if (TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ if (TAILQ_FIRST(&txnp->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnp)) != 0)
return (ret);
/*
* We need to assign begin_lsn while holding region mutex.
* That assignment is done inside the DbEnv->log_put call,
* so pass in the appropriate memory location to be filled
* in by the log_put code.
- */
- DB_SET_BEGIN_LSNP(txnid, &rlsnp);
- txn_num = txnid->txnid;
- lsnp = &txnid->last_lsn;
+ */
+ DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
+ txn_num = txnp->txnid;
}
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
@@ -293,7 +290,7 @@ __fop_remove_log(dbenv, txnid, ret_lsnp, flags,
logrec.size += npad;
}
- if (is_durable || txnid == NULL) {
+ if (is_durable || txnp == NULL) {
if ((ret =
__os_malloc(dbenv, logrec.size, &logrec.data)) != 0)
return (ret);
@@ -351,12 +348,13 @@ __fop_remove_log(dbenv, txnid, ret_lsnp, flags,
memcpy(bp, &uinttmp, sizeof(uinttmp));
bp += sizeof(uinttmp);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+ DB_ASSERT(dbenv,
+ (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
- if (is_durable || txnid == NULL) {
+ if (is_durable || txnp == NULL) {
if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnid != NULL) {
- txnid->last_lsn = *rlsnp;
+ flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
+ *lsnp = *rlsnp;
if (rlsnp != ret_lsnp)
*ret_lsnp = *rlsnp;
}
@@ -375,20 +373,21 @@ __fop_remove_log(dbenv, txnid, ret_lsnp, flags,
#else
ret = 0;
#endif
- STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+ STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
+ F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
LSN_NOT_LOGGED(*ret_lsnp);
}
#ifdef LOG_DIAGNOSTIC
if (ret != 0)
(void)__fop_remove_print(dbenv,
- (DBT *)&logrec, ret_lsnp, NULL, NULL);
+ (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
#endif
#ifdef DIAGNOSTIC
__os_free(dbenv, logrec.data);
#else
- if (is_durable || txnid == NULL)
+ if (is_durable || txnp == NULL)
__os_free(dbenv, logrec.data);
#endif
return (ret);
@@ -412,13 +411,14 @@ __fop_remove_read(dbenv, recbuf, argpp)
sizeof(__fop_remove_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
bp = recbuf;
- argp->txnid = (DB_TXN *)&argp[1];
+ argp->txnp = (DB_TXN *)&argp[1];
+ memset(argp->txnp, 0, sizeof(DB_TXN));
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid));
+ bp += sizeof(argp->txnp->txnid);
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
@@ -449,11 +449,11 @@ __fop_remove_read(dbenv, recbuf, argpp)
* PUBLIC: u_int32_t, const DBT *, u_int32_t));
*/
int
-__fop_write_log(dbenv, txnid, ret_lsnp, flags,
+__fop_write_log(dbenv, txnp, ret_lsnp, flags,
name, appname, pgsize, pageno, offset, page,
flag)
DB_ENV *dbenv;
- DB_TXN *txnid;
+ DB_TXN *txnp;
DB_LSN *ret_lsnp;
u_int32_t flags;
const DBT *name;
@@ -481,29 +481,30 @@ __fop_write_log(dbenv, txnid, ret_lsnp, flags,
ret = 0;
if (LF_ISSET(DB_LOG_NOT_DURABLE)) {
- if (txnid == NULL)
+ if (txnp == NULL)
+ return (0);
+ if (txnp == NULL)
return (0);
is_durable = 0;
} else
is_durable = 1;
- if (txnid == NULL) {
+ if (txnp == NULL) {
txn_num = 0;
lsnp = &null_lsn;
null_lsn.file = null_lsn.offset = 0;
} else {
- if (TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ if (TAILQ_FIRST(&txnp->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnp)) != 0)
return (ret);
/*
* We need to assign begin_lsn while holding region mutex.
* That assignment is done inside the DbEnv->log_put call,
* so pass in the appropriate memory location to be filled
* in by the log_put code.
- */
- DB_SET_BEGIN_LSNP(txnid, &rlsnp);
- txn_num = txnid->txnid;
- lsnp = &txnid->last_lsn;
+ */
+ DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
+ txn_num = txnp->txnid;
}
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
@@ -520,7 +521,7 @@ __fop_write_log(dbenv, txnid, ret_lsnp, flags,
logrec.size += npad;
}
- if (is_durable || txnid == NULL) {
+ if (is_durable || txnp == NULL) {
if ((ret =
__os_malloc(dbenv, logrec.size, &logrec.data)) != 0)
return (ret);
@@ -594,12 +595,13 @@ __fop_write_log(dbenv, txnid, ret_lsnp, flags,
memcpy(bp, &uinttmp, sizeof(uinttmp));
bp += sizeof(uinttmp);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+ DB_ASSERT(dbenv,
+ (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
- if (is_durable || txnid == NULL) {
+ if (is_durable || txnp == NULL) {
if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnid != NULL) {
- txnid->last_lsn = *rlsnp;
+ flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
+ *lsnp = *rlsnp;
if (rlsnp != ret_lsnp)
*ret_lsnp = *rlsnp;
}
@@ -618,20 +620,21 @@ __fop_write_log(dbenv, txnid, ret_lsnp, flags,
#else
ret = 0;
#endif
- STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+ STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
+ F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
LSN_NOT_LOGGED(*ret_lsnp);
}
#ifdef LOG_DIAGNOSTIC
if (ret != 0)
(void)__fop_write_print(dbenv,
- (DBT *)&logrec, ret_lsnp, NULL, NULL);
+ (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
#endif
#ifdef DIAGNOSTIC
__os_free(dbenv, logrec.data);
#else
- if (is_durable || txnid == NULL)
+ if (is_durable || txnp == NULL)
__os_free(dbenv, logrec.data);
#endif
return (ret);
@@ -655,13 +658,14 @@ __fop_write_read(dbenv, recbuf, argpp)
sizeof(__fop_write_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
bp = recbuf;
- argp->txnid = (DB_TXN *)&argp[1];
+ argp->txnp = (DB_TXN *)&argp[1];
+ memset(argp->txnp, 0, sizeof(DB_TXN));
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid));
+ bp += sizeof(argp->txnp->txnid);
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
@@ -707,10 +711,10 @@ __fop_write_read(dbenv, recbuf, argpp)
* PUBLIC: u_int32_t, const DBT *, const DBT *, const DBT *, u_int32_t));
*/
int
-__fop_rename_log(dbenv, txnid, ret_lsnp, flags,
+__fop_rename_log(dbenv, txnp, ret_lsnp, flags,
oldname, newname, fileid, appname)
DB_ENV *dbenv;
- DB_TXN *txnid;
+ DB_TXN *txnp;
DB_LSN *ret_lsnp;
u_int32_t flags;
const DBT *oldname;
@@ -735,29 +739,30 @@ __fop_rename_log(dbenv, txnid, ret_lsnp, flags,
ret = 0;
if (LF_ISSET(DB_LOG_NOT_DURABLE)) {
- if (txnid == NULL)
+ if (txnp == NULL)
+ return (0);
+ if (txnp == NULL)
return (0);
is_durable = 0;
} else
is_durable = 1;
- if (txnid == NULL) {
+ if (txnp == NULL) {
txn_num = 0;
lsnp = &null_lsn;
null_lsn.file = null_lsn.offset = 0;
} else {
- if (TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ if (TAILQ_FIRST(&txnp->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnp)) != 0)
return (ret);
/*
* We need to assign begin_lsn while holding region mutex.
* That assignment is done inside the DbEnv->log_put call,
* so pass in the appropriate memory location to be filled
* in by the log_put code.
- */
- DB_SET_BEGIN_LSNP(txnid, &rlsnp);
- txn_num = txnid->txnid;
- lsnp = &txnid->last_lsn;
+ */
+ DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
+ txn_num = txnp->txnid;
}
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
@@ -771,7 +776,7 @@ __fop_rename_log(dbenv, txnid, ret_lsnp, flags,
logrec.size += npad;
}
- if (is_durable || txnid == NULL) {
+ if (is_durable || txnp == NULL) {
if ((ret =
__os_malloc(dbenv, logrec.size, &logrec.data)) != 0)
return (ret);
@@ -840,12 +845,13 @@ __fop_rename_log(dbenv, txnid, ret_lsnp, flags,
memcpy(bp, &uinttmp, sizeof(uinttmp));
bp += sizeof(uinttmp);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+ DB_ASSERT(dbenv,
+ (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
- if (is_durable || txnid == NULL) {
+ if (is_durable || txnp == NULL) {
if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnid != NULL) {
- txnid->last_lsn = *rlsnp;
+ flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
+ *lsnp = *rlsnp;
if (rlsnp != ret_lsnp)
*ret_lsnp = *rlsnp;
}
@@ -864,20 +870,21 @@ __fop_rename_log(dbenv, txnid, ret_lsnp, flags,
#else
ret = 0;
#endif
- STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+ STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
+ F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
LSN_NOT_LOGGED(*ret_lsnp);
}
#ifdef LOG_DIAGNOSTIC
if (ret != 0)
(void)__fop_rename_print(dbenv,
- (DBT *)&logrec, ret_lsnp, NULL, NULL);
+ (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
#endif
#ifdef DIAGNOSTIC
__os_free(dbenv, logrec.data);
#else
- if (is_durable || txnid == NULL)
+ if (is_durable || txnp == NULL)
__os_free(dbenv, logrec.data);
#endif
return (ret);
@@ -901,13 +908,14 @@ __fop_rename_read(dbenv, recbuf, argpp)
sizeof(__fop_rename_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
bp = recbuf;
- argp->txnid = (DB_TXN *)&argp[1];
+ argp->txnp = (DB_TXN *)&argp[1];
+ memset(argp->txnp, 0, sizeof(DB_TXN));
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid));
+ bp += sizeof(argp->txnp->txnid);
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
@@ -944,10 +952,10 @@ __fop_rename_read(dbenv, recbuf, argpp)
* PUBLIC: u_int32_t, u_int32_t));
*/
int
-__fop_file_remove_log(dbenv, txnid, ret_lsnp, flags,
+__fop_file_remove_log(dbenv, txnp, ret_lsnp, flags,
real_fid, tmp_fid, name, appname, child)
DB_ENV *dbenv;
- DB_TXN *txnid;
+ DB_TXN *txnp;
DB_LSN *ret_lsnp;
u_int32_t flags;
const DBT *real_fid;
@@ -973,29 +981,30 @@ __fop_file_remove_log(dbenv, txnid, ret_lsnp, flags,
ret = 0;
if (LF_ISSET(DB_LOG_NOT_DURABLE)) {
- if (txnid == NULL)
+ if (txnp == NULL)
+ return (0);
+ if (txnp == NULL)
return (0);
is_durable = 0;
} else
is_durable = 1;
- if (txnid == NULL) {
+ if (txnp == NULL) {
txn_num = 0;
lsnp = &null_lsn;
null_lsn.file = null_lsn.offset = 0;
} else {
- if (TAILQ_FIRST(&txnid->kids) != NULL &&
- (ret = __txn_activekids(dbenv, rectype, txnid)) != 0)
+ if (TAILQ_FIRST(&txnp->kids) != NULL &&
+ (ret = __txn_activekids(dbenv, rectype, txnp)) != 0)
return (ret);
/*
* We need to assign begin_lsn while holding region mutex.
* That assignment is done inside the DbEnv->log_put call,
* so pass in the appropriate memory location to be filled
* in by the log_put code.
- */
- DB_SET_BEGIN_LSNP(txnid, &rlsnp);
- txn_num = txnid->txnid;
- lsnp = &txnid->last_lsn;
+ */
+ DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp);
+ txn_num = txnp->txnid;
}
logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
@@ -1010,7 +1019,7 @@ __fop_file_remove_log(dbenv, txnid, ret_lsnp, flags,
logrec.size += npad;
}
- if (is_durable || txnid == NULL) {
+ if (is_durable || txnp == NULL) {
if ((ret =
__os_malloc(dbenv, logrec.size, &logrec.data)) != 0)
return (ret);
@@ -1083,12 +1092,13 @@ __fop_file_remove_log(dbenv, txnid, ret_lsnp, flags,
memcpy(bp, &uinttmp, sizeof(uinttmp));
bp += sizeof(uinttmp);
- DB_ASSERT((u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
+ DB_ASSERT(dbenv,
+ (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size);
- if (is_durable || txnid == NULL) {
+ if (is_durable || txnp == NULL) {
if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec,
- flags | DB_LOG_NOCOPY)) == 0 && txnid != NULL) {
- txnid->last_lsn = *rlsnp;
+ flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) {
+ *lsnp = *rlsnp;
if (rlsnp != ret_lsnp)
*ret_lsnp = *rlsnp;
}
@@ -1107,20 +1117,21 @@ __fop_file_remove_log(dbenv, txnid, ret_lsnp, flags,
#else
ret = 0;
#endif
- STAILQ_INSERT_HEAD(&txnid->logs, lr, links);
+ STAILQ_INSERT_HEAD(&txnp->logs, lr, links);
+ F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY);
LSN_NOT_LOGGED(*ret_lsnp);
}
#ifdef LOG_DIAGNOSTIC
if (ret != 0)
(void)__fop_file_remove_print(dbenv,
- (DBT *)&logrec, ret_lsnp, NULL, NULL);
+ (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL);
#endif
#ifdef DIAGNOSTIC
__os_free(dbenv, logrec.data);
#else
- if (is_durable || txnid == NULL)
+ if (is_durable || txnp == NULL)
__os_free(dbenv, logrec.data);
#endif
return (ret);
@@ -1145,13 +1156,14 @@ __fop_file_remove_read(dbenv, recbuf, argpp)
sizeof(__fop_file_remove_args) + sizeof(DB_TXN), &argp)) != 0)
return (ret);
bp = recbuf;
- argp->txnid = (DB_TXN *)&argp[1];
+ argp->txnp = (DB_TXN *)&argp[1];
+ memset(argp->txnp, 0, sizeof(DB_TXN));
memcpy(&argp->type, bp, sizeof(argp->type));
bp += sizeof(argp->type);
- memcpy(&argp->txnid->txnid, bp, sizeof(argp->txnid->txnid));
- bp += sizeof(argp->txnid->txnid);
+ memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid));
+ bp += sizeof(argp->txnp->txnid);
memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
bp += sizeof(DB_LSN);
diff --git a/db/fileops/fileops_autop.c b/db/fileops/fileops_autop.c
index 970b0c63b..e19167691 100644
--- a/db/fileops/fileops_autop.c
+++ b/db/fileops/fileops_autop.c
@@ -2,17 +2,9 @@
#include "db_config.h"
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <ctype.h>
-#include <string.h>
-#endif
-
#include "db_int.h"
#include "dbinc/crypto.h"
#include "dbinc/db_page.h"
-#include "dbinc/db_dispatch.h"
#include "dbinc/db_am.h"
#include "dbinc/log.h"
#include "dbinc/txn.h"
@@ -35,20 +27,18 @@ __fop_create_print(dbenv, dbtp, lsnp, notused2, notused3)
int ch;
int ret;
- notused2 = DB_TXN_ABORT;
+ notused2 = DB_TXN_PRINT;
notused3 = NULL;
if ((ret = __fop_create_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
(void)printf(
- "[%lu][%lu]__fop_create%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
+ "[%lu][%lu]__fop_create%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file, (u_long)lsnp->offset,
(argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
+ (u_long)argp->txnp->txnid,
+ (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
(void)printf("\tname: ");
for (i = 0; i < argp->name.size; i++) {
ch = ((u_int8_t *)argp->name.data)[i];
@@ -79,20 +69,18 @@ __fop_remove_print(dbenv, dbtp, lsnp, notused2, notused3)
int ch;
int ret;
- notused2 = DB_TXN_ABORT;
+ notused2 = DB_TXN_PRINT;
notused3 = NULL;
if ((ret = __fop_remove_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
(void)printf(
- "[%lu][%lu]__fop_remove%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
+ "[%lu][%lu]__fop_remove%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file, (u_long)lsnp->offset,
(argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
+ (u_long)argp->txnp->txnid,
+ (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
(void)printf("\tname: ");
for (i = 0; i < argp->name.size; i++) {
ch = ((u_int8_t *)argp->name.data)[i];
@@ -128,20 +116,18 @@ __fop_write_print(dbenv, dbtp, lsnp, notused2, notused3)
int ch;
int ret;
- notused2 = DB_TXN_ABORT;
+ notused2 = DB_TXN_PRINT;
notused3 = NULL;
if ((ret = __fop_write_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
(void)printf(
- "[%lu][%lu]__fop_write%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
+ "[%lu][%lu]__fop_write%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file, (u_long)lsnp->offset,
(argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
+ (u_long)argp->txnp->txnid,
+ (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
(void)printf("\tname: ");
for (i = 0; i < argp->name.size; i++) {
ch = ((u_int8_t *)argp->name.data)[i];
@@ -181,20 +167,18 @@ __fop_rename_print(dbenv, dbtp, lsnp, notused2, notused3)
int ch;
int ret;
- notused2 = DB_TXN_ABORT;
+ notused2 = DB_TXN_PRINT;
notused3 = NULL;
if ((ret = __fop_rename_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
(void)printf(
- "[%lu][%lu]__fop_rename%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
+ "[%lu][%lu]__fop_rename%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file, (u_long)lsnp->offset,
(argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
+ (u_long)argp->txnp->txnid,
+ (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
(void)printf("\toldname: ");
for (i = 0; i < argp->oldname.size; i++) {
ch = ((u_int8_t *)argp->oldname.data)[i];
@@ -236,20 +220,18 @@ __fop_file_remove_print(dbenv, dbtp, lsnp, notused2, notused3)
int ch;
int ret;
- notused2 = DB_TXN_ABORT;
+ notused2 = DB_TXN_PRINT;
notused3 = NULL;
if ((ret = __fop_file_remove_read(dbenv, dbtp->data, &argp)) != 0)
return (ret);
(void)printf(
- "[%lu][%lu]__fop_file_remove%s: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
- (u_long)lsnp->file,
- (u_long)lsnp->offset,
+ "[%lu][%lu]__fop_file_remove%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n",
+ (u_long)lsnp->file, (u_long)lsnp->offset,
(argp->type & DB_debug_FLAG) ? "_debug" : "",
(u_long)argp->type,
- (u_long)argp->txnid->txnid,
- (u_long)argp->prev_lsn.file,
- (u_long)argp->prev_lsn.offset);
+ (u_long)argp->txnp->txnid,
+ (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset);
(void)printf("\treal_fid: ");
for (i = 0; i < argp->real_fid.size; i++) {
ch = ((u_int8_t *)argp->real_fid.data)[i];
diff --git a/db/fileops/fop_basic.c b/db/fileops/fop_basic.c
index 36a958e95..9563ddbc1 100644
--- a/db/fileops/fop_basic.c
+++ b/db/fileops/fop_basic.c
@@ -1,22 +1,16 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2001-2004
- * Sleepycat Software. All rights reserved.
+ * Copyright (c) 2001-2006
+ * Oracle Corporation. All rights reserved.
*
- * $Id: fop_basic.c,v 1.32 2004/11/15 20:04:50 bostic Exp $
+ * $Id: fop_basic.c,v 12.19 2006/09/19 15:06:59 bostic Exp $
*/
#include "db_config.h"
-#ifndef NO_SYSTEM_INCLUDES
-#include <string.h>
-#include <sys/types.h>
-#endif
-
#include "db_int.h"
#include "dbinc/db_page.h"
-#include "dbinc/db_shash.h"
#include "dbinc/fop.h"
#include "dbinc/log.h"
#include "dbinc/mp.h"
@@ -24,9 +18,33 @@
#include "dbinc/db_am.h"
/*
- * This file implements the basic file-level operations. This code
- * ought to be fairly independent of DB, other than through its
- * error-reporting mechanism.
+ * The transactional guarantees Berkeley DB provides for file
+ * system level operations (database physical file create, delete,
+ * rename) are based on our understanding of current file system
+ * semantics; a system that does not provide these semantics and
+ * guarantees could be in danger.
+ *
+ * First, as in standard database changes, fsync and fdatasync must
+ * work: when applied to the log file, the records written into the
+ * log must be transferred to stable storage.
+ *
+ * Second, it must not be possible for the log file to be removed
+ * without previous file system level operations being flushed to
+ * stable storage. Berkeley DB applications write log records
+ * describing file system operations into the log, then perform the
+ * file system operation, then commit the enclosing transaction
+ * (which flushes the log file to stable storage). Subsequently,
+ * a database environment checkpoint may make it possible for the
+ * application to remove the log file containing the record of the
+ * file system operation. DB's transactional guarantees for file
+ * system operations require the log file removal not succeed until
+ * all previous filesystem operations have been flushed to stable
+ * storage. In other words, the flush of the log file, or the
+ * removal of the log file, must block until all previous
+ * filesystem operations have been flushed to stable storage. This
+ * semantic is not, as far as we know, required by any existing
+ * standards document, but we have never seen a filesystem where
+ * it does not apply.
*/
/*
@@ -55,20 +73,20 @@ __fop_create(dbenv, txn, fhpp, name, appname, mode, flags)
char *real_name;
real_name = NULL;
+ fhp = NULL;
if ((ret =
__db_appname(dbenv, appname, name, 0, NULL, &real_name)) != 0)
return (ret);
if (mode == 0)
- mode = __db_omode("rw----");
+ mode = __db_omode(OWNER_RW);
if (DBENV_LOGGING(dbenv)) {
- memset(&data, 0, sizeof(data));
- data.data = (void *)name;
- data.size = (u_int32_t)strlen(name) + 1;
+ DB_INIT_DBT(data, name, strlen(name) + 1);
if ((ret = __fop_create_log(dbenv, txn, &lsn,
- flags | DB_FLUSH, &data, (u_int32_t)appname, mode)) != 0)
+ flags | DB_FLUSH,
+ &data, (u_int32_t)appname, (u_int32_t)mode)) != 0)
goto err;
}
@@ -115,23 +133,21 @@ __fop_remove(dbenv, txn, fileid, name, appname, flags)
__db_appname(dbenv, appname, name, 0, NULL, &real_name)) != 0)
goto err;
- if (txn == NULL) {
+ if (!IS_REAL_TXN(txn)) {
if (fileid != NULL && (ret = __memp_nameop(
- dbenv, fileid, NULL, real_name, NULL)) != 0)
+ dbenv, fileid, NULL, real_name, NULL, 0)) != 0)
goto err;
} else {
if (DBENV_LOGGING(dbenv)) {
memset(&fdbt, 0, sizeof(ndbt));
fdbt.data = fileid;
fdbt.size = fileid == NULL ? 0 : DB_FILE_ID_LEN;
- memset(&ndbt, 0, sizeof(ndbt));
- ndbt.data = (void *)name;
- ndbt.size = (u_int32_t)strlen(name) + 1;
- if ((ret = __fop_remove_log(dbenv,
- txn, &lsn, flags, &ndbt, &fdbt, appname)) != 0)
+ DB_INIT_DBT(ndbt, name, strlen(name) + 1);
+ if ((ret = __fop_remove_log(dbenv, txn, &lsn,
+ flags, &ndbt, &fdbt, (u_int32_t)appname)) != 0)
goto err;
}
- ret = __txn_remevent(dbenv, txn, real_name, fileid);
+ ret = __txn_remevent(dbenv, txn, real_name, fileid, 0);
}
err: if (real_name != NULL)
@@ -176,7 +192,7 @@ __fop_write(dbenv,
int local_open, ret, t_ret;
char *real_name;
- DB_ASSERT(istmp != 0);
+ DB_ASSERT(dbenv, istmp != 0);
ret = local_open = 0;
real_name = NULL;
@@ -189,11 +205,10 @@ __fop_write(dbenv,
memset(&data, 0, sizeof(data));
data.data = buf;
data.size = size;
- memset(&namedbt, 0, sizeof(namedbt));
- namedbt.data = (void *)name;
- namedbt.size = (u_int32_t)strlen(name) + 1;
- if ((ret = __fop_write_log(dbenv, txn, &lsn, flags,
- &namedbt, appname, pgsize, pageno, off, &data, istmp)) != 0)
+ DB_INIT_DBT(namedbt, name, strlen(name) + 1);
+ if ((ret = __fop_write_log(dbenv, txn,
+ &lsn, flags, &namedbt, (u_int32_t)appname,
+ pgsize, pageno, off, &data, istmp)) != 0)
goto err;
}
@@ -205,8 +220,7 @@ __fop_write(dbenv,
}
/* Seek to offset. */
- if ((ret = __os_seek(dbenv,
- fhp, pgsize, pageno, off, 0, DB_OS_SEEK_SET)) != 0)
+ if ((ret = __os_seek(dbenv, fhp, pageno, pgsize, off)) != 0)
goto err;
/* Now do the write. */
@@ -251,13 +265,9 @@ __fop_rename(dbenv, txn, oldname, newname, fid, appname, flags)
goto err;
if (DBENV_LOGGING(dbenv)) {
- memset(&old, 0, sizeof(old));
- memset(&new, 0, sizeof(new));
+ DB_INIT_DBT(old, oldname, strlen(oldname) + 1);
+ DB_INIT_DBT(new, newname, strlen(newname) + 1);
memset(&fiddbt, 0, sizeof(fiddbt));
- old.data = (void *)oldname;
- old.size = (u_int32_t)strlen(oldname) + 1;
- new.data = (void *)newname;
- new.size = (u_int32_t)strlen(newname) + 1;
fiddbt.data = fid;
fiddbt.size = DB_FILE_ID_LEN;
if ((ret = __fop_rename_log(dbenv, txn, &lsn, flags | DB_FLUSH,
@@ -265,7 +275,7 @@ __fop_rename(dbenv, txn, oldname, newname, fid, appname, flags)
goto err;
}
- ret = __memp_nameop(dbenv, fid, newname, o, n);
+ ret = __memp_nameop(dbenv, fid, newname, o, n, 0);
err: if (o != NULL)
__os_free(dbenv, o);
diff --git a/db/fileops/fop_rec.c b/db/fileops/fop_rec.c
index a9326d532..eced8fd39 100644
--- a/db/fileops/fop_rec.c
+++ b/db/fileops/fop_rec.c
@@ -1,29 +1,52 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2001-2004
- * Sleepycat Software. All rights reserved.
+ * Copyright (c) 2001-2006
+ * Oracle Corporation. All rights reserved.
*
- * $Id: fop_rec.c,v 1.31 2004/09/22 03:45:25 bostic Exp $
+ * $Id: fop_rec.c,v 12.12 2006/08/24 14:46:03 bostic Exp $
*/
#include "db_config.h"
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <string.h>
-#endif
-
#include "db_int.h"
#include "dbinc/db_page.h"
-#include "dbinc/db_shash.h"
#include "dbinc/fop.h"
#include "dbinc/db_am.h"
#include "dbinc/mp.h"
#include "dbinc/txn.h"
/*
+ * The transactional guarantees Berkeley DB provides for file
+ * system level operations (database physical file create, delete,
+ * rename) are based on our understanding of current file system
+ * semantics; a system that does not provide these semantics and
+ * guarantees could be in danger.
+ *
+ * First, as in standard database changes, fsync and fdatasync must
+ * work: when applied to the log file, the records written into the
+ * log must be transferred to stable storage.
+ *
+ * Second, it must not be possible for the log file to be removed
+ * without previous file system level operations being flushed to
+ * stable storage. Berkeley DB applications write log records
+ * describing file system operations into the log, then perform the
+ * file system operation, then commit the enclosing transaction
+ * (which flushes the log file to stable storage). Subsequently,
+ * a database environment checkpoint may make it possible for the
+ * application to remove the log file containing the record of the
+ * file system operation. DB's transactional guarantees for file
+ * system operations require the log file removal not succeed until
+ * all previous filesystem operations have been flushed to stable
+ * storage. In other words, the flush of the log file, or the
+ * removal of the log file, must block until all previous
+ * filesystem operations have been flushed to stable storage. This
+ * semantic is not, as far as we know, required by any existing
+ * standards document, but we have never seen a filesystem where
+ * it does not apply.
+ */
+
+/*
* __fop_create_recover --
* Recovery function for create.
*
@@ -56,7 +79,7 @@ __fop_create_recover(dbenv, dbtp, lsnp, op, info)
(void)__os_unlink(dbenv, real_name);
else if (DB_REDO(op)) {
if ((ret = __os_open(dbenv, real_name,
- DB_OSO_CREATE | DB_OSO_EXCL, argp->mode, &fhp)) == 0)
+ DB_OSO_CREATE | DB_OSO_EXCL, (int)argp->mode, &fhp)) == 0)
(void)__os_closehandle(dbenv, fhp);
else
goto out;
@@ -101,7 +124,7 @@ __fop_remove_recover(dbenv, dbtp, lsnp, op, info)
/* It's OK if the file is not there. */
if (DB_REDO(op))
(void)__memp_nameop(dbenv,
- (u_int8_t *)argp->fid.data, NULL, real_name, NULL);
+ (u_int8_t *)argp->fid.data, NULL, real_name, NULL, 0);
*lsnp = argp->prev_lsn;
out: if (real_name != NULL)
@@ -133,10 +156,10 @@ __fop_write_recover(dbenv, dbtp, lsnp, op, info)
ret = 0;
if (DB_UNDO(op))
- DB_ASSERT(argp->flag != 0);
+ DB_ASSERT(dbenv, argp->flag != 0);
else if (DB_REDO(op))
ret = __fop_write(dbenv,
- argp->txnid, argp->name.data, argp->appname,
+ argp->txnp, argp->name.data, (APPNAME)argp->appname,
NULL, argp->pgsize, argp->pageno, argp->offset,
argp->page.data, argp->page.size, argp->flag, 0);
@@ -209,14 +232,33 @@ __fop_rename_recover(dbenv, dbtp, lsnp, op, info)
goto done;
(void)__os_closehandle(dbenv, fhp);
fhp = NULL;
+ if (DB_REDO(op)) {
+ /*
+ * Check to see if the target file exists. If it
+ * does and it does not have the proper id then
+ * it is a later version. We just remove the source
+ * file since the state of the world is beyond this
+ * point.
+ */
+ if (__os_open(dbenv, real_new, 0, 0, &fhp) == 0 &&
+ __fop_read_meta(dbenv, src, mbuf,
+ DBMETASIZE, fhp, 1, NULL) == 0 &&
+ __db_chk_meta(dbenv, NULL, meta, 1) == 0 &&
+ memcmp(argp->fileid.data,
+ meta->uid, DB_FILE_ID_LEN) != 0) {
+ (void)__memp_nameop(dbenv,
+ fileid, NULL, real_old, NULL, 0);
+ goto done;
+ }
+ }
}
if (DB_UNDO(op))
(void)__memp_nameop(dbenv, fileid,
- (const char *)argp->oldname.data, real_new, real_old);
+ (const char *)argp->oldname.data, real_new, real_old, 0);
if (DB_REDO(op))
(void)__memp_nameop(dbenv, fileid,
- (const char *)argp->newname.data, real_old, real_new);
+ (const char *)argp->newname.data, real_old, real_new, 0);
done: *lsnp = argp->prev_lsn;
out: if (real_new != NULL)
@@ -327,7 +369,7 @@ __fop_file_remove_recover(dbenv, dbtp, lsnp, op, info)
if (cstat == TXN_COMMIT)
(void)__memp_nameop(dbenv,
is_real ? argp->real_fid.data : argp->tmp_fid.data,
- NULL, real_name, NULL);
+ NULL, real_name, NULL, 0);
}
done: *lsnp = argp->prev_lsn;
diff --git a/db/fileops/fop_util.c b/db/fileops/fop_util.c
index 564dc4a36..9da9d4a43 100644
--- a/db/fileops/fop_util.c
+++ b/db/fileops/fop_util.c
@@ -1,25 +1,18 @@
/*-
* See the file LICENSE for redistribution information.
*
- * Copyright (c) 2001-2004
- * Sleepycat Software. All rights reserved.
+ * Copyright (c) 2001-2006
+ * Oracle Corporation. All rights reserved.
*
- * $Id: fop_util.c,v 1.104 2004/09/24 00:43:18 bostic Exp $
+ * $Id: fop_util.c,v 12.36 2006/09/19 15:06:59 bostic Exp $
*/
#include "db_config.h"
-#ifndef NO_SYSTEM_INCLUDES
-#include <sys/types.h>
-
-#include <stdlib.h>
-#include <string.h>
-#endif
-
#include "db_int.h"
#include "dbinc/db_page.h"
-#include "dbinc/db_shash.h"
#include "dbinc/db_am.h"
+#include "dbinc/hash.h"
#include "dbinc/fop.h"
#include "dbinc/lock.h"
#include "dbinc/mp.h"
@@ -27,6 +20,15 @@
#include "dbinc/txn.h"
static int __fop_set_pgsize __P((DB *, DB_FH *, const char *));
+static int __fop_inmem_create __P((DB *, const char *, DB_TXN *, u_int32_t));
+static int __fop_inmem_dummy __P((DB *, DB_TXN *, const char *, u_int8_t *));
+static int __fop_inmem_read_meta __P((DB *, DB_TXN *, const char *, u_int32_t));
+static int __fop_inmem_swap __P((DB *, DB *, DB_TXN *,
+ const char *, const char *, const char *, u_int32_t));
+static int __fop_ondisk_dummy __P((DB *,
+ DB_TXN *, const char *, u_int8_t *, u_int32_t));
+static int __fop_ondisk_swap __P((DB *, DB *, DB_TXN *,
+ const char *, const char *, const char *, u_int32_t, u_int32_t));
/*
* Acquire the environment meta-data lock. The parameters are the
@@ -56,6 +58,14 @@ static int __fop_set_pgsize __P((DB *, DB_FH *, const char *));
} while (0)
#endif
+#define RESET_MPF(D, F) do { \
+ (void)__memp_fclose((D)->mpf, (F)); \
+ (D)->mpf = NULL; \
+ F_CLR((D), DB_AM_OPEN_CALLED); \
+ if ((ret = __memp_fcreate((D)->dbenv, &(D)->mpf)) != 0) \
+ goto err; \
+} while (0)
+
/*
* If we open a file handle and our caller is doing fcntl(2) locking,
* we can't close the handle because that would discard the caller's
@@ -109,7 +119,7 @@ __fop_lock_handle(dbenv, dbp, locker, mode, elockp, flags)
* doing is on the global environment.
*/
if (IS_RECOVERING(dbenv))
- return (elockp == NULL ? 0 : __ENV_LPUT(dbenv, *elockp, 0));
+ return (elockp == NULL ? 0 : __ENV_LPUT(dbenv, *elockp));
memcpy(lock_desc.fileid, dbp->fileid, DB_FILE_ID_LEN);
lock_desc.pgno = dbp->meta_pgno;
@@ -199,24 +209,28 @@ __fop_file_setup(dbp, txn, name, mode, flags, retidp)
DB_FH *fhp;
DB_LOCK elock;
DB_TXN *stxn;
+ DBTYPE save_type;
size_t len;
u_int32_t dflags, locker, oflags;
u_int8_t mbuf[DBMETASIZE];
- int created_locker, ret, retries, t_ret, tmp_created, truncating;
+ int created_locker, create_ok, ret, retries, t_ret, tmp_created;
+ int truncating, was_inval;
char *real_name, *real_tmpname, *tmpname;
- DB_ASSERT(name != NULL);
-
*retidp = TXN_INVALID;
dbenv = dbp->dbenv;
fhp = NULL;
LOCK_INIT(elock);
stxn = NULL;
- created_locker = tmp_created = truncating = 0;
+ created_locker = tmp_created = truncating = was_inval = 0;
real_name = real_tmpname = tmpname = NULL;
dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0;
+ ret = 0;
+ retries = 0;
+ save_type = dbp->type;
+
/*
* Get a lockerid for this handle. There are paths through queue
* rename and remove where this dbp already has a locker, so make
@@ -226,7 +240,7 @@ __fop_file_setup(dbp, txn, name, mode, flags, retidp)
!F_ISSET(dbp, DB_AM_COMPENSATE) &&
!F_ISSET(dbp, DB_AM_RECOVER) &&
dbp->lid == DB_LOCK_INVALIDID) {
- if ((ret = __lock_id(dbenv, &dbp->lid)) != 0)
+ if ((ret = __lock_id(dbenv, &dbp->lid, NULL)) != 0)
goto err;
created_locker = 1;
}
@@ -234,21 +248,29 @@ __fop_file_setup(dbp, txn, name, mode, flags, retidp)
locker = txn == NULL ? dbp->lid : txn->txnid;
- /* Get the real backing file name. */
- if ((ret = __db_appname(dbenv,
- DB_APP_DATA, name, 0, NULL, &real_name)) != 0)
- goto err;
+ oflags = 0;
+ if (F_ISSET(dbp, DB_AM_INMEM))
+ real_name = (char *)name;
+ else {
+ /* Get the real backing file name. */
+ if ((ret = __db_appname(dbenv,
+ DB_APP_DATA, name, 0, NULL, &real_name)) != 0)
+ goto err;
- /* Fill in the default file mode. */
- if (mode == 0)
- mode = __db_omode("rwrw--");
+ /* Fill in the default file mode. */
+ if (mode == 0)
+ mode = __db_omode("rw-rw----");
+
+ if (LF_ISSET(DB_RDONLY))
+ oflags |= DB_OSO_RDONLY;
+ if (LF_ISSET(DB_TRUNCATE))
+ oflags |= DB_OSO_TRUNC;
+ }
- oflags = 0;
- if (LF_ISSET(DB_RDONLY))
- oflags |= DB_OSO_RDONLY;
- if (LF_ISSET(DB_TRUNCATE))
- oflags |= DB_OSO_TRUNC;
retries = 0;
+ create_ok = LF_ISSET(DB_CREATE);
+ LF_CLR(DB_CREATE);
+
retry:
/*
* If we cannot create the file, only retry a few times. We
@@ -257,13 +279,36 @@ retry:
* a previous crash).
*/
if (++retries > DB_RETRY) {
- __db_err(dbenv, "__fop_file_setup: Retry limit (%d) exceeded",
+ __db_errx(dbenv, "__fop_file_setup: Retry limit (%d) exceeded",
DB_RETRY);
goto err;
}
if (!F_ISSET(dbp, DB_AM_COMPENSATE) && !F_ISSET(dbp, DB_AM_RECOVER))
GET_ENVLOCK(dbenv, locker, &elock);
- if ((ret = __os_exists(real_name, NULL)) == 0) {
+ if (name == NULL)
+ ret = ENOENT;
+ else if (F_ISSET(dbp, DB_AM_INMEM)) {
+ ret = __db_dbenv_mpool(dbp, name, flags);
+ /*
+ * We are using __db_dbenv_mpool as a check for existence.
+ * However, __db_dbenv_mpool does an actual open and there
+ * are scenarios where the object exists, but cannot be
+ * opened, because our settings don't match those internally.
+ * We need to check for that explicitly. We'll need the
+ * mpool open to read the meta-data page, so we're going to
+ * have to temporarily turn this dbp into an UNKNOWN one.
+ */
+ if (ret == EINVAL) {
+ was_inval = 1;
+ save_type = dbp->type;
+ dbp->type = DB_UNKNOWN;
+ ret = __db_dbenv_mpool(dbp, name, flags);
+ dbp->type = save_type;
+ }
+ } else
+ ret = __os_exists(dbenv, real_name, NULL);
+
+ if (ret == 0) {
/*
* If the file exists, there are 5 possible cases:
* 1. DB_EXCL was specified so this is an error, unless
@@ -275,12 +320,14 @@ retry:
* of file it is, we should open/create it.
* 3. It is 0-length, we are not doing transactions (i.e.,
* we are sendmail), we should open/create into it.
+ * -- on-disk files only!
* 4. Is it a Berkeley DB file and we should simply open it.
* 5. It is not a BDB file and we should return an error.
*/
- /* We have to open the file. */
-reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0)
+ /* Open file (if there is one). */
+reopen: if (!F_ISSET(dbp, DB_AM_INMEM) &&
+ (ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0)
goto err;
/* Case 2: DB_TRUNCATE: we must do the creation in place. */
@@ -295,33 +342,43 @@ reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0)
}
/* Cases 1,3-5: we need to read the meta-data page. */
- ret = __fop_read_meta(dbenv, real_name, mbuf, sizeof(mbuf), fhp,
- LF_ISSET(DB_FCNTL_LOCKING) && txn == NULL ? 1 : 0, &len);
-
- /* Case 3: 0-length, no txns. */
- if (ret != 0 && len == 0 && txn == NULL) {
- if (LF_ISSET(DB_EXCL)) {
- /* Case 1b: DB_EXCL and 0-lenth file exists. */
- ret = EEXIST;
- goto err;
+ if (F_ISSET(dbp, DB_AM_INMEM))
+ ret = __fop_inmem_read_meta(dbp, txn, name, flags);
+ else {
+ ret = __fop_read_meta(dbenv, real_name, mbuf,
+ sizeof(mbuf), fhp,
+ LF_ISSET(DB_FCNTL_LOCKING) && txn == NULL ? 1 : 0,
+ &len);
+
+ /* Case 3: 0-length, no txns. */
+ if (ret != 0 && len == 0 && txn == NULL) {
+ if (LF_ISSET(DB_EXCL)) {
+ /*
+ * Case 1b: DB_EXCL and
+ * 0-length file exists.
+ */
+ ret = EEXIST;
+ goto err;
+ }
+ tmpname = (char *)name;
+ goto creat2;
}
- tmpname = (char *)name;
- goto creat2;
+
+ /* Case 4: This is a valid file. */
+ if (ret == 0)
+ ret = __db_meta_setup(dbenv, dbp,
+ real_name, (DBMETA *)mbuf, flags, 1);
+
}
/* Case 5: Invalid file. */
if (ret != 0)
goto err;
- /* Case 4: This is a valid file. */
- if ((ret = __db_meta_setup(dbenv,
- dbp, real_name, (DBMETA *)mbuf, flags, 1)) != 0)
- goto err;
-
/* Now, get our handle lock. */
if ((ret = __fop_lock_handle(dbenv,
dbp, locker, DB_LOCK_READ, NULL, DB_LOCK_NOWAIT)) == 0) {
- if ((ret = __ENV_LPUT(dbenv, elock, 0)) != 0)
+ if ((ret = __ENV_LPUT(dbenv, elock)) != 0)
goto err;
} else if (ret != DB_LOCK_NOTGRANTED ||
(txn != NULL && F_ISSET(txn, TXN_NOWAIT)))
@@ -341,28 +398,66 @@ reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0)
* We assert it here to make sure we aren't destroying
* any application level FCNTL semantics.
*/
- DB_ASSERT(!LF_ISSET(DB_FCNTL_LOCKING));
- if ((ret = __os_closehandle(dbenv, fhp)) != 0)
- goto err;
- fhp = NULL;
- ret = __fop_lock_handle(dbenv,
- dbp, locker, DB_LOCK_READ, &elock, 0);
- if (ret == DB_LOCK_NOTEXIST)
- goto retry;
- if (ret != 0)
+ DB_ASSERT(dbenv, !LF_ISSET(DB_FCNTL_LOCKING));
+ if (!F_ISSET(dbp, DB_AM_INMEM)) {
+ if ((ret = __os_closehandle(dbenv, fhp)) != 0)
+ goto err;
+ fhp = NULL;
+ }
+ if ((ret = __fop_lock_handle(dbenv,
+ dbp, locker, DB_LOCK_READ, &elock, 0)) != 0) {
+ if (F_ISSET(dbp, DB_AM_INMEM))
+ RESET_MPF(dbp, 0);
goto err;
+ }
+
/*
- * XXX
- * I need to convince myself that I don't need to
- * re-read the metadata page here. If you do need
- * to re-read it you'd better decrypt it too...
+ * It's possible that our DBP was initialized
+ * with a different file last time we opened it.
+ * Therefore, we need to reset the DBP type and then
+ * re-read the meta-data page and reset any other
+ * fields that __db_meta_setup initializes. We
+ * need to shut down this dbp and reopen for in-memory
+ * named databases. Unfortunately __db_refresh is
+ * pretty aggressive at the shutting down, so we need
+ * to do a bunch of restoration.
+ * XXX it would be nice to pull refresh apart into
+ * the stuff you need to do to call __db_dbenv_mpool
+ * and the stuff you can really throw away.
*/
- if ((ret =
- __os_open(dbenv, real_name, 0, 0, &fhp)) != 0)
+ if (F_ISSET(dbp, DB_AM_INMEM)) {
+ if ((ret = __db_refresh(dbp,
+ txn, DB_NOSYNC, NULL, 1)) != 0)
+ goto err;
+ ret = __db_dbenv_mpool(dbp, name, flags);
+ } else
+ ret = __os_open(dbenv, real_name, 0, 0, &fhp);
+
+ if (ret != 0) {
+ if ((ret =
+ __ENV_LPUT(dbenv, dbp->handle_lock)) != 0) {
+ LOCK_INIT(dbp->handle_lock);
+ goto err;
+ }
+ goto retry;
+ }
+
+ dbp->type = save_type;
+ if (F_ISSET(dbp, DB_AM_INMEM))
+ ret = __fop_inmem_read_meta(dbp,
+ txn, name, flags);
+ else if ((ret =
+ __fop_read_meta(dbenv, real_name, mbuf,
+ sizeof(mbuf), fhp,
+ LF_ISSET(DB_FCNTL_LOCKING) && txn == NULL ? 1 : 0,
+ &len)) != 0 ||
+ (ret = __db_meta_setup(dbenv, dbp, real_name,
+ (DBMETA *)mbuf, flags, 1)) != 0)
goto err;
+
}
- /* If we got here, then we now have the handle lock. */
+ /* If we got here, then we have the handle lock. */
/*
* Check for a file in the midst of a rename. If we find that
@@ -370,12 +465,18 @@ reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0)
* that it is in our current transaction (else we would still
* be blocking), so we can continue along and create a new file
* with the same name. In that case, we have to close the file
- * handle because we reuse it below.
+ * handle because we reuse it below. This is a case where
+ * a 'was_inval' above is OK.
*/
if (F_ISSET(dbp, DB_AM_IN_RENAME)) {
- if (LF_ISSET(DB_CREATE)) {
- if ((ret = __os_closehandle(dbenv, fhp)) != 0)
+ was_inval = 0;
+ if (create_ok) {
+ if (F_ISSET(dbp, DB_AM_INMEM)) {
+ RESET_MPF(dbp, DB_MPOOL_DISCARD);
+ } else if ((ret =
+ __os_closehandle(dbenv, fhp)) != 0)
goto err;
+ LF_SET(DB_CREATE);
goto create;
} else {
ret = ENOENT;
@@ -383,6 +484,12 @@ reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0)
}
}
+ /* If we get here, a was_inval is bad. */
+ if (was_inval) {
+ ret = EINVAL;
+ goto err;
+ }
+
/*
* Now, case 1: check for DB_EXCL, because the file that exists
* is not in the middle of a rename, so we have an error. This
@@ -391,7 +498,7 @@ reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0)
* should not have been allowed to open it.
*/
if (LF_ISSET(DB_EXCL)) {
- ret = __ENV_LPUT(dbenv, dbp->handle_lock, 0);
+ ret = __ENV_LPUT(dbenv, dbp->handle_lock);
LOCK_INIT(dbp->handle_lock);
if (ret == 0)
ret = EEXIST;
@@ -401,59 +508,82 @@ reopen: if ((ret = __os_open(dbenv, real_name, oflags, 0, &fhp)) != 0)
}
/* File does not exist. */
- if (!LF_ISSET(DB_CREATE))
+#ifdef HAVE_VXWORKS
+ /*
+ * VxWorks can return file-system specific error codes if the
+ * file does not exist, not ENOENT.
+ */
+ if (!create_ok)
+#else
+ if (!create_ok || ret != ENOENT)
+#endif
goto err;
+ LF_SET(DB_CREATE);
ret = 0;
/*
* We need to create file, which means that we need to set up the file,
* the fileid and the locks. Then we need to call the appropriate
- * routines to create meta-data pages.
+ * routines to create meta-data pages. For in-memory files, we retain
+ * the environment lock, while for on-disk files, we drop the env lock
+ * and create into a temporary.
*/
- if ((ret = __ENV_LPUT(dbenv, elock, 0)) != 0)
+ if (!F_ISSET(dbp, DB_AM_INMEM) &&
+ (ret = __ENV_LPUT(dbenv, elock)) != 0)
goto err;
create: if (txn != NULL && IS_REP_CLIENT(dbenv)) {
- __db_err(dbenv,
+ __db_errx(dbenv,
"Transactional create on replication client disallowed");
ret = EINVAL;
goto err;
}
- if ((ret = __db_backup_name(dbenv, name, txn, &tmpname)) != 0)
- goto err;
- if (TXN_ON(dbenv) && txn != NULL &&
- (ret = __txn_begin(dbenv, txn, &stxn, 0)) != 0)
- goto err;
- if ((ret = __fop_create(dbenv,
- stxn, &fhp, tmpname, DB_APP_DATA, mode, dflags)) != 0) {
- /*
- * If we don't have transactions there is a race on
- * creating the temp file.
- */
- if (!TXN_ON(dbenv) && ret == EEXIST) {
- __os_free(dbenv, tmpname);
- tmpname = NULL;
- __os_yield(dbenv, 1);
- goto retry;
+
+ if (F_ISSET(dbp, DB_AM_INMEM))
+ ret = __fop_inmem_create(dbp, name, txn, flags);
+ else {
+ if ((ret = __db_backup_name(dbenv, name, txn, &tmpname)) != 0)
+ goto err;
+ if (TXN_ON(dbenv) && txn != NULL &&
+ (ret = __txn_begin(dbenv, txn, &stxn, 0)) != 0)
+ goto err;
+ if ((ret = __fop_create(dbenv,
+ stxn, &fhp, tmpname, DB_APP_DATA, mode, dflags)) != 0) {
+ /*
+ * If no transactions, there is a race on creating the
+ * backup file, as the backup file name is the same for
+ * all processes. Wait for the other process to finish
+ * with the name.
+ */
+ if (!TXN_ON(dbenv) && ret == EEXIST) {
+ __os_free(dbenv, tmpname);
+ tmpname = NULL;
+ __os_sleep(dbenv, 1, 0);
+ goto retry;
+ }
+ goto err;
}
- goto err;
+ tmp_created = 1;
}
- tmp_created = 1;
-creat2: if ((ret = __db_appname(dbenv,
- DB_APP_DATA, tmpname, 0, NULL, &real_tmpname)) != 0)
- goto err;
+creat2: if (!F_ISSET(dbp, DB_AM_INMEM)) {
+ if ((ret = __db_appname(dbenv,
+ DB_APP_DATA, tmpname, 0, NULL, &real_tmpname)) != 0)
+ goto err;
- /* Set the pagesize if it isn't yet set. */
- if (dbp->pgsize == 0 &&
- (ret = __fop_set_pgsize(dbp, fhp, real_tmpname)) != 0)
- goto errmsg;
+ /* Set the pagesize if it isn't yet set. */
+ if (dbp->pgsize == 0 &&
+ (ret = __fop_set_pgsize(dbp, fhp, real_tmpname)) != 0)
+ goto errmsg;
- /* Construct a file_id. */
- if ((ret = __os_fileid(dbenv, real_tmpname, 1, dbp->fileid)) != 0)
- goto errmsg;
+ /* Construct a file_id. */
+ if ((ret =
+ __os_fileid(dbenv, real_tmpname, 1, dbp->fileid)) != 0)
+ goto errmsg;
+ }
- if ((ret = __db_new_file(dbp, stxn, fhp, tmpname)) != 0)
+ if ((ret = __db_new_file(dbp,
+ F_ISSET(dbp, DB_AM_INMEM) ? txn : stxn, fhp, tmpname)) != 0)
goto err;
/*
@@ -464,9 +594,12 @@ creat2: if ((ret = __db_appname(dbenv,
/*
* Now move the file into place unless we are creating in place (because
- * we created a database in a file that started out 0-length).
+ * we created a database in a file that started out 0-length). If
+ * this is an in-memory file, we may or may not hold the environment
+ * lock depending on how we got here.
*/
- if (!F_ISSET(dbp, DB_AM_COMPENSATE) && !F_ISSET(dbp, DB_AM_RECOVER))
+ if (!F_ISSET(dbp, DB_AM_COMPENSATE) &&
+ !F_ISSET(dbp, DB_AM_RECOVER) && !LOCK_ISSET(elock))
GET_ENVLOCK(dbenv, locker, &elock);
if (F_ISSET(dbp, DB_AM_IN_RENAME)) {
@@ -474,14 +607,15 @@ creat2: if ((ret = __db_appname(dbenv,
__txn_remrem(dbenv, txn, real_name);
} else if (name == tmpname) {
/* We created it in place. */
- } else if (__os_exists(real_name, NULL) == 0) {
+ } else if (!F_ISSET(dbp, DB_AM_INMEM) &&
+ __os_exists(dbenv, real_name, NULL) == 0) {
/*
* Someone managed to create the file; remove our temp
* and try to open the file that now exists.
*/
(void)__fop_remove(dbenv,
NULL, dbp->fileid, tmpname, DB_APP_DATA, dflags);
- (void)__ENV_LPUT(dbenv, dbp->handle_lock, 0);
+ (void)__ENV_LPUT(dbenv, dbp->handle_lock);
LOCK_INIT(dbp->handle_lock);
if (stxn != NULL) {
@@ -493,10 +627,10 @@ creat2: if ((ret = __db_appname(dbenv,
goto reopen;
}
- if ((ret = __fop_lock_handle(dbenv,
+ if (name != NULL && (ret = __fop_lock_handle(dbenv,
dbp, locker, DB_LOCK_WRITE, &elock, NOWAIT_FLAG(txn))) != 0)
goto err;
- if (tmpname != name && (ret = __fop_rename(dbenv,
+ if (tmpname != NULL && tmpname != name && (ret = __fop_rename(dbenv,
stxn, tmpname, name, dbp->fileid, DB_APP_DATA, dflags)) != 0)
goto err;
@@ -513,7 +647,7 @@ creat2: if ((ret = __db_appname(dbenv,
F_SET(dbp, DB_AM_CREATED);
if (0) {
-errmsg: __db_err(dbenv, "%s: %s", name, db_strerror(ret));
+errmsg: __db_err(dbenv, ret, "%s", name);
err: CLOSE_HANDLE(dbp, fhp);
if (stxn != NULL)
@@ -522,8 +656,8 @@ err: CLOSE_HANDLE(dbp, fhp);
(void)__fop_remove(dbenv,
NULL, NULL, tmpname, DB_APP_DATA, dflags);
if (txn == NULL)
- (void)__ENV_LPUT(dbenv, dbp->handle_lock, 0);
- (void)__ENV_LPUT(dbenv, elock, 0);
+ (void)__ENV_LPUT(dbenv, dbp->handle_lock);
+ (void)__ENV_LPUT(dbenv, elock);
if (created_locker) {
(void)__lock_id_free(dbenv, dbp->lid);
dbp->lid = DB_LOCK_INVALIDID;
@@ -537,7 +671,7 @@ done: /*
*/
if (!truncating && tmpname != NULL && tmpname != name)
__os_free(dbenv, tmpname);
- if (real_name != NULL)
+ if (real_name != name && real_name != NULL)
__os_free(dbenv, real_name);
if (real_tmpname != NULL)
__os_free(dbenv, real_tmpname);
@@ -569,7 +703,7 @@ __fop_set_pgsize(dbp, fhp, name)
* default pagesize to 16K.
*/
if ((ret = __os_ioinfo(dbenv, name, fhp, NULL, NULL, &iopsize)) != 0) {
- __db_err(dbenv, "%s: %s", name, db_strerror(ret));
+ __db_err(dbenv, ret, "%s", name);
return (ret);
}
if (iopsize < 512)
@@ -681,7 +815,7 @@ __fop_subdb_setup(dbp, txn, mname, name, mode, flags)
* If there was no transaction and we created this database,
* then we need to undo the update of the master database.
*/
- if (F_ISSET(dbp, DB_AM_CREATED) && txn != NULL)
+ if (F_ISSET(dbp, DB_AM_CREATED) && txn == NULL)
(void)__db_master_update(mdbp, dbp, txn,
name, dbp->type, MU_REMOVE, NULL, 0);
F_CLR(dbp, DB_AM_CREATED);
@@ -721,7 +855,7 @@ __fop_subdb_setup(dbp, txn, mname, name, mode, flags)
err:
DB_TEST_RECOVERY_LABEL
if (txn == NULL)
- (void)__ENV_LPUT(dbenv, dbp->handle_lock, 0);
+ (void)__ENV_LPUT(dbenv, dbp->handle_lock);
}
/*
@@ -734,7 +868,7 @@ DB_TEST_RECOVERY_LABEL
* before we register this event, we'd better remove any
* events that we've already registered for the master.
*/
- if (!F_ISSET(dbp, DB_AM_RECOVER) && txn != NULL) {
+ if (!F_ISSET(dbp, DB_AM_RECOVER) && IS_REAL_TXN(txn)) {
/* Unregister old master events. */
__txn_remlock(dbenv,
txn, &mdbp->handle_lock, DB_LOCK_INVALIDID);
@@ -777,7 +911,6 @@ __fop_remove_setup(dbp, txn, name, flags)
DB_ENV *dbenv;
DB_FH *fhp;
DB_LOCK elock;
- u_int32_t refcnt;
u_int8_t mbuf[DBMETASIZE];
int ret;
@@ -786,13 +919,14 @@ __fop_remove_setup(dbp, txn, name, flags)
PANIC_CHECK(dbenv);
LOCK_INIT(elock);
fhp = NULL;
+ ret = 0;
/* Create locker if necessary. */
retry: if (LOCKING_ON(dbenv)) {
if (txn != NULL)
dbp->lid = txn->txnid;
else if (dbp->lid == DB_LOCK_INVALIDID) {
- if ((ret = __lock_id(dbenv, &dbp->lid)) != 0)
+ if ((ret = __lock_id(dbenv, &dbp->lid, NULL)) != 0)
goto err;
}
}
@@ -808,7 +942,7 @@ retry: if (LOCKING_ON(dbenv)) {
* that we shouldn't close the handle.
*/
fhp = dbp->saved_open_fhp;
- DB_ASSERT(LF_ISSET(DB_FCNTL_LOCKING) || fhp == NULL);
+ DB_ASSERT(dbenv, LF_ISSET(DB_FCNTL_LOCKING) || fhp == NULL);
/*
* Lock environment to protect file open. That will enable us to
@@ -816,15 +950,24 @@ retry: if (LOCKING_ON(dbenv)) {
* the handle.
*/
GET_ENVLOCK(dbenv, dbp->lid, &elock);
- if (fhp == NULL &&
- (ret = __os_open(dbenv, name, DB_OSO_RDONLY, 0, &fhp)) != 0)
- goto err;
- if ((ret = __fop_read_meta(dbenv,
- name, mbuf, sizeof(mbuf), fhp, 0, NULL)) != 0)
+
+ /* Open database. */
+ if (F_ISSET(dbp, DB_AM_INMEM)) {
+ if ((ret = __db_dbenv_mpool(dbp, name, flags)) == 0)
+ ret = __os_strdup(dbenv, name, &dbp->dname);
+ } else if (fhp == NULL)
+ ret = __os_open(dbenv, name, DB_OSO_RDONLY, 0, &fhp);
+ if (ret != 0)
goto err;
- if ((ret =
- __db_meta_setup(dbenv, dbp, name, (DBMETA *)mbuf, flags, 1)) != 0)
+ /* Get meta-data */
+ if (F_ISSET(dbp, DB_AM_INMEM))
+ ret = __fop_inmem_read_meta(dbp, txn, name, flags);
+ else if ((ret = __fop_read_meta(dbenv,
+ name, mbuf, sizeof(mbuf), fhp, 0, NULL)) == 0)
+ ret = __db_meta_setup(dbenv,
+ dbp, name, (DBMETA *)mbuf, flags, 1);
+ if (ret != 0)
goto err;
/*
@@ -839,55 +982,45 @@ retry: if (LOCKING_ON(dbenv)) {
* Close the file, block on the lock, clean up the dbp, and
* then start all over again.
*/
- if (!LF_ISSET(DB_FCNTL_LOCKING)) {
+ if (!F_ISSET(dbp, DB_AM_INMEM) && !LF_ISSET(DB_FCNTL_LOCKING)) {
(void)__os_closehandle(dbenv, fhp);
fhp = NULL;
}
- if (ret == DB_LOCK_NOTEXIST) {
- if ((ret = __ENV_LPUT(dbenv, elock, 0)) != 0)
- goto err;
- } else if (ret != DB_LOCK_NOTGRANTED ||
+ if (ret != DB_LOCK_NOTGRANTED ||
(txn != NULL && F_ISSET(txn, TXN_NOWAIT)))
goto err;
else if ((ret = __fop_lock_handle(dbenv,
- dbp, dbp->lid, DB_LOCK_WRITE, &elock, 0)) != 0 &&
- ret != DB_LOCK_NOTEXIST)
+ dbp, dbp->lid, DB_LOCK_WRITE, &elock, 0)) != 0)
goto err;
- if (txn != NULL)
- dbp->lid = DB_LOCK_INVALIDID;
- (void)__db_refresh(dbp, txn, DB_NOSYNC, NULL);
+ if (F_ISSET(dbp, DB_AM_INMEM)) {
+ (void)__lock_put(dbenv, &dbp->handle_lock);
+ (void)__db_refresh(dbp, txn, DB_NOSYNC, NULL, 1);
+ } else {
+ if (txn != NULL)
+ dbp->lid = DB_LOCK_INVALIDID;
+ (void)__db_refresh(dbp, txn, DB_NOSYNC, NULL, 0);
+ }
goto retry;
- } else if ((ret = __ENV_LPUT(dbenv, elock, 0)) != 0)
- goto err;
-
- /* Check if the file is already open. */
- if ((ret = __memp_get_refcnt(dbenv, dbp->fileid, &refcnt)) != 0)
+ } else if ((ret = __ENV_LPUT(dbenv, elock)) != 0)
goto err;
- /*
- * Now, error check. If the file is already open (refcnt != 0), then
- * we must have it open (since we got the lock) and we need to panic,
- * because this is a self deadlock and the application has a bug.
- * If the file isn't open, but it's in the midst of a rename then
- * this file doesn't really exist.
- */
- if (refcnt != 0) {
- __db_err(dbenv,
-"Attempting to remove file open in current transaction causing self-deadlock");
- ret = __db_panic(dbenv, DB_LOCK_DEADLOCK);
- } else if (F_ISSET(dbp, DB_AM_IN_RENAME))
+ else if (F_ISSET(dbp, DB_AM_IN_RENAME))
ret = ENOENT;
if (0) {
-err: (void)__ENV_LPUT(dbenv, elock, 0);
+err: (void)__ENV_LPUT(dbenv, elock);
}
if (fhp != NULL && !LF_ISSET(DB_FCNTL_LOCKING))
(void)__os_closehandle(dbenv, fhp);
/*
- * If we are going to proceed with the removal, then we need to make
- * sure that we don't leave any pages around in the mpool.
+ * If this is a real file and we are going to proceed with the removal,
+ * then we need to make sure that we don't leave any pages around in the
+ * mpool since the file is closed and will be reopened again before
+ * access. However, this might be an in-memory file, in which case
+ * we will handle the discard from the mpool later as it's the "real"
+ * removal of the database.
*/
- if (ret == 0)
+ if (ret == 0 && !F_ISSET(dbp, DB_AM_INMEM))
F_SET(dbp, DB_AM_DISCARD);
return (ret);
}
@@ -926,13 +1059,13 @@ __fop_read_meta(dbenv, name, buf, size, fhp, errok, nbytesp)
if (ret != 0) {
if (!errok)
- __db_err(dbenv, "%s: %s", name, db_strerror(ret));
+ __db_err(dbenv, ret, "%s", name);
goto err;
}
if (nr != size) {
if (!errok)
- __db_err(dbenv,
+ __db_errx(dbenv,
"%s: unexpected file type or format", name);
ret = EINVAL;
}
@@ -957,100 +1090,418 @@ __fop_dummy(dbp, txn, old, new, flags)
const char *old, *new;
u_int32_t flags;
{
- DB *tmpdbp, *t2dbp;
+ DB *tmpdbp;
DB_ENV *dbenv;
- DB_FH *fhp;
- DB_LOCK elock;
- DB_LSN lsn;
- DBT fiddbt, namedbt, tmpdbt;
DB_TXN *stxn;
char *back;
- char *realback, *realnew, *realold;
int ret, t_ret;
- size_t len;
u_int8_t mbuf[DBMETASIZE];
- u_int32_t dflags, locker, stxnid;
+ u_int32_t locker;
dbenv = dbp->dbenv;
- LOCK_INIT(elock);
- realback = NULL;
- realnew = NULL;
- realold = NULL;
back = NULL;
stxn = NULL;
- tmpdbp = t2dbp = NULL;
- fhp = NULL;
- dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0;
+ tmpdbp = NULL;
- DB_ASSERT(txn != NULL);
+ DB_ASSERT(dbenv, txn != NULL);
locker = txn->txnid;
- /* Begin sub transaction to encapsulate the rename. */
+ /*
+ * Begin sub transaction to encapsulate the rename. Note that we
+ * expect the swap calls (__fop_inmem_swap or __fop_ondisk_swap) to
+ * complete the sub-transaction, aborting on error and committing on success.
+ */
if (TXN_ON(dbenv) && (ret = __txn_begin(dbenv, txn, &stxn, 0)) != 0)
goto err;
/* We need to create a dummy file as a place holder. */
if ((ret = __db_backup_name(dbenv, new, stxn, &back)) != 0)
goto err;
+ /* Create a dummy dbp handle. */
+ if ((ret = db_create(&tmpdbp, dbenv, 0)) != 0)
+ goto err;
+
+ memset(mbuf, 0, sizeof(mbuf));
+ ret = F_ISSET(dbp, DB_AM_INMEM) ?
+ __fop_inmem_dummy(tmpdbp, stxn, back, mbuf) :
+ __fop_ondisk_dummy(tmpdbp, stxn, back, mbuf, flags);
+
+ if (ret != 0)
+ goto err;
+
+ ret = F_ISSET(dbp, DB_AM_INMEM) ?
+ __fop_inmem_swap(dbp, tmpdbp, stxn, old, new, back, locker) :
+ __fop_ondisk_swap(dbp, tmpdbp, stxn, old, new, back, locker, flags);
+ stxn = NULL;
+ if (ret != 0)
+ goto err;
+
+err: if (stxn != NULL)
+ (void)__txn_abort(stxn);
+ if (tmpdbp != NULL &&
+ (t_ret = __db_close(tmpdbp, NULL, 0)) != 0 && ret == 0)
+ ret = t_ret;
+ if (back != NULL)
+ __os_free(dbenv, back);
+ return (ret);
+}
+
+/*
+ * __fop_dbrename --
+ * Do the appropriate file locking and file system operations
+ * to effect a dbrename in the absence of transactions (__fop_dummy
+ * and the subsequent calls in __db_rename do the work for the
+ * transactional case).
+ *
+ * PUBLIC: int __fop_dbrename __P((DB *, const char *, const char *));
+ */
+int
+__fop_dbrename(dbp, old, new)
+ DB *dbp;
+ const char *old, *new;
+{
+ DB_ENV *dbenv;
+ DB_LOCK elock;
+ char *real_new, *real_old;
+ int ret, t_ret;
+
+ dbenv = dbp->dbenv;
+ real_new = NULL;
+ real_old = NULL;
+ LOCK_INIT(elock);
+
+ if (F_ISSET(dbp, DB_AM_INMEM)) {
+ real_new = (char *)new;
+ real_old = (char *)old;
+ } else {
+ /* Get full names. */
+ if ((ret = __db_appname(dbenv,
+ DB_APP_DATA, new, 0, NULL, &real_new)) != 0)
+ goto err;
+
+ if ((ret = __db_appname(dbenv,
+ DB_APP_DATA, old, 0, NULL, &real_old)) != 0)
+ goto err;
+
+ }
+
+ /*
+ * It is an error to rename a file over one that already exists,
+ * as that wouldn't be transaction-safe. We check explicitly
+ * for ondisk files, but it's done by __memp_nameop for in-memory ones.
+ */
+ GET_ENVLOCK(dbenv, dbp->lid, &elock);
+ ret = F_ISSET(dbp, DB_AM_INMEM) ? ENOENT :
+ __os_exists(dbenv, real_new, NULL);
+
+ if (ret == 0) {
+ ret = EEXIST;
+ __db_errx(dbenv, "rename: file %s exists", real_new);
+ goto err;
+ }
+
+ ret = __memp_nameop(dbenv,
+ dbp->fileid, new, real_old, real_new, F_ISSET(dbp, DB_AM_INMEM));
+
+err: if ((t_ret = __ENV_LPUT(dbenv, elock)) != 0 && ret == 0)
+ ret = t_ret;
+ if (!F_ISSET(dbp, DB_AM_INMEM) && real_old != NULL)
+ __os_free(dbenv, real_old);
+ if (!F_ISSET(dbp, DB_AM_INMEM) && real_new != NULL)
+ __os_free(dbenv, real_new);
+ return (ret);
+}
+
+static int
+__fop_inmem_create(dbp, name, txn, flags)
+ DB *dbp;
+ const char *name;
+ DB_TXN *txn;
+ u_int32_t flags;
+{
+ DB_ENV *dbenv;
+ DB_LSN lsn;
+ DBT fid_dbt, name_dbt;
+ int ret;
+ int32_t lfid;
+ u_int32_t *p32;
+
+ dbenv = dbp->dbenv;
+
+ MAKE_INMEM(dbp);
+
+ /* Set the pagesize if it isn't yet set. */
+ if (dbp->pgsize == 0)
+ dbp->pgsize = DB_DEF_IOSIZE;
+
+ /*
+ * Construct a file_id.
+ *
+ * If this file has no name, then we only need a fileid for locking.
+ * If this file has a name, we need the fileid both for locking and
+ * matching in the memory pool. So, with unnamed in-memory databases,
+ * use a lock_id. For named in-memory files, we need to find a value
+ * that we can use to uniquely identify a name/fid pair. We use a
+ * combination of a unique id (__os_unique_id) and the leading bytes
+ * of the original name (copied with strncpy, not hashed).
+ */
+ if (name == NULL) {
+ if (LOCKING_ON(dbenv) && (ret =
+ __lock_id(dbenv, (u_int32_t *)dbp->fileid, NULL)) != 0)
+ goto err;
+ } else {
+ p32 = (u_int32_t *)(&dbp->fileid[0]);
+ __os_unique_id(dbenv, p32);
+ p32++;
+ (void)strncpy(
+ (char *)p32, name, DB_FILE_ID_LEN - sizeof(u_int32_t));
+ dbp->preserve_fid = 1;
+
+ if (DBENV_LOGGING(dbenv) && dbp->log_filename != NULL)
+ memcpy(dbp->log_filename->ufid,
+ dbp->fileid, DB_FILE_ID_LEN);
+ }
+
+ /* Now, set the fileid. */
+ if ((ret = __memp_set_fileid(dbp->mpf, dbp->fileid)) != 0)
+ goto err;
+
+ if ((ret = __db_dbenv_mpool(dbp, name, flags)) != 0)
+ goto err;
+
+ if (name != NULL && DBENV_LOGGING(dbenv)) {
+ DB_INIT_DBT(name_dbt, name, strlen(name) + 1);
+ memset(&fid_dbt, 0, sizeof(fid_dbt));
+ fid_dbt.data = dbp->fileid;
+ fid_dbt.size = DB_FILE_ID_LEN;
+ lfid = dbp->log_filename == NULL ?
+ DB_LOGFILEID_INVALID : dbp->log_filename->id;
+ if ((ret = __crdel_inmem_create_log(dbenv, txn,
+ &lsn, 0, lfid, &name_dbt, &fid_dbt, dbp->pgsize)) != 0)
+ goto err;
+ }
+
+ F_SET(dbp, DB_AM_CREATED);
+
+err:
+ return (ret);
+}
+
+static int
+__fop_inmem_read_meta(dbp, txn, name, flags)
+ DB *dbp;
+ DB_TXN *txn;
+ const char *name;
+ u_int32_t flags;
+{
+ DBMETA *metap;
+ db_pgno_t pgno;
+ int ret, t_ret;
+
+ pgno = PGNO_BASE_MD;
+ if ((ret = __memp_fget(dbp->mpf, &pgno, txn, 0, &metap)) != 0)
+ return (ret);
+ ret = __db_meta_setup(dbp->dbenv, dbp, name, metap, flags, 1);
+
+ if ((t_ret = __memp_fput(dbp->mpf, metap, 0)) && ret == 0)
+ ret = t_ret;
+
+ return (ret);
+}
+
+static int
+__fop_ondisk_dummy(dbp, txn, name, mbuf, flags)
+ DB *dbp;
+ DB_TXN *txn;
+ const char *name;
+ u_int8_t *mbuf;
+ u_int32_t flags;
+{
+ DB_ENV *dbenv;
+ int ret;
+ char *realname;
+ u_int32_t dflags;
+
+ realname = NULL;
+ dbenv = dbp->dbenv;
+ dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0;
+
if ((ret = __db_appname(dbenv,
- DB_APP_DATA, back, flags, NULL, &realback)) != 0)
+ DB_APP_DATA, name, flags, NULL, &realname)) != 0)
goto err;
+
if ((ret = __fop_create(dbenv,
- stxn, NULL, back, DB_APP_DATA, 0, dflags)) != 0)
+ txn, NULL, name, DB_APP_DATA, 0, dflags)) != 0)
goto err;
- memset(mbuf, 0, sizeof(mbuf));
if ((ret =
- __os_fileid(dbenv, realback, 1, ((DBMETA *)mbuf)->uid)) != 0)
+ __os_fileid(dbenv, realname, 1, ((DBMETA *)mbuf)->uid)) != 0)
goto err;
+
((DBMETA *)mbuf)->magic = DB_RENAMEMAGIC;
- if ((ret = __fop_write(dbenv, stxn, back,
+ if ((ret = __fop_write(dbenv, txn, name,
DB_APP_DATA, NULL, 0, 0, 0, mbuf, DBMETASIZE, 1, dflags)) != 0)
goto err;
- /* Create a dummy dbp handle. */
- if ((ret = db_create(&tmpdbp, dbenv, 0)) != 0)
+ memcpy(dbp->fileid, ((DBMETA *)mbuf)->uid, DB_FILE_ID_LEN);
+
+err: if (realname != NULL)
+ __os_free(dbenv, realname);
+
+ return (ret);
+}
+
+static int
+__fop_inmem_dummy(dbp, txn, name, mbuf)
+ DB *dbp;
+ DB_TXN *txn;
+ const char *name;
+ u_int8_t *mbuf;
+{
+ DBMETA *metap;
+ db_pgno_t pgno;
+ int ret, t_ret;
+
+ if ((ret = __fop_inmem_create(dbp, name, txn, DB_CREATE)) != 0)
+ return (ret);
+
+ pgno = PGNO_BASE_MD;
+ if ((ret = __memp_fget(dbp->mpf, &pgno, txn,
+ DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &metap)) != 0)
+ return (ret);
+ /* Check whether the file already existed (non-zero magic). */
+ if (metap->magic != 0)
+ ret = EEXIST;
+ else
+ metap->magic = DB_RENAMEMAGIC;
+
+ /* Copy the fileid onto the meta-data page. */
+ memcpy(metap->uid, dbp->fileid, DB_FILE_ID_LEN);
+
+ if ((t_ret = __memp_fput(dbp->mpf, metap,
+ ret == 0 ? 0 : DB_MPOOL_DISCARD)) != 0 && ret == 0)
+ ret = t_ret;
+
+ if (ret != 0)
goto err;
- memcpy(tmpdbp->fileid, ((DBMETA *)mbuf)->uid, DB_FILE_ID_LEN);
- /* Now, lock the name space while we initialize this file. */
- if ((ret = __db_appname(dbenv,
- DB_APP_DATA, new, 0, NULL, &realnew)) != 0)
+ ((DBMETA *)mbuf)->magic = DB_RENAMEMAGIC;
+
+err: return (ret);
+}
+
+static int
+__fop_ondisk_swap(dbp, tmpdbp, txn, old, new, back, locker, flags)
+ DB *dbp, *tmpdbp;
+ DB_TXN *txn;
+ const char *old, *new, *back;
+ u_int32_t locker, flags;
+{
+ DB_ENV *dbenv;
+ DB_FH *fhp;
+ DB_LOCK elock;
+ DB_LSN lsn;
+ DBT fiddbt, namedbt, tmpdbt;
+ DB_TXN *parent;
+ char *realold, *realnew;
+ int ret, t_ret;
+ u_int8_t mbuf[DBMETASIZE];
+ u_int32_t child_txnid, dflags;
+
+ dbenv = dbp->dbenv;
+ DB_ASSERT(dbenv, txn != NULL);
+ DB_ASSERT(dbenv, old != NULL);
+
+ realold = realnew = NULL;
+ LOCK_INIT(elock);
+ fhp = NULL;
+ dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0;
+
+ if ((ret =
+ __db_appname(dbenv, DB_APP_DATA, new, 0, NULL, &realnew)) != 0)
goto err;
- GET_ENVLOCK(dbenv, locker, &elock);
- if (__os_exists(realnew, NULL) == 0) {
+
+ /* Now, lock the name space while we initialize this file. */
+retry: GET_ENVLOCK(dbenv, locker, &elock);
+ if (__os_exists(dbenv, realnew, NULL) == 0) {
/*
* It is possible that the only reason this file exists is
* because we've done a previous rename of it and we have
* left a placeholder here. We need to check for that case
* and allow this rename to succeed if that's the case.
*/
- if ((ret = db_create(&t2dbp, dbenv, 0)) != 0)
- goto err;
if ((ret = __os_open(dbenv, realnew, 0, 0, &fhp)) != 0)
goto err;
if ((ret = __fop_read_meta(dbenv,
- realnew, mbuf, sizeof(mbuf), fhp, 0, &len)) != 0 ||
+ realnew, mbuf, sizeof(mbuf), fhp, 0, NULL)) != 0 ||
(ret = __db_meta_setup(dbenv,
- t2dbp, realnew, (DBMETA *)mbuf, 0, 1)) != 0) {
+ tmpdbp, realnew, (DBMETA *)mbuf, 0, 1)) != 0) {
ret = EEXIST;
goto err;
}
/*
- * Now, try to acquire the handle lock. If it's from our txn,
- * then we'll get the lock. If it's not, then someone else has
- * it locked, and we need to report this as an error. If we
- * know we can get the lock, we can immediately release it,
- * which we need to do since this is a temporary handle.
+ * Now, try to acquire the handle lock. If the handle is locked
+ * by our current transaction, then we'll get it and life is
+ * good.
+ *
+ * Alternatively, if it's not locked at all, we'll get the lock,
+ * but we will realize the file exists and consider this an error.
+ *
+ * However, if it's held by another transaction, then there
+ * could be two different scenarios: 1) the file is in the
+ * midst of being created or deleted and when that transaction
+ * is over, we might be able to proceed. 2) the file is open
+ * and exists and we should report an error. In order to
+ * distinguish these two cases, we do the following. First, we
+ * try to acquire a READLOCK. If the handle is in the midst of
+ * being created, then we'll block because a writelock is held.
+ * In that case, we should request a blocking write, and when we
+ * get the lock, we should then go back and check to see if the
+ * object exists and start all over again.
+ *
+ * If we got the READLOCK, then either no one is holding the
+ * lock or someone has an open handle and the fact that the file
+ * exists is problematic. So, in this case, we request the
+ * WRITELOCK non-blocking -- if it succeeds, we're golden. If
+ * it fails, then the file exists and we return EEXIST.
*/
if ((ret = __fop_lock_handle(dbenv,
- t2dbp, locker, DB_LOCK_WRITE, NULL, DB_LOCK_NOWAIT)) != 0)
- ret = EEXIST;
- else {
- (void)__lock_put(dbenv, &t2dbp->handle_lock, 0);
- if (!F_ISSET(t2dbp, DB_AM_IN_RENAME))
+ tmpdbp, locker, DB_LOCK_READ, NULL, DB_LOCK_NOWAIT)) != 0) {
+ /*
+ * Someone holds a writelock. Try for the WRITELOCK
+ * and after we get it, retry.
+ */
+ if ((ret = __fop_lock_handle(dbenv, tmpdbp,
+ locker, DB_LOCK_WRITE, &elock, 0)) != 0)
+ goto err;
+
+ /*
+ * We now have the write lock; release it and start
+ * over.
+ */
+ (void)__lock_put(dbenv, &tmpdbp->handle_lock);
+ (void)__db_refresh(tmpdbp, NULL, 0, NULL, 0);
+ goto retry;
+ } else {
+ /* We got the read lock; try to upgrade it. */
+ ret = __fop_lock_handle(dbenv,
+ tmpdbp, locker, DB_LOCK_WRITE,
+ NULL, DB_LOCK_UPGRADE | DB_LOCK_NOWAIT);
+ if (ret != 0) {
+ /*
+ * We did not get the writelock, so someone
+ * has the handle open. This is an error.
+ */
+ (void)__lock_put(dbenv, &tmpdbp->handle_lock);
+ ret = EEXIST;
+ } else if (F_ISSET(tmpdbp, DB_AM_IN_RENAME))
+ /* We got the lock and are renaming it. */
+ ret = 0;
+ else { /* We got the lock, but the file exists. */
+ (void)__lock_put(dbenv, &tmpdbp->handle_lock);
ret = EEXIST;
+ }
}
if ((t_ret = __os_closehandle(dbenv, fhp)) != 0 && ret == 0)
ret = t_ret;
@@ -1064,10 +1515,10 @@ __fop_dummy(dbp, txn, old, new, flags)
* swap for the handle lock.
*/
if ((ret = __fop_rename(dbenv,
- stxn, old, new, dbp->fileid, DB_APP_DATA, dflags)) != 0)
+ txn, old, new, dbp->fileid, DB_APP_DATA, dflags)) != 0)
goto err;
if ((ret = __fop_rename(dbenv,
- stxn, back, old, tmpdbp->fileid, DB_APP_DATA, dflags)) != 0)
+ txn, back, old, tmpdbp->fileid, DB_APP_DATA, dflags)) != 0)
goto err;
if ((ret = __fop_lock_handle(dbenv,
tmpdbp, locker, DB_LOCK_WRITE, &elock, NOWAIT_FLAG(txn))) != 0)
@@ -1080,108 +1531,174 @@ __fop_dummy(dbp, txn, old, new, flags)
*/
LOCK_INIT(tmpdbp->handle_lock);
- if (stxn != NULL) {
- /* Commit the child. */
- stxnid = stxn->txnid;
- ret = __txn_commit(stxn, 0);
- stxn = NULL;
-
- /* Now log the child information in the parent. */
- memset(&fiddbt, 0, sizeof(fiddbt));
- memset(&tmpdbt, 0, sizeof(fiddbt));
- memset(&namedbt, 0, sizeof(namedbt));
- fiddbt.data = dbp->fileid;
- fiddbt.size = DB_FILE_ID_LEN;
- tmpdbt.data = tmpdbp->fileid;
- tmpdbt.size = DB_FILE_ID_LEN;
- namedbt.data = (void *)old;
- namedbt.size = (u_int32_t)strlen(old) + 1;
- if ((t_ret =
- __fop_file_remove_log(dbenv, txn, &lsn, 0, &fiddbt,
- &tmpdbt, &namedbt, DB_APP_DATA, stxnid)) != 0 && ret == 0)
- ret = t_ret;
- }
+ /* Commit the child. */
+ child_txnid = txn->txnid;
+ parent = txn->parent;
+ ret = __txn_commit(txn, 0);
+ txn = NULL;
+
+ /* Now log the child information in the parent. */
+ memset(&fiddbt, 0, sizeof(fiddbt));
+ fiddbt.data = dbp->fileid;
+ fiddbt.size = DB_FILE_ID_LEN;
+ memset(&tmpdbt, 0, sizeof(fiddbt));
+ tmpdbt.data = tmpdbp->fileid;
+ tmpdbt.size = DB_FILE_ID_LEN;
+ DB_INIT_DBT(namedbt, old, strlen(old) + 1);
+ if ((t_ret = __fop_file_remove_log(dbenv,
+ parent, &lsn, 0, &fiddbt, &tmpdbt, &namedbt,
+ (u_int32_t)DB_APP_DATA, child_txnid)) != 0 && ret == 0)
+ ret = t_ret;
/* This is a delayed delete of the dummy file. */
if ((ret = __db_appname(dbenv,
DB_APP_DATA, old, flags, NULL, &realold)) != 0)
goto err;
- if ((ret = __txn_remevent(dbenv, txn, realold, NULL)) != 0)
+
+ if ((ret = __txn_remevent(dbenv, parent, realold, NULL, 0)) != 0)
goto err;
-err: (void)__ENV_LPUT(dbenv, elock, 0);
- if (stxn != NULL)
- (void)__txn_abort(stxn);
- if (tmpdbp != NULL &&
- (t_ret = __db_close(tmpdbp, NULL, 0)) != 0 && ret == 0)
- ret = t_ret;
- if (t2dbp != NULL &&
- (t_ret = __db_close(t2dbp, NULL, 0)) != 0 && ret == 0)
- ret = t_ret;
- if (fhp != NULL)
- (void)__os_closehandle(dbenv, fhp);
- if (realold != NULL)
- __os_free(dbenv, realold);
+err: if (txn != NULL) /* Ret must already be set, so void abort. */
+ (void)__txn_abort(txn);
+
+ (void)__ENV_LPUT(dbenv, elock);
if (realnew != NULL)
__os_free(dbenv, realnew);
- if (realback != NULL)
- __os_free(dbenv, realback);
- if (back != NULL)
- __os_free(dbenv, back);
+ if (realold != NULL)
+ __os_free(dbenv, realold);
return (ret);
}
-/*
- * __fop_dbrename --
- * Do the appropriate file locking and file system operations
- * to effect a dbrename in the absence of transactions (__fop_dummy
- * and the subsequent calls in __db_rename do the work for the
- * transactional case).
- *
- * PUBLIC: int __fop_dbrename __P((DB *, const char *, const char *));
- */
-int
-__fop_dbrename(dbp, old, new)
- DB *dbp;
- const char *old, *new;
+static int
+__fop_inmem_swap(olddbp, backdbp, txn, old, new, back, locker)
+ DB *olddbp, *backdbp;
+ DB_TXN *txn;
+ const char *old, *new, *back;
+ u_int32_t locker;
{
DB_ENV *dbenv;
DB_LOCK elock;
- char *real_new, *real_old;
+ DB_LSN lsn;
+ DB_TXN *parent;
+ DBT fid_dbt, n1_dbt, n2_dbt;
+ DB *tmpdbp;
int ret, t_ret;
- dbenv = dbp->dbenv;
- real_new = NULL;
- real_old = NULL;
- LOCK_INIT(elock);
+ dbenv = olddbp->dbenv;
+ parent = txn->parent;
+retry: LOCK_INIT(elock);
+ if ((ret = db_create(&tmpdbp, dbenv, 0)) != 0)
+ return (ret);
+ MAKE_INMEM(tmpdbp);
- /* Find the real newname of the file. */
- if ((ret = __db_appname(dbenv,
- DB_APP_DATA, new, 0, NULL, &real_new)) != 0)
- goto err;
+ GET_ENVLOCK(dbenv, locker, &elock);
+ if ((ret = __db_dbenv_mpool(tmpdbp, new, 0)) == 0) {
+ /*
+ * It is possible that the only reason this database exists is
+ * because we've done a previous rename of it and we have
+ * left a placeholder here. We need to check for that case
+ * and allow this rename to succeed if that's the case.
+ */
+
+ if ((ret = __fop_inmem_read_meta(tmpdbp, txn, new, 0)) != 0) {
+ ret = EEXIST;
+ goto err;
+ }
+
+ /*
+ * Now, try to acquire the handle lock. If it's from our txn,
+ * then we'll get the lock. If it's not, then someone else has
+ * it locked. See the comments in __fop_ondisk_swap for
+ * details.
+ */
+ if ((ret = __fop_lock_handle(dbenv,
+ tmpdbp, locker, DB_LOCK_READ, NULL, DB_LOCK_NOWAIT)) != 0) {
+ /*
+ * Someone holds a writelock. Try for the WRITELOCK
+ * and after we get it, retry.
+ */
+ if ((ret = __fop_lock_handle(dbenv, tmpdbp,
+ locker, DB_LOCK_WRITE, &elock, 0)) != 0)
+ goto err;
+
+ /* We have the write lock; release it and start over. */
+ (void)__lock_put(dbenv, &tmpdbp->handle_lock);
+ (void)__db_close(tmpdbp, NULL, DB_NOSYNC);
+ (void)__ENV_LPUT(dbenv, elock);
+ goto retry;
+ } else {
+ (void)__lock_put(dbenv, &tmpdbp->handle_lock);
+ if (!F_ISSET(tmpdbp, DB_AM_IN_RENAME))
+ ret = EEXIST;
+ }
+ if (ret != 0)
+ goto err;
+ }
+
+ /* Log the renames. */
+ if (LOGGING_ON(dbenv)) {
+ /* Rename old to new. */
+ DB_INIT_DBT(fid_dbt, olddbp->fileid, DB_FILE_ID_LEN);
+ DB_INIT_DBT(n1_dbt, old, strlen(old) + 1);
+ DB_INIT_DBT(n2_dbt, new, strlen(new) + 1);
+ if ((ret = __crdel_inmem_rename_log(dbenv, txn, &lsn, 0,
+ &n1_dbt, &n2_dbt, &fid_dbt)) != 0)
+ goto err;
+
+ /* Rename back to old */
+ fid_dbt.data = backdbp->fileid;
+ DB_SET_DBT(n2_dbt, back, strlen(back) + 1);
+ if ((ret = __crdel_inmem_rename_log(dbenv, txn, &lsn, 0,
+ &n2_dbt, &n1_dbt, &fid_dbt)) != 0)
+ goto err;
+ }
/*
- * It is an error to rename a file over one that already exists,
- * as that wouldn't be transaction-safe.
+ * While we have the namespace locked, do the renames and then
+ * swap for the handle lock. If we ran into a file in the midst
+ * of rename, then we need to delete it first, else nameop is
+ * going to consider it an error.
*/
- GET_ENVLOCK(dbenv, dbp->lid, &elock);
- if (__os_exists(real_new, NULL) == 0) {
- ret = EEXIST;
- __db_err(dbenv, "rename: file %s exists", real_new);
- goto err;
+ if (F_ISSET(tmpdbp, DB_AM_IN_RENAME)) {
+ if ((ret = __memp_nameop(dbenv,
+ tmpdbp->fileid, NULL, new, NULL, 1)) != 0)
+ goto err;
+ __txn_remrem(dbenv, parent, new);
}
- if ((ret = __db_appname(dbenv,
- DB_APP_DATA, old, 0, NULL, &real_old)) != 0)
+ if ((ret = __memp_nameop(dbenv, olddbp->fileid, new, old, new, 1)) != 0)
+ goto err;
+ if ((ret =
+ __memp_nameop(dbenv, backdbp->fileid, old, back, old, 1)) != 0)
goto err;
- ret = __memp_nameop(dbenv, dbp->fileid, new, real_old, real_new);
+ if ((ret = __fop_lock_handle(dbenv,
+ tmpdbp, locker, DB_LOCK_WRITE, &elock, 0)) != 0)
+ goto err;
-err: if ((t_ret = __ENV_LPUT(dbenv, elock, 0)) != 0 && ret == 0)
+ /*
+ * We just acquired a transactional lock on the tmp handle.
+ * We need to null out the tmp handle's lock so that it
+ * doesn't create problems for us in the close path.
+ */
+ LOCK_INIT(tmpdbp->handle_lock);
+
+ DB_ASSERT(dbenv, txn != NULL);
+
+ /* Commit the child. */
+ ret = __txn_commit(txn, 0);
+ txn = NULL;
+
+ if ((ret = __db_inmem_remove(backdbp, parent, old)) != 0)
+ goto err;
+
+err: (void)__ENV_LPUT(dbenv, elock);
+
+ if (txn != NULL)
+ (void)__txn_abort(txn);
+
+ if ((t_ret = __db_close(tmpdbp, NULL, 0)) != 0 && ret == 0)
ret = t_ret;
- if (real_old != NULL)
- __os_free(dbenv, real_old);
- if (real_new != NULL)
- __os_free(dbenv, real_new);
+
return (ret);
}