diff options
Diffstat (limited to 'db/fileops/fop_basic.c')
-rw-r--r-- | db/fileops/fop_basic.c | 90 |
1 files changed, 50 insertions, 40 deletions
diff --git a/db/fileops/fop_basic.c b/db/fileops/fop_basic.c index 36a958e95..9563ddbc1 100644 --- a/db/fileops/fop_basic.c +++ b/db/fileops/fop_basic.c @@ -1,22 +1,16 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2001-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 2001-2006 + * Oracle Corporation. All rights reserved. * - * $Id: fop_basic.c,v 1.32 2004/11/15 20:04:50 bostic Exp $ + * $Id: fop_basic.c,v 12.19 2006/09/19 15:06:59 bostic Exp $ */ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <string.h> -#include <sys/types.h> -#endif - #include "db_int.h" #include "dbinc/db_page.h" -#include "dbinc/db_shash.h" #include "dbinc/fop.h" #include "dbinc/log.h" #include "dbinc/mp.h" @@ -24,9 +18,33 @@ #include "dbinc/db_am.h" /* - * This file implements the basic file-level operations. This code - * ought to be fairly independent of DB, other than through its - * error-reporting mechanism. + * The transactional guarantees Berkeley DB provides for file + * system level operations (database physical file create, delete, + * rename) are based on our understanding of current file system + * semantics; a system that does not provide these semantics and + * guarantees could be in danger. + * + * First, as in standard database changes, fsync and fdatasync must + * work: when applied to the log file, the records written into the + * log must be transferred to stable storage. + * + * Second, it must not be possible for the log file to be removed + * without previous file system level operations being flushed to + * stable storage. Berkeley DB applications write log records + * describing file system operations into the log, then perform the + * file system operation, then commit the enclosing transaction + * (which flushes the log file to stable storage). Subsequently, + * a database environment checkpoint may make it possible for the + * application to remove the log file containing the record of the + * file system operation. DB's transactional guarantees for file + * system operations require the log file removal not succeed until + * all previous filesystem operations have been flushed to stable + * storage. In other words, the flush of the log file, or the + * removal of the log file, must block until all previous + * filesystem operations have been flushed to stable storage. This + * semantic is not, as far as we know, required by any existing + * standards document, but we have never seen a filesystem where + * it does not apply. */ /* @@ -55,20 +73,20 @@ __fop_create(dbenv, txn, fhpp, name, appname, mode, flags) char *real_name; real_name = NULL; + fhp = NULL; if ((ret = __db_appname(dbenv, appname, name, 0, NULL, &real_name)) != 0) return (ret); if (mode == 0) - mode = __db_omode("rw----"); + mode = __db_omode(OWNER_RW); if (DBENV_LOGGING(dbenv)) { - memset(&data, 0, sizeof(data)); - data.data = (void *)name; - data.size = (u_int32_t)strlen(name) + 1; + DB_INIT_DBT(data, name, strlen(name) + 1); if ((ret = __fop_create_log(dbenv, txn, &lsn, - flags | DB_FLUSH, &data, (u_int32_t)appname, mode)) != 0) + flags | DB_FLUSH, + &data, (u_int32_t)appname, (u_int32_t)mode)) != 0) goto err; } @@ -115,23 +133,21 @@ __fop_remove(dbenv, txn, fileid, name, appname, flags) __db_appname(dbenv, appname, name, 0, NULL, &real_name)) != 0) goto err; - if (txn == NULL) { + if (!IS_REAL_TXN(txn)) { if (fileid != NULL && (ret = __memp_nameop( - dbenv, fileid, NULL, real_name, NULL)) != 0) + dbenv, fileid, NULL, real_name, NULL, 0)) != 0) goto err; } else { if (DBENV_LOGGING(dbenv)) { memset(&fdbt, 0, sizeof(ndbt)); fdbt.data = fileid; fdbt.size = fileid == NULL ? 0 : DB_FILE_ID_LEN; - memset(&ndbt, 0, sizeof(ndbt)); - ndbt.data = (void *)name; - ndbt.size = (u_int32_t)strlen(name) + 1; - if ((ret = __fop_remove_log(dbenv, - txn, &lsn, flags, &ndbt, &fdbt, appname)) != 0) + DB_INIT_DBT(ndbt, name, strlen(name) + 1); + if ((ret = __fop_remove_log(dbenv, txn, &lsn, + flags, &ndbt, &fdbt, (u_int32_t)appname)) != 0) goto err; } - ret = __txn_remevent(dbenv, txn, real_name, fileid); + ret = __txn_remevent(dbenv, txn, real_name, fileid, 0); } err: if (real_name != NULL) @@ -176,7 +192,7 @@ __fop_write(dbenv, int local_open, ret, t_ret; char *real_name; - DB_ASSERT(istmp != 0); + DB_ASSERT(dbenv, istmp != 0); ret = local_open = 0; real_name = NULL; @@ -189,11 +205,10 @@ __fop_write(dbenv, memset(&data, 0, sizeof(data)); data.data = buf; data.size = size; - memset(&namedbt, 0, sizeof(namedbt)); - namedbt.data = (void *)name; - namedbt.size = (u_int32_t)strlen(name) + 1; - if ((ret = __fop_write_log(dbenv, txn, &lsn, flags, - &namedbt, appname, pgsize, pageno, off, &data, istmp)) != 0) + DB_INIT_DBT(namedbt, name, strlen(name) + 1); + if ((ret = __fop_write_log(dbenv, txn, + &lsn, flags, &namedbt, (u_int32_t)appname, + pgsize, pageno, off, &data, istmp)) != 0) goto err; } @@ -205,8 +220,7 @@ __fop_write(dbenv, } /* Seek to offset. */ - if ((ret = __os_seek(dbenv, - fhp, pgsize, pageno, off, 0, DB_OS_SEEK_SET)) != 0) + if ((ret = __os_seek(dbenv, fhp, pageno, pgsize, off)) != 0) goto err; /* Now do the write. */ @@ -251,13 +265,9 @@ __fop_rename(dbenv, txn, oldname, newname, fid, appname, flags) goto err; if (DBENV_LOGGING(dbenv)) { - memset(&old, 0, sizeof(old)); - memset(&new, 0, sizeof(new)); + DB_INIT_DBT(old, oldname, strlen(oldname) + 1); + DB_INIT_DBT(new, newname, strlen(newname) + 1); memset(&fiddbt, 0, sizeof(fiddbt)); - old.data = (void *)oldname; - old.size = (u_int32_t)strlen(oldname) + 1; - new.data = (void *)newname; - new.size = (u_int32_t)strlen(newname) + 1; fiddbt.data = fid; fiddbt.size = DB_FILE_ID_LEN; if ((ret = __fop_rename_log(dbenv, txn, &lsn, flags | DB_FLUSH, @@ -265,7 +275,7 @@ __fop_rename(dbenv, txn, oldname, newname, fid, appname, flags) goto err; } - ret = __memp_nameop(dbenv, fid, newname, o, n); + ret = __memp_nameop(dbenv, fid, newname, o, n, 0); err: if (o != NULL) __os_free(dbenv, o); |