diff options
Diffstat (limited to 'db/os_windows')
-rw-r--r-- | db/os_windows/os_abs.c | 31 | ||||
-rw-r--r-- | db/os_windows/os_clock.c | 30 | ||||
-rw-r--r-- | db/os_windows/os_config.c | 118 | ||||
-rw-r--r-- | db/os_windows/os_dir.c | 104 | ||||
-rw-r--r-- | db/os_windows/os_errno.c | 405 | ||||
-rw-r--r-- | db/os_windows/os_fid.c | 147 | ||||
-rw-r--r-- | db/os_windows/os_flock.c | 69 | ||||
-rw-r--r-- | db/os_windows/os_fsync.c | 38 | ||||
-rw-r--r-- | db/os_windows/os_getenv.c | 97 | ||||
-rw-r--r-- | db/os_windows/os_handle.c | 119 | ||||
-rw-r--r-- | db/os_windows/os_map.c | 309 | ||||
-rw-r--r-- | db/os_windows/os_open.c | 186 | ||||
-rw-r--r-- | db/os_windows/os_rename.c | 70 | ||||
-rw-r--r-- | db/os_windows/os_rw.c | 186 | ||||
-rw-r--r-- | db/os_windows/os_seek.c | 55 | ||||
-rw-r--r-- | db/os_windows/os_sleep.c | 34 | ||||
-rw-r--r-- | db/os_windows/os_spin.c | 38 | ||||
-rw-r--r-- | db/os_windows/os_stat.c | 84 | ||||
-rw-r--r-- | db/os_windows/os_truncate.c | 90 | ||||
-rw-r--r-- | db/os_windows/os_unlink.c | 109 | ||||
-rw-r--r-- | db/os_windows/os_yield.c | 27 |
21 files changed, 2346 insertions, 0 deletions
diff --git a/db/os_windows/os_abs.c b/db/os_windows/os_abs.c
new file mode 100644
index 000000000..e4ace7bfb
--- /dev/null
+++ b/db/os_windows/os_abs.c
@@ -0,0 +1,31 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997-2006
+ *	Oracle Corporation. All rights reserved.
+ *
+ * $Id: os_abs.c,v 12.3 2006/08/24 14:46:21 bostic Exp $
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * __os_abspath --
+ *	Return non-zero if path is absolute: optional "X:" drive, then / or \.
+ */
+int
+__os_abspath(path)
+	const char *path;
+{
+	/*
+	 * Skip a drive specification, e.g., "C:". The cast keeps the isalpha
+	 * argument non-negative, as <ctype.h> requires, for bytes >= 0x80
+	 * where char is signed. The win32 DB path separator (PATH_SEPARATOR)
+	 * is \; look for both / and \ since these are user-input paths.
+	 */
+	if (isalpha((unsigned char)path[0]) && path[1] == ':')
+		path += 2;
+	return (path[0] == '/' || path[0] == '\\');
+}
diff --git a/db/os_windows/os_clock.c b/db/os_windows/os_clock.c
new file mode 100644
index 000000000..2a0203237
--- /dev/null
+++ b/db/os_windows/os_clock.c
@@ -0,0 +1,30 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2001-2006
+ *	Oracle Corporation. All rights reserved.
+ *
+ * $Id: os_clock.c,v 12.6 2006/08/24 14:46:21 bostic Exp $
+ */
+
+#include "db_config.h"
+
+#include "db_int.h"
+
+/*
+ * __os_clock --
+ *	Return the current time-of-day clock in seconds and microseconds.
+ */
+void
+__os_clock(dbenv, secsp, usecsp)
+	DB_ENV *dbenv;
+	u_int32_t *secsp, *usecsp;	/* Seconds and microseconds. */
+{
+	struct _timeb now;
+
+	_ftime(&now);
+	if (secsp != NULL)
+		*secsp = (u_int32_t)now.time;
+	if (usecsp != NULL)
+		*usecsp = now.millitm * 1000;
+}
diff --git a/db/os_windows/os_config.c b/db/os_windows/os_config.c
new file mode 100644
index 000000000..51fc1b3ad
--- /dev/null
+++ b/db/os_windows/os_config.c
@@ -0,0 +1,118 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999-2006
+ *	Oracle Corporation. All rights reserved.
+ * + * $Id: os_config.c,v 12.6 2006/08/24 14:46:21 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_is_winnt -- + * Return 1 if Windows/NT, otherwise 0. + * + * PUBLIC: int __os_is_winnt __P((void)); + */ +int +__os_is_winnt() +{ + static int __os_type = -1; + + /* + * The value of __os_type is computed only once, and cached to + * avoid the overhead of repeated calls to GetVersion(). + */ + if (__os_type == -1) { + if ((GetVersion() & 0x80000000) == 0) + __os_type = 1; + else + __os_type = 0; + } + return (__os_type); +} + +/* + * __os_fs_notzero -- + * Return 1 if allocated filesystem blocks are not zeroed. + */ +int +__os_fs_notzero() +{ + static int __os_notzero = -1; + OSVERSIONINFO osvi; + + /* + * Windows/NT zero-fills pages that were never explicitly written to + * the file. Note however that this is *NOT* documented. In fact, the + * Win32 documentation makes it clear that there are no guarantees that + * uninitialized bytes will be zeroed: + * + * If the file is extended, the contents of the file between the old + * EOF position and the new position are not defined. + * + * Experiments confirm that NT/2K/XP all zero fill for both NTFS and + * FAT32. Cygwin also relies on this behavior. This is the relevant + * comment from Cygwin: + * + * Oops, this is the bug case - Win95 uses whatever is on the disk + * instead of some known (safe) value, so we must seek back and fill + * in the gap with zeros. - DJ + * Note: this bug doesn't happen on NT4, even though the + * documentation for WriteFile() says that it *may* happen on any OS. + * + * We're making a bet, here, but we made it a long time ago and haven't + * yet seen any evidence that it was wrong. + * + * Windows 95/98 and On-Time give random garbage, and that breaks + * Berkeley DB. + * + * The value of __os_notzero is computed only once, and cached to + * avoid the overhead of repeated calls to GetVersion(). 
+ */ + if (__os_notzero == -1) { + if (__os_is_winnt()) { + osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + GetVersionEx(&osvi); + if (_tcscmp(osvi.szCSDVersion, _T("RTTarget-32")) == 0) + __os_notzero = 1; /* On-Time */ + else + __os_notzero = 0; /* Windows/NT */ + } else + __os_notzero = 1; /* Not Windows/NT */ + } + return (__os_notzero); +} + +/* + * __os_support_direct_io -- + * Check to see if we support direct I/O. + */ +int +__os_support_direct_io() +{ + return (1); +} + +/* + * __os_support_db_register -- + * Return 1 if the system supports DB_REGISTER. + */ +int +__os_support_db_register() +{ + return (__os_is_winnt()); +} + +/* + * __os_support_replication -- + * Return 1 if the system supports replication. + */ +int +__os_support_replication() +{ + return (__os_is_winnt()); +} diff --git a/db/os_windows/os_dir.c b/db/os_windows/os_dir.c new file mode 100644 index 000000000..4e47fa771 --- /dev/null +++ b/db/os_windows/os_dir.c @@ -0,0 +1,104 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_dir.c,v 12.8 2006/08/24 14:46:21 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_dirlist -- + * Return a list of the files in a directory. 
+ */ +int +__os_dirlist(dbenv, dir, namesp, cntp) + DB_ENV *dbenv; + const char *dir; + char ***namesp; + int *cntp; +{ + HANDLE dirhandle; + WIN32_FIND_DATA fdata; + int arraysz, cnt, ret; + char **names, *onename; + _TCHAR tfilespec[DB_MAXPATHLEN + 1]; + _TCHAR *tdir; + + TO_TSTRING(dbenv, dir, tdir, ret); + if (ret != 0) + return (ret); + + (void)_sntprintf(tfilespec, DB_MAXPATHLEN, + _T("%s%hc*"), tdir, PATH_SEPARATOR[0]); + if ((dirhandle = + FindFirstFile(tfilespec, &fdata)) == INVALID_HANDLE_VALUE) + return (__os_posix_err(__os_get_syserr())); + + names = NULL; + arraysz = cnt = ret = 0; + for (;;) { + if ((fdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) { + if (cnt >= arraysz) { + arraysz += 100; + if ((ret = __os_realloc(dbenv, + arraysz * sizeof(names[0]), &names)) != 0) + goto err; + } + /* + * FROM_TSTRING doesn't necessarily allocate new + * memory, so we must do that explicitly. + * Unfortunately, when compiled with UNICODE, we'll + * copy twice. + */ + FROM_TSTRING(dbenv, fdata.cFileName, onename, ret); + if (ret != 0) + goto err; + ret = __os_strdup(dbenv, onename, &names[cnt]); + FREE_STRING(dbenv, onename); + if (ret != 0) + goto err; + cnt++; + } + if (!FindNextFile(dirhandle, &fdata)) { + if (GetLastError() == ERROR_NO_MORE_FILES) + break; + else { + ret = __os_posix_err(__os_get_syserr()); + goto err; + } + } + } + +err: if (!FindClose(dirhandle) && ret == 0) + ret = __os_posix_err(__os_get_syserr()); + + if (ret == 0) { + *namesp = names; + *cntp = cnt; + } else if (names != NULL) + __os_dirfree(dbenv, names, cnt); + + FREE_STRING(dbenv, tdir); + + return (ret); +} + +/* + * __os_dirfree -- + * Free the list of files. 
+ */ +void +__os_dirfree(dbenv, names, cnt) + DB_ENV *dbenv; + char **names; + int cnt; +{ + while (cnt > 0) + __os_free(dbenv, names[--cnt]); + __os_free(dbenv, names); +} diff --git a/db/os_windows/os_errno.c b/db/os_windows/os_errno.c new file mode 100644 index 000000000..14bdab6c5 --- /dev/null +++ b/db/os_windows/os_errno.c @@ -0,0 +1,405 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_errno.c,v 12.10 2006/09/19 14:14:13 mjc Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_get_errno_ret_zero -- + * Return the last system error, including an error of zero. + */ +int +__os_get_errno_ret_zero() +{ + /* This routine must be able to return the same value repeatedly. */ + return (errno); +} + +/* + * We've seen cases where system calls failed but errno was never set. For + * that reason, __os_get_errno() and __os_get_syserr set errno to EAGAIN if + * it's not already set, to work around the problem. For obvious reasons, + * we can only call this function if we know an error has occurred, that + * is, we can't test the return for a non-zero value after the get call. + * + * __os_get_errno -- + * Return the last ANSI C "errno" value or EAGAIN if the last error + * is zero. + */ +int +__os_get_errno() +{ + /* This routine must be able to return the same value repeatedly. */ + if (errno == 0) + __os_set_errno(EAGAIN); + return (errno); +} + +/* + * __os_get_neterr -- + * Return the last networking error or EAGAIN if the last error is zero. + * + * PUBLIC: int __os_get_neterr __P((void)); + */ +int +__os_get_neterr() +{ + int err; + + /* This routine must be able to return the same value repeatedly. */ + err = WSAGetLastError(); + if (err == 0) + WSASetLastError(err = ERROR_RETRY); + return (err); +} + +/* + * __os_get_syserr -- + * Return the last system error or EAGAIN if the last error is zero. 
+ */ +int +__os_get_syserr() +{ + int err; + + /* This routine must be able to return the same value repeatedly. */ + err = GetLastError(); + if (err == 0) + SetLastError(err = ERROR_RETRY); + return (err); +} + +/* + * __os_set_errno -- + * Set the value of errno. + */ +void +__os_set_errno(evalue) + int evalue; +{ + /* + * This routine is called by the compatibility interfaces (DB 1.85, + * dbm and hsearch). Force values > 0, that is, not one of DB 2.X + * and later's public error returns. If something bad has happened, + * default to EFAULT -- a nasty return. Otherwise, default to EINVAL. + * As the compatibility APIs aren't included on Windows, the Windows + * version of this routine doesn't need this behavior. + */ + errno = + evalue >= 0 ? evalue : (evalue == DB_RUNRECOVERY ? EFAULT : EINVAL); +} + +/* + * __os_strerror -- + * Return a string associated with the system error. + */ +char * +__os_strerror(error, buf, len) + int error; + char *buf; + size_t len; +{ + DB_ASSERT(NULL, error != 0); + + /* + * Explicitly call FormatMessageA, since we want to receive a char + * string back, not a tchar string. + */ + FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, + 0, (DWORD)error, 0, buf, (DWORD)(len - 1), NULL); + buf[len - 1] = '\0'; + + return (buf); +} + +/* + * __os_posix_err -- + * Convert a system error to a POSIX error. + */ +int +__os_posix_err(error) + int error; +{ + /* Handle calls on successful returns. */ + if (error == 0) + return (0); + + /* + * Translate the Windows error codes we care about. 
+ */ + switch (error) { + case ERROR_FILE_NOT_FOUND: + case ERROR_INVALID_DRIVE: + case ERROR_PATH_NOT_FOUND: + return (ENOENT); + + case ERROR_NO_MORE_FILES: + case ERROR_TOO_MANY_OPEN_FILES: + return (EMFILE); + + case ERROR_ACCESS_DENIED: + return (EPERM); + + case ERROR_INVALID_HANDLE: + return (EBADF); + + case ERROR_NOT_ENOUGH_MEMORY: + return (ENOMEM); + + case ERROR_DISK_FULL: + return (ENOSPC); + + case ERROR_ARENA_TRASHED: + case ERROR_BAD_COMMAND: + case ERROR_BAD_ENVIRONMENT: + case ERROR_BAD_FORMAT: + case ERROR_GEN_FAILURE: + case ERROR_INVALID_ACCESS: + case ERROR_INVALID_BLOCK: + case ERROR_INVALID_DATA: + case ERROR_READ_FAULT: + case ERROR_WRITE_FAULT: + return (EFAULT); + + case ERROR_ALREADY_EXISTS: + case ERROR_FILE_EXISTS: + return (EEXIST); + + case ERROR_NOT_SAME_DEVICE: + return (EXDEV); + + case ERROR_WRITE_PROTECT: + return (EACCES); + + case ERROR_LOCK_FAILED: + case ERROR_LOCK_VIOLATION: + case ERROR_NOT_READY: + case ERROR_SHARING_VIOLATION: + return (EBUSY); + + case ERROR_RETRY: + return (EINTR); + } + + /* + * Translate the Windows socket error codes. 
+ */ + switch (error) { + case WSAEADDRINUSE: +#ifdef EADDRINUSE + return (EADDRINUSE); +#else + break; +#endif + case WSAEADDRNOTAVAIL: +#ifdef EADDRNOTAVAIL + return (EADDRNOTAVAIL); +#else + break; +#endif + case WSAEAFNOSUPPORT: +#ifdef EAFNOSUPPORT + return (EAFNOSUPPORT); +#else + break; +#endif + case WSAEALREADY: +#ifdef EALREADY + return (EALREADY); +#else + break; +#endif + case WSAEBADF: + return (EBADF); + case WSAECONNABORTED: +#ifdef ECONNABORTED + return (ECONNABORTED); +#else + break; +#endif + case WSAECONNREFUSED: +#ifdef ECONNREFUSED + return (ECONNREFUSED); +#else + break; +#endif + case WSAECONNRESET: +#ifdef ECONNRESET + return (ECONNRESET); +#else + break; +#endif + case WSAEDESTADDRREQ: +#ifdef EDESTADDRREQ + return (EDESTADDRREQ); +#else + break; +#endif + case WSAEFAULT: + return (EFAULT); + case WSAEHOSTDOWN: +#ifdef EHOSTDOWN + return (EHOSTDOWN); +#else + break; +#endif + case WSAEHOSTUNREACH: +#ifdef EHOSTUNREACH + return (EHOSTUNREACH); +#else + break; +#endif + case WSAEINPROGRESS: +#ifdef EINPROGRESS + return (EINPROGRESS); +#else + break; +#endif + case WSAEINTR: + return (EINTR); + case WSAEINVAL: + return (EINVAL); + case WSAEISCONN: +#ifdef EISCONN + return (EISCONN); +#else + break; +#endif + case WSAELOOP: +#ifdef ELOOP + return (ELOOP); +#else + break; +#endif + case WSAEMFILE: + return (EMFILE); + case WSAEMSGSIZE: +#ifdef EMSGSIZE + return (EMSGSIZE); +#else + break; +#endif + case WSAENAMETOOLONG: + return (ENAMETOOLONG); + case WSAENETDOWN: +#ifdef ENETDOWN + return (ENETDOWN); +#else + break; +#endif + case WSAENETRESET: +#ifdef ENETRESET + return (ENETRESET); +#else + break; +#endif + case WSAENETUNREACH: +#ifdef ENETUNREACH + return (ENETUNREACH); +#else + break; +#endif + case WSAENOBUFS: +#ifdef ENOBUFS + return (ENOBUFS); +#else + break; +#endif + case WSAENOPROTOOPT: +#ifdef ENOPROTOOPT + return (ENOPROTOOPT); +#else + break; +#endif + case WSAENOTCONN: +#ifdef ENOTCONN + return (ENOTCONN); +#else + break; +#endif 
+ case WSANOTINITIALISED: + return (EAGAIN); + case WSAENOTSOCK: +#ifdef ENOTSOCK + return (ENOTSOCK); +#else + break; +#endif + case WSAEOPNOTSUPP: + return (DB_OPNOTSUP); + case WSAEPFNOSUPPORT: +#ifdef EPFNOSUPPORT + return (EPFNOSUPPORT); +#else + break; +#endif + case WSAEPROTONOSUPPORT: +#ifdef EPROTONOSUPPORT + return (EPROTONOSUPPORT); +#else + break; +#endif + case WSAEPROTOTYPE: +#ifdef EPROTOTYPE + return (EPROTOTYPE); +#else + break; +#endif + case WSAESHUTDOWN: +#ifdef ESHUTDOWN + return (ESHUTDOWN); +#else + break; +#endif + case WSAESOCKTNOSUPPORT: +#ifdef ESOCKTNOSUPPORT + return (ESOCKTNOSUPPORT); +#else + break; +#endif + case WSAETIMEDOUT: +#ifdef ETIMEDOUT + return (ETIMEDOUT); +#else + break; +#endif + case WSAETOOMANYREFS: +#ifdef ETOOMANYREFS + return (ETOOMANYREFS); +#else + break; +#endif + case WSAEWOULDBLOCK: +#ifdef EWOULDBLOCK + return (EWOULDBLOCK); +#else + return (EAGAIN); +#endif + case WSAHOST_NOT_FOUND: +#ifdef EHOSTUNREACH + return (EHOSTUNREACH); +#else + break; +#endif + case WSASYSNOTREADY: + return (EAGAIN); + case WSATRY_AGAIN: + return (EAGAIN); + case WSAVERNOTSUPPORTED: + return (DB_OPNOTSUP); + case WSAEACCES: + return (EACCES); + } + + /* + * EFAULT is the default if we don't have a translation. + */ + return (EFAULT); +} diff --git a/db/os_windows/os_fid.c b/db/os_windows/os_fid.c new file mode 100644 index 000000000..9fa2a57c9 --- /dev/null +++ b/db/os_windows/os_fid.c @@ -0,0 +1,147 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_fid.c,v 12.9 2006/08/24 14:46:21 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +#define SERIAL_INIT 0 +static u_int32_t fid_serial = SERIAL_INIT; + +/* + * __os_fileid -- + * Return a unique identifier for a file. 
+ */ +int +__os_fileid(dbenv, fname, unique_okay, fidp) + DB_ENV *dbenv; + const char *fname; + int unique_okay; + u_int8_t *fidp; +{ + pid_t pid; + size_t i; + u_int32_t tmp; + u_int8_t *p; + int ret; + + /* + * The documentation for GetFileInformationByHandle() states that the + * inode-type numbers are not constant between processes. Actually, + * they are, they're the NTFS MFT indexes. So, this works on NTFS, + * but perhaps not on other platforms, and perhaps not over a network. + * Can't think of a better solution right now. + */ + DB_FH *fhp; + BY_HANDLE_FILE_INFORMATION fi; + BOOL retval = FALSE; + + DB_ASSERT(dbenv, fname != NULL); + + /* Clear the buffer. */ + memset(fidp, 0, DB_FILE_ID_LEN); + + /* + * Initialize/increment the serial number we use to help avoid + * fileid collisions. Note that we don't bother with locking; + * it's unpleasant to do from down in here, and if we race on + * this no real harm will be done, since the finished fileid + * has so many other components. + * + * We use the bottom 32-bits of the process ID, hoping they + * are more random than the top 32-bits (should we be on a + * machine with 64-bit process IDs). + * + * We increment by 100000 on each call as a simple way of + * randomizing; simply incrementing seems potentially less useful + * if pids are also simply incremented, since this is process-local + * and we may be one of a set of processes starting up. 100000 + * pushes us out of pid space on most platforms, and has few + * interesting properties in base 2. + */ + if (fid_serial == SERIAL_INIT) { + __os_id(dbenv, &pid, NULL); + fid_serial = pid; + } else + fid_serial += 100000; + + /* + * First we open the file, because we're not given a handle to it. + * If we can't open it, we're in trouble. 
+ */ + if ((ret = __os_open(dbenv, fname, DB_OSO_RDONLY, _S_IREAD, &fhp)) != 0) + return (ret); + + /* File open, get its info */ + if ((retval = GetFileInformationByHandle(fhp->handle, &fi)) == FALSE) + ret = __os_get_syserr(); + (void)__os_closehandle(dbenv, fhp); + + if (retval == FALSE) + return (__os_posix_err(ret)); + + /* + * We want the three 32-bit words which tell us the volume ID and + * the file ID. We make a crude attempt to copy the bytes over to + * the callers buffer. + * + * We don't worry about byte sexing or the actual variable sizes. + * + * When this routine is called from the DB access methods, it's only + * called once -- whatever ID is generated when a database is created + * is stored in the database file's metadata, and that is what is + * saved in the mpool region's information to uniquely identify the + * file. + * + * When called from the mpool layer this routine will be called each + * time a new thread of control wants to share the file, which makes + * things tougher. As far as byte sexing goes, since the mpool region + * lives on a single host, there's no issue of that -- the entire + * region is byte sex dependent. As far as variable sizes go, we make + * the simplifying assumption that 32-bit and 64-bit processes will + * get the same 32-bit values if we truncate any returned 64-bit value + * to a 32-bit value. + */ + tmp = (u_int32_t)fi.nFileIndexLow; + for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = *p++; + tmp = (u_int32_t)fi.nFileIndexHigh; + for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = *p++; + + if (unique_okay) { + /* + * Use the system time to try to get a unique value + * within this process. A millisecond counter + * overflows 32 bits in about 49 days. So we use 8 + * bytes, and don't bother with the volume ID, which + * is not very useful for our purposes. 
+ */ + SYSTEMTIME st; + + GetSystemTime(&st); + tmp = (st.wYear - 1900) * 12 + (st.wMonth - 1); + for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = *p++; + tmp = ((((st.wDay - 1) * 24 + st.wHour) * 60 + + st.wMinute) * 60 + st.wSecond) * 1000 + + st.wMilliseconds; + for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = *p++; + for (p = (u_int8_t *)&fid_serial, i = sizeof(u_int32_t); + i > 0; --i) + *fidp++ = *p++; + } else { + tmp = (u_int32_t)fi.dwVolumeSerialNumber; + for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i) + *fidp++ = *p++; + } + + return (0); +} diff --git a/db/os_windows/os_flock.c b/db/os_windows/os_flock.c new file mode 100644 index 000000000..c08193891 --- /dev/null +++ b/db/os_windows/os_flock.c @@ -0,0 +1,69 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_flock.c,v 1.12 2006/08/24 14:46:21 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_fdlock -- + * Acquire/release a lock on a byte in a file. + */ +int +__os_fdlock(dbenv, fhp, offset, acquire, nowait) + DB_ENV *dbenv; + DB_FH *fhp; + int acquire, nowait; + off_t offset; +{ + int ret; + DWORD low, high; + OVERLAPPED over; + + DB_ASSERT(dbenv, + F_ISSET(fhp, DB_FH_OPENED) && fhp->handle != INVALID_HANDLE_VALUE); + + /* + * Windows file locking interferes with read/write operations, so we + * map the ranges to an area past the end of the file. 
+ */ + DB_ASSERT(dbenv, offset < (u_int64_t)INT64_MAX); + offset = UINT64_MAX - offset; + low = (DWORD)offset; + high = (DWORD)(offset >> 32); + + if (acquire) { + if (nowait) + RETRY_CHK_EINTR_ONLY( + !LockFile(fhp->handle, low, high, 1, 0), ret); + else if (__os_is_winnt()) { + memset(&over, 0, sizeof(over)); + over.Offset = low; + over.OffsetHigh = high; + RETRY_CHK_EINTR_ONLY( + !LockFileEx(fhp->handle, LOCKFILE_EXCLUSIVE_LOCK, + 0, 1, 0, &over), + ret); + } else { + /* Windows 9x/ME doesn't support a blocking call. */ + for (;;) { + RETRY_CHK_EINTR_ONLY( + !LockFile(fhp->handle, low, high, 1, 0), + ret); + if (__os_posix_err(ret) != EAGAIN) + break; + __os_sleep(dbenv, 1, 0); + } + } + } else + RETRY_CHK_EINTR_ONLY( + !UnlockFile(fhp->handle, low, high, 1, 0), ret); + + return (__os_posix_err(ret)); +} diff --git a/db/os_windows/os_fsync.c b/db/os_windows/os_fsync.c new file mode 100644 index 000000000..050d68e55 --- /dev/null +++ b/db/os_windows/os_fsync.c @@ -0,0 +1,38 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_fsync.c,v 12.7 2006/08/24 14:46:21 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_fsync -- + * Flush a file descriptor. + */ +int +__os_fsync(dbenv, fhp) + DB_ENV *dbenv; + DB_FH *fhp; +{ + int ret; + + /* + * Do nothing if the file descriptor has been marked as not requiring + * any sync to disk. + */ + if (F_ISSET(fhp, DB_FH_NOSYNC)) + return (0); + + RETRY_CHK((!FlushFileBuffers(fhp->handle)), ret); + if (ret != 0) { + __db_syserr(dbenv, ret, "FlushFileBuffers"); + ret = __os_posix_err(ret); + } + return (ret); +} diff --git a/db/os_windows/os_getenv.c b/db/os_windows/os_getenv.c new file mode 100644 index 000000000..a42dbd677 --- /dev/null +++ b/db/os_windows/os_getenv.c @@ -0,0 +1,97 @@ +/*- + * See the file LICENSE for redistribution information. 
+ * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_getenv.c,v 1.4 2006/08/24 14:46:21 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_getenv -- + * Retrieve an environment variable. + */ +int +__os_getenv(dbenv, name, bpp, buflen) + DB_ENV *dbenv; + const char *name; + char **bpp; + size_t buflen; +{ + _TCHAR *tname, tbuf[1024]; + int ret; + char *p; + + /* + * If there's a value and the buffer is large enough: + * copy value into the pointer, return 0 + * If there's a value and the buffer is too short: + * set pointer to NULL, return EINVAL + * If there's no value: + * set pointer to NULL, return 0 + */ + if ((p = getenv(name)) != NULL) { + if (strlen(p) < buflen) { + (void)strcpy(*bpp, p); + return (0); + } + goto small_buf; + } + + TO_TSTRING(dbenv, name, tname, ret); + if (ret != 0) + return (ret); + /* + * The declared size of the tbuf buffer limits the maximum environment + * variable size in Berkeley DB on Windows. If that's too small, or if + * we need to get rid of large allocations on the BDB stack, we should + * malloc the tbuf memory. + */ + ret = GetEnvironmentVariable(tname, tbuf, sizeof(tbuf)); + FREE_STRING(dbenv, tname); + + /* + * If GetEnvironmentVariable succeeds, the return value is the number + * of characters stored in the buffer pointed to by lpBuffer, not + * including the terminating null character. If the buffer is not + * large enough to hold the data, the return value is the buffer size, + * in characters, required to hold the string and its terminating null + * character. If GetEnvironmentVariable fails, the return value is + * zero. If the specified environment variable was not found in the + * environment block, GetLastError returns ERROR_ENVVAR_NOT_FOUND. 
+ */ + if (ret == 0) { + if ((ret = __os_get_syserr()) == ERROR_ENVVAR_NOT_FOUND) { + *bpp = NULL; + return (0); + } + __db_syserr(dbenv, ret, "GetEnvironmentVariable"); + return (__os_posix_err(ret)); + } + if (ret > (int)sizeof(tbuf)) + goto small_buf; + + FROM_TSTRING(dbenv, tbuf, p, ret); + if (ret != 0) + return (ret); + if (strlen(p) < buflen) + (void)strcpy(*bpp, p); + else + *bpp = NULL; + FREE_STRING(dbenv, p); + if (*bpp == NULL) + goto small_buf; + + return (0); + +small_buf: + *bpp = NULL; + __db_errx(dbenv, + "%s: buffer too small to hold environment variable %s", + name, p); + return (EINVAL); +} diff --git a/db/os_windows/os_handle.c b/db/os_windows/os_handle.c new file mode 100644 index 000000000..c03a5ecd8 --- /dev/null +++ b/db/os_windows/os_handle.c @@ -0,0 +1,119 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_handle.c,v 12.10 2006/09/05 15:02:31 mjc Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_openhandle -- + * Open a file, using POSIX 1003.1 open flags. + */ +int +__os_openhandle(dbenv, name, flags, mode, fhpp) + DB_ENV *dbenv; + const char *name; + int flags, mode; + DB_FH **fhpp; +{ + DB_FH *fhp; + int ret, nrepeat, retries; + + if ((ret = __os_calloc(dbenv, 1, sizeof(DB_FH), fhpp)) != 0) + return (ret); + fhp = *fhpp; + + retries = 0; + for (nrepeat = 1; nrepeat < 4; ++nrepeat) { + ret = 0; + fhp->fd = _open(name, flags, mode); + + if (fhp->fd != -1) { + F_SET(fhp, DB_FH_OPENED); + break; + } + + switch (ret = __os_posix_err(__os_get_syserr())) { + case EMFILE: + case ENFILE: + case ENOSPC: + /* + * If it's a "temporary" error, we retry up to 3 times, + * waiting up to 12 seconds. While it's not a problem + * if we can't open a database, an inability to open a + * log file is cause for serious dismay. 
+ */ + __os_sleep(dbenv, nrepeat * 2, 0); + break; + case EAGAIN: + case EBUSY: + case EINTR: + /* + * If an EAGAIN, EBUSY or EINTR, retry immediately for + * DB_RETRY times. + */ + if (++retries < DB_RETRY) + --nrepeat; + break; + } + } + + if (ret != 0) { + (void)__os_closehandle(dbenv, fhp); + *fhpp = NULL; + } + + return (ret); +} + +/* + * __os_closehandle -- + * Close a file. + */ +int +__os_closehandle(dbenv, fhp) + DB_ENV *dbenv; + DB_FH *fhp; +{ + int ret, t_ret; + + ret = 0; + + /* + * If we have a valid handle, close it and unlink any temporary + * file. + */ + if (F_ISSET(fhp, DB_FH_OPENED)) { + if (fhp->handle != INVALID_HANDLE_VALUE) + RETRY_CHK((!CloseHandle(fhp->handle)), ret); + else + RETRY_CHK((_close(fhp->fd)), ret); + + if (fhp->trunc_handle != INVALID_HANDLE_VALUE) { + RETRY_CHK((!CloseHandle(fhp->trunc_handle)), t_ret); + if (t_ret != 0 && ret == 0) + ret = t_ret; + } + + if (ret != 0) { + __db_syserr(dbenv, ret, "CloseHandle"); + ret = __os_posix_err(ret); + } + + /* Unlink the file if we haven't already done so. */ + if (F_ISSET(fhp, DB_FH_UNLINK)) { + (void)__os_unlink(dbenv, fhp->name); + __os_free(dbenv, fhp->name); + } + } + + __os_free(dbenv, fhp); + + return (ret); +} diff --git a/db/os_windows/os_map.c b/db/os_windows/os_map.c new file mode 100644 index 000000000..e254ea429 --- /dev/null +++ b/db/os_windows/os_map.c @@ -0,0 +1,309 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_map.c,v 12.8 2006/08/24 14:46:21 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +static int __os_map + __P((DB_ENV *, char *, REGINFO *, DB_FH *, size_t, int, int, int, void **)); +static int __os_unique_name __P((_TCHAR *, HANDLE, _TCHAR *, size_t)); + +/* + * __os_r_sysattach -- + * Create/join a shared memory region. 
+ */ +int +__os_r_sysattach(dbenv, infop, rp) + DB_ENV *dbenv; + REGINFO *infop; + REGION *rp; +{ + DB_FH *fhp; + int is_system, ret; + + /* + * Try to open/create the file. We DO NOT need to ensure that multiple + * threads/processes attempting to simultaneously create the region are + * properly ordered, our caller has already taken care of that. + */ + if ((ret = __os_open(dbenv, infop->name, + F_ISSET(infop, REGION_CREATE_OK) ? DB_OSO_CREATE: 0, + dbenv->db_mode, &fhp)) != 0) { + __db_err(dbenv, ret, "%s", infop->name); + return (ret); + } + + /* + * On Windows/9X, files that are opened by multiple processes do not + * share data correctly. For this reason, the DB_SYSTEM_MEM flag is + * implied for any application that does not specify the DB_PRIVATE + * flag. + */ + is_system = F_ISSET(dbenv, DB_ENV_SYSTEM_MEM) || + (!F_ISSET(dbenv, DB_ENV_PRIVATE) && __os_is_winnt() == 0); + + /* + * Map the file in. If we're creating an in-system-memory region, + * specify a segment ID (which is never used again) so that the + * calling code writes out the REGENV_REF structure to the primary + * environment file. + */ + ret = __os_map(dbenv, infop->name, infop, fhp, rp->size, + 1, is_system, 0, &infop->addr); + if (ret == 0 && is_system == 1) + rp->segid = 1; + + (void)__os_closehandle(dbenv, fhp); + + return (ret); +} + +/* + * __os_r_sysdetach -- + * Detach from a shared memory region. + */ +int +__os_r_sysdetach(dbenv, infop, destroy) + DB_ENV *dbenv; + REGINFO *infop; + int destroy; +{ + int ret, t_ret; + + if (infop->wnt_handle != NULL) { + (void)CloseHandle(infop->wnt_handle); + infop->wnt_handle = NULL; + } + + ret = !UnmapViewOfFile(infop->addr) ? 
__os_get_syserr() : 0; + if (ret != 0) { + __db_syserr(dbenv, ret, "UnmapViewOfFile"); + ret = __os_posix_err(ret); + } + + if (!F_ISSET(dbenv, DB_ENV_SYSTEM_MEM) && destroy) { + if (F_ISSET(dbenv, DB_ENV_OVERWRITE)) + (void)__db_file_multi_write(dbenv, infop->name); + if ((t_ret = __os_unlink(dbenv, infop->name)) != 0 && ret == 0) + ret = t_ret; + } + + return (ret); +} + +/* + * __os_mapfile -- + * Map in a shared memory file. + */ +int +__os_mapfile(dbenv, path, fhp, len, is_rdonly, addr) + DB_ENV *dbenv; + char *path; + DB_FH *fhp; + int is_rdonly; + size_t len; + void **addr; +{ + return (__os_map(dbenv, path, NULL, fhp, len, 0, 0, is_rdonly, addr)); +} + +/* + * __os_unmapfile -- + * Unmap the shared memory file. + */ +int +__os_unmapfile(dbenv, addr, len) + DB_ENV *dbenv; + void *addr; + size_t len; +{ + return (!UnmapViewOfFile(addr) ? __os_posix_err(__os_get_syserr()) : 0); +} + +/* + * __os_unique_name -- + * Create a unique identifying name from a pathname (may be absolute or + * relative) and/or a file descriptor. + * + * The name returned must be unique (different files map to different + * names), and repeatable (same files, map to same names). It's not + * so easy to do by name. Should handle not only: + * + * foo.bar == ./foo.bar == c:/whatever_path/foo.bar + * + * but also understand that: + * + * foo.bar == Foo.Bar (FAT file system) + * foo.bar != Foo.Bar (NTFS) + * + * The best solution is to use the file index, found in the file + * information structure (similar to UNIX inode #). + * + * When a file is deleted, its file index may be reused, + * but if the unique name has not gone from its namespace, + * we may get a conflict. So to ensure some tie in to the + * original pathname, we also use the creation time and the + * file basename. This is not a perfect system, but it + * should work for all but anamolous test cases. 
+ * + */ +static int +__os_unique_name(orig_path, hfile, result_path, result_path_len) + _TCHAR *orig_path, *result_path; + HANDLE hfile; + size_t result_path_len; +{ + BY_HANDLE_FILE_INFORMATION fileinfo; + _TCHAR *basename, *p; + + /* + * In Windows, pathname components are delimited by '/' or '\', and + * if neither is present, we need to strip off leading drive letter + * (e.g. c:foo.txt). + */ + basename = _tcsrchr(orig_path, '/'); + p = _tcsrchr(orig_path, '\\'); + if (basename == NULL || (p != NULL && p > basename)) + basename = p; + if (basename == NULL) + basename = _tcsrchr(orig_path, ':'); + + if (basename == NULL) + basename = orig_path; + else + basename++; + + if (!GetFileInformationByHandle(hfile, &fileinfo)) + return (__os_posix_err(__os_get_syserr())); + + (void)_sntprintf(result_path, result_path_len, + _T("__db_shmem.%8.8lx.%8.8lx.%8.8lx.%8.8lx.%8.8lx.%s"), + fileinfo.dwVolumeSerialNumber, + fileinfo.nFileIndexHigh, + fileinfo.nFileIndexLow, + fileinfo.ftCreationTime.dwHighDateTime, + fileinfo.ftCreationTime.dwHighDateTime, + basename); + + return (0); +} + +/* + * __os_map -- + * The mmap(2) function for Windows. + */ +static int +__os_map(dbenv, path, infop, fhp, len, is_region, is_system, is_rdonly, addr) + DB_ENV *dbenv; + REGINFO *infop; + char *path; + DB_FH *fhp; + int is_region, is_system, is_rdonly; + size_t len; + void **addr; +{ + HANDLE hMemory; + int ret, use_pagefile; + _TCHAR *tpath, shmem_name[DB_MAXPATHLEN]; + void *pMemory; + + ret = 0; + if (infop != NULL) + infop->wnt_handle = NULL; + + use_pagefile = is_region && is_system; + + /* + * If creating a region in system space, get a matching name in the + * paging file namespace. 
+ */ + if (use_pagefile) { + TO_TSTRING(dbenv, path, tpath, ret); + if (ret != 0) + return (ret); + ret = __os_unique_name(tpath, fhp->handle, + shmem_name, sizeof(shmem_name)); + FREE_STRING(dbenv, tpath); + if (ret != 0) + return (ret); + } + + /* + * XXX + * DB: We have not implemented copy-on-write here. + * + * If this is an region in system memory, we try to open it using the + * OpenFileMapping() first, and only call CreateFileMapping() if we're + * really creating the section. There are two reasons: + * + * 1) We only create the mapping if we have newly created the region. + * This avoids a long-running problem caused by Windows reference + * counting, where regions that are closed by all processes are + * deleted. It turns out that just checking for a zeroed region + * is not good enough. See [#4882] and [#7127] for the details. + * + * 2) CreateFileMapping seems to mess up making the commit charge to + * the process. It thinks, incorrectly, that when we want to join a + * previously existing section, that it should make a commit charge + * for the whole section. In fact, there is no new committed memory + * whatever. The call can fail if there is insufficient memory free + * to handle the erroneous commit charge. So, we find that the + * bogus commit is not made if we call OpenFileMapping. + */ + hMemory = NULL; + if (use_pagefile) { + hMemory = OpenFileMapping( + is_rdonly ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS, + 0, shmem_name); + + if (hMemory == NULL && F_ISSET(infop, REGION_CREATE_OK)) + hMemory = CreateFileMapping((HANDLE)-1, 0, + is_rdonly ? PAGE_READONLY : PAGE_READWRITE, + 0, (DWORD)len, shmem_name); + } else + hMemory = CreateFileMapping(fhp->handle, 0, + is_rdonly ? PAGE_READONLY : PAGE_READWRITE, + 0, (DWORD)len, NULL); + + if (hMemory == NULL) { + ret = __os_get_syserr(); + __db_syserr(dbenv, ret, "OpenFileMapping"); + return (__db_panic(dbenv, __os_posix_err(ret))); + } + + pMemory = MapViewOfFile(hMemory, + (is_rdonly ? 
FILE_MAP_READ : FILE_MAP_ALL_ACCESS), 0, 0, len); + if (pMemory == NULL) { + ret = __os_get_syserr(); + __db_syserr(dbenv, ret, "MapViewOfFile"); + return (__db_panic(dbenv, __os_posix_err(ret))); + } + + /* + * XXX + * It turns out that the kernel object underlying the named section + * is reference counted, but that the call to MapViewOfFile() above + * does NOT increment the reference count! So, if we close the handle + * here, the kernel deletes the object from the kernel namespace. + * When a second process comes along to join the region, the kernel + * happily creates a new object with the same name, but completely + * different identity. The two processes then have distinct isolated + * mapped sections, not at all what was wanted. Not closing the handle + * here fixes this problem. We carry the handle around in the region + * structure so we can close it when unmap is called. + */ + if (use_pagefile && infop != NULL) + infop->wnt_handle = hMemory; + else + CloseHandle(hMemory); + + *addr = pMemory; + return (ret); +} diff --git a/db/os_windows/os_open.c b/db/os_windows/os_open.c new file mode 100644 index 000000000..9346722ae --- /dev/null +++ b/db/os_windows/os_open.c @@ -0,0 +1,186 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_open.c,v 12.16 2006/09/12 01:49:36 mjc Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_open -- + * Open a file descriptor. + */ +__os_open(dbenv, name, flags, mode, fhpp) + DB_ENV *dbenv; + const char *name; + u_int32_t flags; + int mode; + DB_FH **fhpp; +{ + return (__os_open_extend(dbenv, name, 0, flags, mode, fhpp)); +} + +/* + * __os_open_extend -- + * Open a file descriptor (including page size and log size information). 
+ */ +int +__os_open_extend(dbenv, name, page_size, flags, mode, fhpp) + DB_ENV *dbenv; + const char *name; + u_int32_t page_size, flags; + int mode; + DB_FH **fhpp; +{ + DB_FH *fhp; + DWORD cluster_size, sector_size, free_clusters, total_clusters; + int access, attr, createflag, nrepeat, ret, share; + _TCHAR *drive, *tname; + _TCHAR dbuf[4]; /* <letter><colon><slash><nul> */ + + fhp = NULL; + tname = NULL; + +#define OKFLAGS \ + (DB_OSO_ABSMODE | DB_OSO_CREATE | DB_OSO_DIRECT | DB_OSO_DSYNC |\ + DB_OSO_EXCL | DB_OSO_RDONLY | DB_OSO_REGION | DB_OSO_SEQ | \ + DB_OSO_TEMP | DB_OSO_TRUNC) + if ((ret = __db_fchk(dbenv, "__os_open", flags, OKFLAGS)) != 0) + return (ret); + + TO_TSTRING(dbenv, name, tname, ret); + if (ret != 0) + goto err; + + if ((ret = __os_calloc(dbenv, 1, sizeof(DB_FH), &fhp)) != 0) + goto err; + + /* + * Otherwise, use the Windows/32 CreateFile interface so that we can + * play magic games with files to get data flush effects similar to + * the POSIX O_DSYNC flag. + * + * !!! + * We currently ignore the 'mode' argument. It would be possible + * to construct a set of security attributes that we could pass to + * CreateFile that would accurately represents the mode. In worst + * case, this would require looking up user and all group names and + * creating an entry for each. Alternatively, we could call the + * _chmod (partial emulation) function after file creation, although + * this leaves us with an obvious race. However, these efforts are + * largely meaningless on FAT, the most common file system, which + * only has a "readable" and "writeable" flag, applying to all users. + */ + access = GENERIC_READ; + if (!LF_ISSET(DB_OSO_RDONLY)) + access |= GENERIC_WRITE; + + share = FILE_SHARE_READ | FILE_SHARE_WRITE; + if (__os_is_winnt()) + share |= FILE_SHARE_DELETE; + attr = FILE_ATTRIBUTE_NORMAL; + + /* + * Reproduce POSIX 1003.1 semantics: if O_CREATE and O_EXCL are both + * specified, fail, returning EEXIST, unless we create the file. 
+ */ + if (LF_ISSET(DB_OSO_CREATE) && LF_ISSET(DB_OSO_EXCL)) + createflag = CREATE_NEW; /* create only if !exist*/ + else if (!LF_ISSET(DB_OSO_CREATE) && LF_ISSET(DB_OSO_TRUNC)) + createflag = TRUNCATE_EXISTING; /* truncate, fail if !exist */ + else if (LF_ISSET(DB_OSO_TRUNC)) + createflag = CREATE_ALWAYS; /* create and truncate */ + else if (LF_ISSET(DB_OSO_CREATE)) + createflag = OPEN_ALWAYS; /* open or create */ + else + createflag = OPEN_EXISTING; /* open only if existing */ + + if (LF_ISSET(DB_OSO_DSYNC)) { + F_SET(fhp, DB_FH_NOSYNC); + attr |= FILE_FLAG_WRITE_THROUGH; + } + + if (LF_ISSET(DB_OSO_SEQ)) + attr |= FILE_FLAG_SEQUENTIAL_SCAN; + else + attr |= FILE_FLAG_RANDOM_ACCESS; + + if (LF_ISSET(DB_OSO_TEMP)) + attr |= FILE_FLAG_DELETE_ON_CLOSE; + + /* + * We can turn filesystem buffering off if the page size is a + * multiple of the disk's sector size. To find the sector size, + * we call GetDiskFreeSpace, which expects a drive name like "d:\\" + * or NULL for the current disk (i.e., a relative path) + */ + if (LF_ISSET(DB_OSO_DIRECT) && page_size != 0 && name[0] != '\0') { + if (name[1] == ':') { + drive = dbuf; + _sntprintf(dbuf, sizeof(dbuf), _T("%c:\\"), tname[0]); + } else + drive = NULL; + + /* + * We ignore all results except sectorsize, but some versions + * of Windows require that the parameters are non-NULL. + */ + if (GetDiskFreeSpace(drive, &cluster_size, + §or_size, &free_clusters, &total_clusters) && + page_size % sector_size == 0) + attr |= FILE_FLAG_NO_BUFFERING; + } + + fhp->handle = fhp->trunc_handle = INVALID_HANDLE_VALUE; + for (nrepeat = 1;; ++nrepeat) { + if (fhp->handle == INVALID_HANDLE_VALUE) + fhp->handle = CreateFile( + tname, access, share, NULL, createflag, attr, 0); + + /* + * Windows does not provide truncate directly. There is no + * safe way to use a handle for truncate concurrently with + * reads or writes. To deal with this, we open a second handle + * used just for truncating. 
+ */ + if (fhp->handle != INVALID_HANDLE_VALUE && + !LF_ISSET(DB_OSO_RDONLY | DB_OSO_TEMP) && + fhp->trunc_handle == INVALID_HANDLE_VALUE) + fhp->trunc_handle = CreateFile( + tname, access, share, NULL, OPEN_EXISTING, attr, 0); + + if (fhp->handle == INVALID_HANDLE_VALUE || + (!LF_ISSET(DB_OSO_RDONLY | DB_OSO_TEMP) && + fhp->trunc_handle == INVALID_HANDLE_VALUE)) { + /* + * If it's a "temporary" error, we retry up to 3 times, + * waiting up to 12 seconds. While it's not a problem + * if we can't open a database, an inability to open a + * log file is cause for serious dismay. + */ + ret = __os_posix_err(__os_get_syserr()); + if ((ret != ENFILE && ret != EMFILE && ret != ENOSPC) || + nrepeat > 3) + goto err; + + __os_sleep(dbenv, nrepeat * 2, 0); + } else + break; + } + + FREE_STRING(dbenv, tname); + + F_SET(fhp, DB_FH_OPENED); + *fhpp = fhp; + return (0); + +err: FREE_STRING(dbenv, tname); + if (fhp != NULL) + (void)__os_closehandle(dbenv, fhp); + return (ret); +} diff --git a/db/os_windows/os_rename.c b/db/os_windows/os_rename.c new file mode 100644 index 000000000..a7bdfac2b --- /dev/null +++ b/db/os_windows/os_rename.c @@ -0,0 +1,70 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_rename.c,v 12.6 2006/08/24 14:46:22 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_rename -- + * Rename a file. 
+ */ +int +__os_rename(dbenv, oldname, newname, silent) + DB_ENV *dbenv; + const char *oldname, *newname; + u_int32_t silent; +{ + _TCHAR *toldname, *tnewname; + int ret; + + TO_TSTRING(dbenv, oldname, toldname, ret); + if (ret != 0) + return (ret); + TO_TSTRING(dbenv, newname, tnewname, ret); + if (ret != 0) { + FREE_STRING(dbenv, toldname); + return (ret); + } + + if (!MoveFile(toldname, tnewname)) + ret = __os_get_syserr(); + + if (__os_posix_err(ret) == EEXIST) { + ret = 0; + if (__os_is_winnt()) { + if (!MoveFileEx( + toldname, tnewname, MOVEFILE_REPLACE_EXISTING)) + ret = __os_get_syserr(); + } else { + /* + * There is no MoveFileEx for Win9x/Me, so we have to + * do the best we can. Note that the MoveFile call + * above would have succeeded if oldname and newname + * refer to the same file, so we don't need to check + * that here. + */ + (void)DeleteFile(tnewname); + if (!MoveFile(toldname, tnewname)) + ret = __os_get_syserr(); + } + } + + FREE_STRING(dbenv, tnewname); + FREE_STRING(dbenv, toldname); + + if (ret != 0) { + if (silent == 0) + __db_syserr( + dbenv, ret, "MoveFileEx %s %s", oldname, newname); + ret = __os_posix_err(ret); + } + + return (ret); +} diff --git a/db/os_windows/os_rw.c b/db/os_windows/os_rw.c new file mode 100644 index 000000000..2d98a0f28 --- /dev/null +++ b/db/os_windows/os_rw.c @@ -0,0 +1,186 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_rw.c,v 12.15 2006/08/24 14:46:22 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_io -- + * Do an I/O. 
+ */ +int +__os_io(dbenv, op, fhp, pgno, pgsize, relative, io_len, buf, niop) + DB_ENV *dbenv; + int op; + DB_FH *fhp; + db_pgno_t pgno; + u_int32_t pgsize, relative, io_len; + u_int8_t *buf; + size_t *niop; +{ + int ret; + + if (__os_is_winnt()) { + ULONG64 off; + OVERLAPPED over; + DWORD nbytes; + if ((off = relative) == 0) + off = (ULONG64)pgsize * pgno; + over.Offset = (DWORD)(off & 0xffffffff); + over.OffsetHigh = (DWORD)(off >> 32); + over.hEvent = 0; /* we don't want asynchronous notifications */ + + switch (op) { + case DB_IO_READ: + if (!ReadFile(fhp->handle, + buf, (DWORD)io_len, &nbytes, &over)) + goto slow; + break; + case DB_IO_WRITE: +#ifdef HAVE_FILESYSTEM_NOTZERO + if (__os_fs_notzero()) + goto slow; +#endif + if (!WriteFile(fhp->handle, + buf, (DWORD)io_len, &nbytes, &over)) + goto slow; + break; + } + if (nbytes == io_len) { + *niop = (size_t)nbytes; + return (0); + } + } + +slow: MUTEX_LOCK(dbenv, fhp->mtx_fh); + + if ((ret = __os_seek(dbenv, fhp, pgno, pgsize, relative)) != 0) + goto err; + + switch (op) { + case DB_IO_READ: + ret = __os_read(dbenv, fhp, buf, io_len, niop); + break; + case DB_IO_WRITE: + ret = __os_write(dbenv, fhp, buf, io_len, niop); + break; + } + +err: MUTEX_UNLOCK(dbenv, fhp->mtx_fh); + + return (ret); +} + +/* + * __os_read -- + * Read from a file handle. 
+ */ +int +__os_read(dbenv, fhp, addr, len, nrp) + DB_ENV *dbenv; + DB_FH *fhp; + void *addr; + size_t len; + size_t *nrp; +{ + size_t offset, nr; + DWORD count; + int ret; + u_int8_t *taddr; + + ret = 0; + for (taddr = addr, + offset = 0; offset < len; taddr += nr, offset += nr) { + RETRY_CHK((!ReadFile(fhp->handle, + taddr, (DWORD)(len - offset), &count, NULL)), ret); + if (count == 0 || ret != 0) + break; + nr = (size_t)count; + } + *nrp = taddr - (u_int8_t *)addr; + if (ret != 0) { + __db_syserr(dbenv, ret, "read: 0x%lx, %lu", + P_TO_ULONG(taddr), (u_long)len - offset); + ret = __os_posix_err(ret); + } + return (ret); +} + +/* + * __os_write -- + * Write to a file handle. + */ +int +__os_write(dbenv, fhp, addr, len, nwp) + DB_ENV *dbenv; + DB_FH *fhp; + void *addr; + size_t len; + size_t *nwp; +{ + int ret; + +#ifdef HAVE_FILESYSTEM_NOTZERO + /* Zero-fill as necessary. */ + if (__os_fs_notzero() && (ret = __os_zerofill(dbenv, fhp)) != 0) + return (ret); +#endif + return (__os_physwrite(dbenv, fhp, addr, len, nwp)); +} + +/* + * __os_physwrite -- + * Physical write to a file handle. + */ +int +__os_physwrite(dbenv, fhp, addr, len, nwp) + DB_ENV *dbenv; + DB_FH *fhp; + void *addr; + size_t len; + size_t *nwp; +{ + size_t offset, nw; + DWORD count; + int ret; + u_int8_t *taddr; + + /* + * Make a last "panic" check. Imagine a thread of control running in + * Berkeley DB, going to sleep. Another thread of control decides to + * run recovery because the environment is broken. The first thing + * recovery does is panic the existing environment, but we only check + * the panic flag when crossing the public API. If the sleeping thread + * wakes up and writes something, we could have two threads of control + * writing the log files at the same time. So, before writing, make a + * last panic check. Obviously, there's still a window, but it's very, + * very small. 
+ */ + PANIC_CHECK(dbenv); + + ret = 0; + for (taddr = addr, + offset = 0; offset < len; taddr += nw, offset += nw) { + RETRY_CHK((!WriteFile(fhp->handle, + taddr, (DWORD)(len - offset), &count, NULL)), ret); + if (ret != 0) + break; + nw = (size_t)count; + } + *nwp = len; + if (ret != 0) { + __db_syserr(dbenv, ret, "write: %#lx, %lu", + P_TO_ULONG(taddr), (u_long)len - offset); + ret = __os_posix_err(ret); + + DB_EVENT(dbenv, DB_EVENT_WRITE_FAILED, NULL); + } + return (ret); +} diff --git a/db/os_windows/os_seek.c b/db/os_windows/os_seek.c new file mode 100644 index 000000000..1a2131879 --- /dev/null +++ b/db/os_windows/os_seek.c @@ -0,0 +1,55 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_seek.c,v 12.8 2006/08/24 14:46:22 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_seek -- + * Seek to a page/byte offset in the file. + */ +int +__os_seek(dbenv, fhp, pgno, pgsize, relative) + DB_ENV *dbenv; + DB_FH *fhp; + db_pgno_t pgno; + u_int32_t pgsize; + u_int32_t relative; +{ + /* Yes, this really is how Microsoft designed their API. */ + union { + __int64 bigint; + struct { + unsigned long low; + long high; + }; + } offbytes; + off_t offset; + int ret; + + offset = (off_t)pgsize * pgno + relative; + + offbytes.bigint = offset; + ret = (SetFilePointer(fhp->handle, offbytes.low, + &offbytes.high, FILE_BEGIN) == (DWORD)-1) ? 
__os_get_syserr() : 0; + + if (ret == 0) { + fhp->pgsize = pgsize; + fhp->pgno = pgno; + fhp->offset = relative; + } else { + __db_syserr(dbenv, ret, + "seek: %lu: (%lu * %lu) + %lu", (u_long)offset, + (u_long)pgno, (u_long)pgsize, (u_long)relative); + ret = __os_posix_err(ret); + } + + return (ret); +} diff --git a/db/os_windows/os_sleep.c b/db/os_windows/os_sleep.c new file mode 100644 index 000000000..f3709fb1a --- /dev/null +++ b/db/os_windows/os_sleep.c @@ -0,0 +1,34 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_sleep.c,v 12.4 2006/08/24 14:46:22 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_sleep -- + * Yield the processor for a period of time. + */ +void +__os_sleep(dbenv, secs, usecs) + DB_ENV *dbenv; + u_long secs, usecs; /* Seconds and microseconds. */ +{ + COMPQUIET(dbenv, NULL); + + /* Don't require that the values be normalized. */ + for (; usecs >= 1000000; ++secs, usecs -= 1000000) + ; + + /* + * It's important that we yield the processor here so that other + * processes or threads are permitted to run. + */ + Sleep(secs * 1000 + usecs / 1000); +} diff --git a/db/os_windows/os_spin.c b/db/os_windows/os_spin.c new file mode 100644 index 000000000..5d5a23a28 --- /dev/null +++ b/db/os_windows/os_spin.c @@ -0,0 +1,38 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_spin.c,v 12.6 2006/08/24 14:46:22 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_spin -- + * Return the number of default spins before blocking. 
+ */ +u_int32_t +__os_spin(dbenv) + DB_ENV *dbenv; +{ + SYSTEM_INFO SystemInfo; + u_int32_t tas_spins; + + /* Get the number of processors */ + GetSystemInfo(&SystemInfo); + + /* + * Spin 50 times per processor -- we have anecdotal evidence that this + * is a reasonable value. + */ + if (SystemInfo.dwNumberOfProcessors > 1) + tas_spins = 50 * SystemInfo.dwNumberOfProcessors; + else + tas_spins = 1; + + return (tas_spins); +} diff --git a/db/os_windows/os_stat.c b/db/os_windows/os_stat.c new file mode 100644 index 000000000..cd018b83f --- /dev/null +++ b/db/os_windows/os_stat.c @@ -0,0 +1,84 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_stat.c,v 12.9 2006/08/24 14:46:22 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_exists -- + * Return if the file exists. + */ +int +__os_exists(dbenv, path, isdirp) + DB_ENV *dbenv; + const char *path; + int *isdirp; +{ + int ret; + DWORD attrs; + _TCHAR *tpath; + + TO_TSTRING(dbenv, path, tpath, ret); + if (ret != 0) + return (ret); + + RETRY_CHK( + ((attrs = GetFileAttributes(tpath)) == (DWORD)-1 ? 1 : 0), ret); + if (ret == 0) { + if (isdirp != NULL) + *isdirp = (attrs & FILE_ATTRIBUTE_DIRECTORY); + } else + ret = __os_posix_err(ret); + + FREE_STRING(dbenv, tpath); + return (ret); +} + +/* + * __os_ioinfo -- + * Return file size and I/O size; abstracted to make it easier + * to replace. 
+ */ +int +__os_ioinfo(dbenv, path, fhp, mbytesp, bytesp, iosizep) + DB_ENV *dbenv; + const char *path; + DB_FH *fhp; + u_int32_t *mbytesp, *bytesp, *iosizep; +{ + int ret; + BY_HANDLE_FILE_INFORMATION bhfi; + unsigned __int64 filesize; + + RETRY_CHK((!GetFileInformationByHandle(fhp->handle, &bhfi)), ret); + if (ret != 0) { + __db_syserr(dbenv, ret, "GetFileInformationByHandle"); + return (__os_posix_err(ret)); + } + + filesize = ((unsigned __int64)bhfi.nFileSizeHigh << 32) + + bhfi.nFileSizeLow; + + /* Return the size of the file. */ + if (mbytesp != NULL) + *mbytesp = (u_int32_t)(filesize / MEGABYTE); + if (bytesp != NULL) + *bytesp = (u_int32_t)(filesize % MEGABYTE); + + /* + * The filesystem I/O size is not easily available. In particular, + * the values returned by GetDiskFreeSpace() are not very helpful + * (NTFS volumes often report 512B clusters, which are too small to + * be a useful default). + */ + if (iosizep != NULL) + *iosizep = DB_DEF_IOSIZE; + return (0); +} diff --git a/db/os_windows/os_truncate.c b/db/os_windows/os_truncate.c new file mode 100644 index 000000000..936d080ef --- /dev/null +++ b/db/os_windows/os_truncate.c @@ -0,0 +1,90 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2004-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_truncate.c,v 12.12 2006/09/05 15:30:18 mjc Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_truncate -- + * Truncate the file. + */ +int +__os_truncate(dbenv, fhp, pgno, pgsize) + DB_ENV *dbenv; + DB_FH *fhp; + db_pgno_t pgno; + u_int32_t pgsize; +{ + /* Yes, this really is how Microsoft have designed their API */ + union { + __int64 bigint; + struct { + unsigned long low; + long high; + }; + } off; + off_t offset; + int ret; + + ret = 0; + offset = (off_t)pgsize * pgno; + +#ifdef HAVE_FILESYSTEM_NOTZERO + /* + * If the filesystem doesn't zero fill, it isn't safe to extend the + * file, or we end up with junk blocks. 
Just return in that case. + */ + if (__os_fs_notzero()) { + off_t stat_offset; + u_int32_t mbytes, bytes; + + /* Stat the file. */ + if ((ret = + __os_ioinfo(dbenv, NULL, fhp, &mbytes, &bytes, NULL)) != 0) + return (ret); + stat_offset = (off_t)mbytes * MEGABYTE + bytes; + + if (offset > stat_offset) + return (0); + } +#endif + + /* + * Windows doesn't provide truncate directly. Instead, it has + * SetEndOfFile, which truncates to the current position. To + * deal with that, we open a duplicate file handle for truncating. + * + * We want to retry the truncate call, which involves a SetFilePointer + * and a SetEndOfFile, but there are several complications: + * + * 1) since the Windows API deals in 32-bit values, it's possible that + * the return from SetFilePointer (the low 32-bits) is + * INVALID_SET_FILE_POINTER even when the call has succeeded. So we + * have to also check whether GetLastError() returns NO_ERROR. + * + * 2) when it returns, SetFilePointer overwrites the high bits of the + * offset, so if we need to retry, we have to reset the offset each + * time. + * + * We can't switch to SetFilePointerEx, which knows about 64-bit + * offsets, because it isn't supported on Win9x/ME. + */ + RETRY_CHK((off.bigint = (__int64)pgsize * pgno, + (SetFilePointer(fhp->trunc_handle, off.low, &off.high, FILE_BEGIN) + == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) || + !SetEndOfFile(fhp->trunc_handle)), ret); + + if (ret != 0) { + __db_syserr(dbenv, ret, "SetFilePointer: %lu", pgno * pgsize); + ret = __os_posix_err(ret); + } + + return (ret); +} diff --git a/db/os_windows/os_unlink.c b/db/os_windows/os_unlink.c new file mode 100644 index 000000000..d6a7359c2 --- /dev/null +++ b/db/os_windows/os_unlink.c @@ -0,0 +1,109 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. 
+ * + * $Id: os_unlink.c,v 12.15 2006/08/24 14:46:22 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_region_unlink -- + * Remove a shared memory object file. + */ +int +__os_region_unlink(dbenv, path) + DB_ENV *dbenv; + const char *path; +{ + if (F_ISSET(dbenv, DB_ENV_OVERWRITE)) + (void)__db_file_multi_write(dbenv, path); + + return (__os_unlink(dbenv, path)); +} + +/* + * __os_unlink -- + * Remove a file. + */ +int +__os_unlink(dbenv, path) + DB_ENV *dbenv; + const char *path; +{ + HANDLE h; + _TCHAR *tpath, *orig_tpath, buf[DB_MAXPATHLEN]; + u_int32_t id; + int ret, t_ret; + + TO_TSTRING(dbenv, path, tpath, ret); + if (ret != 0) + return (ret); + orig_tpath = tpath; + + /* + * Windows NT and its descendents allow removal of open files, but the + * DeleteFile Win32 system call isn't equivalent to a POSIX unlink. + * Firstly, it only succeeds if FILE_SHARE_DELETE is set when the file + * is opened. Secondly, it leaves the file in a "zombie" state, where + * it can't be opened again, but a new file with the same name can't be + * created either. + * + * Since we depend on being able to recreate files (during recovery, + * say), we have to first rename the file, and then delete it. It + * still hangs around, but with a name we don't care about. The rename + * will fail if the file doesn't exist, which isn't a problem, but if + * it fails for some other reason, we need to know about it or a + * subsequent open may fail for no apparent reason. + */ + if (__os_is_winnt()) { + __os_unique_id(dbenv, &id); + _sntprintf(buf, DB_MAXPATHLEN, _T("%s.del.%010u"), tpath, id); + if (MoveFile(tpath, buf)) + tpath = buf; + else { + ret = __os_get_syserr(); + if (__os_posix_err(ret) != ENOENT) + __db_err(dbenv, ret, + "MoveFile: rename %s to temporary file", + path); + } + + /* + * Try removing the file using the delete-on-close flag. This + * plays nicer with files that are still open than DeleteFile. 
+ */ + h = CreateFile(tpath, 0, FILE_SHARE_READ, NULL, OPEN_EXISTING, + FILE_FLAG_DELETE_ON_CLOSE, 0); + if (h != INVALID_HANDLE_VALUE) { + (void)CloseHandle (h); + if (GetFileAttributes(tpath) == INVALID_FILE_ATTRIBUTES) + goto skipdel; + } + } + + RETRY_CHK((!DeleteFile(tpath)), ret); + +skipdel: + FREE_STRING(dbenv, orig_tpath); + + /* + * XXX + * We shouldn't be testing for an errno of ENOENT here, but ENOENT + * signals that a file is missing, and we attempt to unlink things + * (such as v. 2.x environment regions, in DB_ENV->remove) that we + * are expecting not to be there. Reporting errors in these cases + * is annoying. + */ + if (ret != 0) { + if ((t_ret = __os_posix_err(ret)) != ENOENT) + __db_syserr(dbenv, ret, "DeleteFile: %s", path); + ret = t_ret; + } + + return (ret); +} diff --git a/db/os_windows/os_yield.c b/db/os_windows/os_yield.c new file mode 100644 index 000000000..200633cc7 --- /dev/null +++ b/db/os_windows/os_yield.c @@ -0,0 +1,27 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1997-2006 + * Oracle Corporation. All rights reserved. + * + * $Id: os_yield.c,v 12.7 2006/08/24 14:46:22 bostic Exp $ + */ + +#include "db_config.h" + +#include "db_int.h" + +/* + * __os_yield -- + * Yield the processor. + */ +void +__os_yield(dbenv) + DB_ENV *dbenv; +{ + /* + * The call to Sleep(0) is specified by MSDN to yield the current + * thread's time slice to another thread of equal or greater priority. + */ + Sleep(0); +} |