summaryrefslogtreecommitdiff
path: root/db/os
diff options
context:
space:
mode:
Diffstat (limited to 'db/os')
-rw-r--r--db/os/os_abs.c31
-rw-r--r--db/os/os_alloc.c342
-rw-r--r--db/os/os_dir.c108
-rw-r--r--db/os/os_errno.c44
-rw-r--r--db/os/os_fid.c140
-rw-r--r--db/os/os_finit.c111
-rw-r--r--db/os/os_fsync.c90
-rw-r--r--db/os/os_handle.c165
-rw-r--r--db/os/os_map.c436
-rw-r--r--db/os/os_method.c206
-rw-r--r--db/os/os_oflags.c106
-rw-r--r--db/os/os_open.c226
-rw-r--r--db/os/os_region.c116
-rw-r--r--db/os/os_rename.c46
-rw-r--r--db/os/os_root.c36
-rw-r--r--db/os/os_rpath.c69
-rw-r--r--db/os/os_rw.c147
-rw-r--r--db/os/os_seek.c76
-rw-r--r--db/os/os_sleep.c77
-rw-r--r--db/os/os_spin.c109
-rw-r--r--db/os/os_stat.c108
-rw-r--r--db/os/os_tmpdir.c119
-rw-r--r--db/os/os_unlink.c106
23 files changed, 3014 insertions, 0 deletions
diff --git a/db/os/os_abs.c b/db/os/os_abs.c
new file mode 100644
index 000000000..04be98733
--- /dev/null
+++ b/db/os/os_abs.c
@@ -0,0 +1,31 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_abs.c,v 11.3 2000/02/14 03:00:04 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __os_abspath --
+ * Return if a path is an absolute path.
+ *
+ * PUBLIC: int __os_abspath __P((const char *));
+ */
+int
+__os_abspath(path)
+ const char *path;
+{
+ return (path[0] == '/');
+}
diff --git a/db/os/os_alloc.c b/db/os/os_alloc.c
new file mode 100644
index 000000000..ee4a0f3c9
--- /dev/null
+++ b/db/os/os_alloc.c
@@ -0,0 +1,342 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_alloc.c,v 11.18 2000/11/30 00:58:42 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+#ifdef DIAGNOSTIC
+static void __os_guard __P((void));
+#endif
+
+/*
+ * !!!
+ * Correct for systems that return NULL when you allocate 0 bytes of memory.
+ * There are several places in DB where we allocate the number of bytes held
+ * by the key/data item, and it can be 0. Correct here so that malloc never
+ * returns a NULL for that reason (which behavior is permitted by ANSI). We
+ * could make these calls macros on non-Alpha architectures (that's where we
+ * saw the problem), but it's probably not worth the autoconf complexity.
+ *
+ * !!!
+ * Correct for systems that don't set errno when malloc and friends fail.
+ *
+ * Out of memory.
+ * We wish to hold the whole sky,
+ * But we never will.
+ */
+
+/*
+ * __os_strdup --
+ * The strdup(3) function for DB.
+ *
+ * PUBLIC: int __os_strdup __P((DB_ENV *, const char *, void *));
+ */
+int
+__os_strdup(dbenv, str, storep)
+ DB_ENV *dbenv;
+ const char *str;
+ void *storep;
+{
+ size_t size;
+ int ret;
+ void *p;
+
+ *(void **)storep = NULL;
+
+ size = strlen(str) + 1;
+ if ((ret = __os_malloc(dbenv, size, NULL, &p)) != 0)
+ return (ret);
+
+ memcpy(p, str, size);
+
+ *(void **)storep = p;
+ return (0);
+}
+
+/*
+ * __os_calloc --
+ * The calloc(3) function for DB.
+ *
+ * PUBLIC: int __os_calloc __P((DB_ENV *, size_t, size_t, void *));
+ */
+int
+__os_calloc(dbenv, num, size, storep)
+ DB_ENV *dbenv;
+ size_t num, size;
+ void *storep;
+{
+ void *p;
+ int ret;
+
+ size *= num;
+ if ((ret = __os_malloc(dbenv, size, NULL, &p)) != 0)
+ return (ret);
+
+ memset(p, 0, size);
+
+ *(void **)storep = p;
+ return (0);
+}
+
+/*
+ * __os_malloc --
+ * The malloc(3) function for DB.
+ *
+ * PUBLIC: int __os_malloc __P((DB_ENV *, size_t, void *(*)(size_t), void *));
+ */
+int
+__os_malloc(dbenv, size, db_malloc, storep)
+ DB_ENV *dbenv;
+ size_t size;
+ void *(*db_malloc) __P((size_t)), *storep;
+{
+ int ret;
+ void *p;
+
+ *(void **)storep = NULL;
+
+ /* Never allocate 0 bytes -- some C libraries don't like it. */
+ if (size == 0)
+ ++size;
+#ifdef DIAGNOSTIC
+ else
+ ++size; /* Add room for a guard byte. */
+#endif
+
+ /* Some C libraries don't correctly set errno when malloc(3) fails. */
+ __os_set_errno(0);
+ if (db_malloc != NULL)
+ p = db_malloc(size);
+ else if (__db_jump.j_malloc != NULL)
+ p = __db_jump.j_malloc(size);
+ else
+ p = malloc(size);
+ if (p == NULL) {
+ ret = __os_get_errno();
+ if (ret == 0) {
+ __os_set_errno(ENOMEM);
+ ret = ENOMEM;
+ }
+ __db_err(dbenv,
+ "malloc: %s: %lu", strerror(ret), (u_long)size);
+ return (ret);
+ }
+
+#ifdef DIAGNOSTIC
+ /*
+ * Guard bytes: if #DIAGNOSTIC is defined, we allocate an additional
+ * byte after the memory and set it to a special value that we check
+ * for when the memory is free'd. This is fine for structures, but
+ * not quite so fine for strings. There are places in DB where memory
+ * is allocated sufficient to hold the largest possible string that
+ * we'll see, and then only some subset of the memory is used. To
+ * support this usage, the __os_freestr() function checks the byte
+ * after the string's nul, which may or may not be the last byte in
+ * the originally allocated memory.
+ */
+ memset(p, CLEAR_BYTE, size); /* Initialize guard byte. */
+#endif
+ *(void **)storep = p;
+
+ return (0);
+}
+
+/*
+ * __os_realloc --
+ * The realloc(3) function for DB.
+ *
+ * PUBLIC: int __os_realloc __P((DB_ENV *,
+ * PUBLIC: size_t, void *(*)(void *, size_t), void *));
+ */
+int
+__os_realloc(dbenv, size, db_realloc, storep)
+ DB_ENV *dbenv;
+ size_t size;
+ void *(*db_realloc) __P((void *, size_t)), *storep;
+{
+ int ret;
+ void *p, *ptr;
+
+ ptr = *(void **)storep;
+
+ /* If we haven't yet allocated anything yet, simply call malloc. */
+ if (ptr == NULL && db_realloc == NULL)
+ return (__os_malloc(dbenv, size, NULL, storep));
+
+ /* Never allocate 0 bytes -- some C libraries don't like it. */
+ if (size == 0)
+ ++size;
+#ifdef DIAGNOSTIC
+ else
+ ++size; /* Add room for a guard byte. */
+#endif
+
+ /*
+ * Some C libraries don't correctly set errno when realloc(3) fails.
+ *
+ * Don't overwrite the original pointer, there are places in DB we
+ * try to continue after realloc fails.
+ */
+ __os_set_errno(0);
+ if (db_realloc != NULL)
+ p = db_realloc(ptr, size);
+ else if (__db_jump.j_realloc != NULL)
+ p = __db_jump.j_realloc(ptr, size);
+ else
+ p = realloc(ptr, size);
+ if (p == NULL) {
+ if ((ret = __os_get_errno()) == 0) {
+ ret = ENOMEM;
+ __os_set_errno(ENOMEM);
+ }
+ __db_err(dbenv,
+ "realloc: %s: %lu", strerror(ret), (u_long)size);
+ return (ret);
+ }
+#ifdef DIAGNOSTIC
+ ((u_int8_t *)p)[size - 1] = CLEAR_BYTE; /* Initialize guard byte. */
+#endif
+
+ *(void **)storep = p;
+
+ return (0);
+}
+
+/*
+ * __os_free --
+ * The free(3) function for DB.
+ *
+ * PUBLIC: void __os_free __P((void *, size_t));
+ */
+void
+__os_free(ptr, size)
+ void *ptr;
+ size_t size;
+{
+#ifdef DIAGNOSTIC
+ if (size != 0) {
+ /*
+ * Check that the guard byte (one past the end of the memory) is
+ * still CLEAR_BYTE.
+ */
+ if (((u_int8_t *)ptr)[size] != CLEAR_BYTE)
+ __os_guard();
+
+ /* Clear memory. */
+ if (size != 0)
+ memset(ptr, CLEAR_BYTE, size);
+ }
+#else
+ COMPQUIET(size, 0);
+#endif
+
+ if (__db_jump.j_free != NULL)
+ __db_jump.j_free(ptr);
+ else
+ free(ptr);
+}
+
+/*
+ * __os_freestr --
+ * The free(3) function for DB, freeing a string.
+ *
+ * PUBLIC: void __os_freestr __P((void *));
+ */
+void
+__os_freestr(ptr)
+ void *ptr;
+{
+#ifdef DIAGNOSTIC
+ size_t size;
+
+ size = strlen(ptr) + 1;
+
+ /*
+ * Check that the guard byte (one past the end of the memory) is
+ * still CLEAR_BYTE.
+ */
+ if (((u_int8_t *)ptr)[size] != CLEAR_BYTE)
+ __os_guard();
+
+ /* Clear memory. */
+ memset(ptr, CLEAR_BYTE, size);
+#endif
+
+ if (__db_jump.j_free != NULL)
+ __db_jump.j_free(ptr);
+ else
+ free(ptr);
+}
+
+#ifdef DIAGNOSTIC
+/*
+ * __os_guard --
+ * Complain and abort.
+ */
+static void
+__os_guard()
+{
+ /*
+ * Eventually, once we push a DB_ENV handle down to these
+ * routines, we should use the standard output channels.
+ */
+ fprintf(stderr, "Guard byte incorrect during free.\n");
+ abort();
+ /* NOTREACHED */
+}
+#endif
+
+/*
+ * __ua_memcpy --
+ * Copy memory to memory without relying on any kind of alignment.
+ *
+ * There are places in DB that we have unaligned data, for example,
+ * when we've stored a structure in a log record as a DBT, and now
+ * we want to look at it. Unfortunately, if you have code like:
+ *
+ * struct a {
+ * int x;
+ * } *p;
+ *
+ * void *func_argument;
+ * int local;
+ *
+ * p = (struct a *)func_argument;
+ * memcpy(&local, p->x, sizeof(local));
+ *
+ * compilers optimize to use inline instructions requiring alignment,
+ * and records in the log don't have any particular alignment. (This
+ * isn't a compiler bug, because it's a structure they're allowed to
+ * assume alignment.)
+ *
+ * Casting the memcpy arguments to (u_int8_t *) appears to work most
+ * of the time, but we've seen examples where it wasn't sufficient
+ * and there's nothing in ANSI C that requires that work.
+ *
+ * PUBLIC: void *__ua_memcpy __P((void *, const void *, size_t));
+ */
+void *
+__ua_memcpy(dst, src, len)
+ void *dst;
+ const void *src;
+ size_t len;
+{
+ return ((void *)memcpy(dst, src, len));
+}
diff --git a/db/os/os_dir.c b/db/os/os_dir.c
new file mode 100644
index 000000000..50d00a556
--- /dev/null
+++ b/db/os/os_dir.c
@@ -0,0 +1,108 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_dir.c,v 11.8 2000/06/27 17:29:52 sue Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif
+
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+/*
+ * __os_dirlist --
+ * Return a list of the files in a directory.
+ *
+ * PUBLIC: int __os_dirlist __P((DB_ENV *, const char *, char ***, int *));
+ */
+int
+__os_dirlist(dbenv, dir, namesp, cntp)
+ DB_ENV *dbenv;
+ const char *dir;
+ char ***namesp;
+ int *cntp;
+{
+ struct dirent *dp;
+ DIR *dirp;
+ int arraysz, cnt, ret;
+ char **names;
+
+ if (__db_jump.j_dirlist != NULL)
+ return (__db_jump.j_dirlist(dir, namesp, cntp));
+
+#ifdef HAVE_VXWORKS
+ if ((dirp = opendir((char *)dir)) == NULL)
+#else
+ if ((dirp = opendir(dir)) == NULL)
+#endif
+ return (__os_get_errno());
+ names = NULL;
+ for (arraysz = cnt = 0; (dp = readdir(dirp)) != NULL; ++cnt) {
+ if (cnt >= arraysz) {
+ arraysz += 100;
+ if ((ret = __os_realloc(dbenv,
+ arraysz * sizeof(names[0]), NULL, &names)) != 0)
+ goto nomem;
+ }
+ if ((ret = __os_strdup(dbenv, dp->d_name, &names[cnt])) != 0)
+ goto nomem;
+ }
+ (void)closedir(dirp);
+
+ *namesp = names;
+ *cntp = cnt;
+ return (0);
+
+nomem: if (names != NULL)
+ __os_dirfree(names, cnt);
+ if (dirp != NULL)
+ (void)closedir(dirp);
+ return (ret);
+}
+
+/*
+ * __os_dirfree --
+ * Free the list of files.
+ *
+ * PUBLIC: void __os_dirfree __P((char **, int));
+ */
+void
+__os_dirfree(names, cnt)
+ char **names;
+ int cnt;
+{
+ if (__db_jump.j_dirfree != NULL)
+ __db_jump.j_dirfree(names, cnt);
+ else {
+ while (cnt > 0)
+ __os_free(names[--cnt], 0);
+ __os_free(names, 0);
+ }
+}
diff --git a/db/os/os_errno.c b/db/os/os_errno.c
new file mode 100644
index 000000000..f9b60f635
--- /dev/null
+++ b/db/os/os_errno.c
@@ -0,0 +1,44 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_errno.c,v 11.3 2000/02/14 03:00:05 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <errno.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __os_get_errno --
+ * Return the value of errno.
+ *
+ * PUBLIC: int __os_get_errno __P((void));
+ */
+int
+__os_get_errno()
+{
+ /* This routine must be able to return the same value repeatedly. */
+ return (errno);
+}
+
+/*
+ * __os_set_errno --
+ * Set the value of errno.
+ *
+ * PUBLIC: void __os_set_errno __P((int));
+ */
+void
+__os_set_errno(evalue)
+ int evalue;
+{
+ errno = evalue;
+}
diff --git a/db/os/os_fid.c b/db/os/os_fid.c
new file mode 100644
index 000000000..f853f6a8d
--- /dev/null
+++ b/db/os/os_fid.c
@@ -0,0 +1,140 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_fid.c,v 11.7 2000/10/26 14:17:05 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#if TIME_WITH_SYS_TIME
+#include <sys/time.h>
+#include <time.h>
+#else
+#if HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <time.h>
+#endif
+#endif
+
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+
+#define SERIAL_INIT 0
+static u_int32_t fid_serial = SERIAL_INIT;
+
+/*
+ * __os_fileid --
+ * Return a unique identifier for a file.
+ *
+ * PUBLIC: int __os_fileid __P((DB_ENV *, const char *, int, u_int8_t *));
+ */
+int
+__os_fileid(dbenv, fname, unique_okay, fidp)
+ DB_ENV *dbenv;
+ const char *fname;
+ int unique_okay;
+ u_int8_t *fidp;
+{
+ struct stat sb;
+ size_t i;
+ int ret;
+ u_int32_t tmp;
+ u_int8_t *p;
+
+ /* Clear the buffer. */
+ memset(fidp, 0, DB_FILE_ID_LEN);
+
+ /* On POSIX/UNIX, use a dev/inode pair. */
+#ifdef HAVE_VXWORKS
+ if (stat((char *)fname, &sb)) {
+#else
+ if (stat(fname, &sb)) {
+#endif
+ ret = __os_get_errno();
+ __db_err(dbenv, "%s: %s", fname, strerror(ret));
+ return (ret);
+ }
+
+ /*
+ * Initialize/increment the serial number we use to help avoid
+ * fileid collisions. Note that we don't bother with locking;
+ * it's unpleasant to do from down in here, and if we race on
+ * this no real harm will be done, since the finished fileid
+ * has so many other components.
+ *
+ * We increment by 100000 on each call as a simple way of
+ * randomizing; simply incrementing seems potentially less useful
+ * if pids are also simply incremented, since this is process-local
+ * and we may be one of a set of processes starting up. 100000
+ * pushes us out of pid space on most platforms, and has few
+ * interesting properties in base 2.
+ */
+ if (fid_serial == SERIAL_INIT)
+ fid_serial = (u_int32_t)getpid();
+ else
+ fid_serial += 100000;
+
+ /*
+ * !!!
+ * Nothing is ever big enough -- on Sparc V9, st_ino, st_dev and the
+ * time_t types are all 8 bytes. As DB_FILE_ID_LEN is only 20 bytes,
+ * we convert to a (potentially) smaller fixed-size type and use it.
+ *
+ * We don't worry about byte sexing or the actual variable sizes.
+ *
+ * When this routine is called from the DB access methods, it's only
+ * called once -- whatever ID is generated when a database is created
+ * is stored in the database file's metadata, and that is what is
+ * saved in the mpool region's information to uniquely identify the
+ * file.
+ *
+ * When called from the mpool layer this routine will be called each
+ * time a new thread of control wants to share the file, which makes
+ * things tougher. As far as byte sexing goes, since the mpool region
+ * lives on a single host, there's no issue of that -- the entire
+ * region is byte sex dependent. As far as variable sizes go, we make
+ * the simplifying assumption that 32-bit and 64-bit processes will
+ * get the same 32-bit values if we truncate any returned 64-bit value
+ * to a 32-bit value. When we're called from the mpool layer, though,
+ * we need to be careful not to include anything that isn't
+ * reproducible for a given file, such as the timestamp or serial
+ * number.
+ */
+ tmp = (u_int32_t)sb.st_ino;
+ for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i)
+ *fidp++ = *p++;
+
+ tmp = (u_int32_t)sb.st_dev;
+ for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i)
+ *fidp++ = *p++;
+
+ if (unique_okay) {
+ /*
+ * We want the number of seconds, not the high-order 0 bits,
+ * so convert the returned time_t to a (potentially) smaller
+ * fixed-size type.
+ */
+ tmp = (u_int32_t)time(NULL);
+ for (p = (u_int8_t *)&tmp, i = sizeof(u_int32_t); i > 0; --i)
+ *fidp++ = *p++;
+
+ for (p = (u_int8_t *)&fid_serial, i = sizeof(u_int32_t);
+ i > 0; --i)
+ *fidp++ = *p++;
+ }
+
+ return (0);
+}
diff --git a/db/os/os_finit.c b/db/os/os_finit.c
new file mode 100644
index 000000000..23b606ecb
--- /dev/null
+++ b/db/os/os_finit.c
@@ -0,0 +1,111 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_finit.c,v 11.8 2000/11/30 00:58:42 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __os_finit --
+ * Initialize a regular file, optionally zero-filling it as well.
+ *
+ * PUBLIC: int __os_finit __P((DB_ENV *, DB_FH *, size_t, int));
+ */
+int
+__os_finit(dbenv, fhp, size, zerofill)
+ DB_ENV *dbenv;
+ DB_FH *fhp;
+ size_t size;
+ int zerofill;
+{
+ db_pgno_t pages;
+ size_t i;
+ size_t nw;
+ u_int32_t relative;
+ int ret;
+ char buf[OS_VMPAGESIZE];
+
+ /* Write nuls to the new bytes. */
+ memset(buf, 0, sizeof(buf));
+
+ /*
+ * Extend the region by writing the last page. If the region is >4Gb,
+ * increment may be larger than the maximum possible seek "relative"
+ * argument, as it's an unsigned 32-bit value. Break the offset into
+ * pages of 1MB each so that we don't overflow (2^20 + 2^32 is bigger
+ * than any memory I expect to see for awhile).
+ */
+ if ((ret = __os_seek(dbenv, fhp, 0, 0, 0, 0, DB_OS_SEEK_END)) != 0)
+ return (ret);
+ pages = (size - OS_VMPAGESIZE) / MEGABYTE;
+ relative = (size - OS_VMPAGESIZE) % MEGABYTE;
+ if ((ret = __os_seek(dbenv,
+ fhp, MEGABYTE, pages, relative, 0, DB_OS_SEEK_CUR)) != 0)
+ return (ret);
+ if ((ret = __os_write(dbenv, fhp, buf, sizeof(buf), &nw)) != 0)
+ return (ret);
+ if (nw != sizeof(buf))
+ return (EIO);
+
+ /*
+ * We may want to guarantee that there is enough disk space for the
+ * file, so we also write a byte to each page. We write the byte
+ * because reading it is insufficient on systems smart enough not to
+ * instantiate disk pages to satisfy a read (e.g., Solaris).
+ */
+ if (zerofill) {
+ pages = size / MEGABYTE;
+ relative = size % MEGABYTE;
+ if ((ret = __os_seek(dbenv, fhp,
+ MEGABYTE, pages, relative, 1, DB_OS_SEEK_END)) != 0)
+ return (ret);
+
+ /* Write a byte to each page. */
+ for (i = 0; i < size; i += OS_VMPAGESIZE) {
+ if ((ret = __os_write(dbenv, fhp, buf, 1, &nw)) != 0)
+ return (ret);
+ if (nw != 1)
+ return (EIO);
+ if ((ret = __os_seek(dbenv, fhp,
+ 0, 0, OS_VMPAGESIZE - 1, 0, DB_OS_SEEK_CUR)) != 0)
+ return (ret);
+ }
+ }
+ return (0);
+}
+
+/*
+ * __os_fpinit --
+ * Initialize a page in a regular file.
+ *
+ * PUBLIC: int __os_fpinit __P((DB_ENV *, DB_FH *, db_pgno_t, int, int));
+ */
+int
+__os_fpinit(dbenv, fhp, pgno, pagecount, pagesize)
+ DB_ENV *dbenv;
+ DB_FH *fhp;
+ db_pgno_t pgno;
+ int pagecount, pagesize;
+{
+ COMPQUIET(dbenv, NULL);
+ COMPQUIET(fhp, NULL);
+ COMPQUIET(pgno, 0);
+ COMPQUIET(pagecount, 0);
+ COMPQUIET(pagesize, 0);
+
+ return (0);
+}
diff --git a/db/os/os_fsync.c b/db/os/os_fsync.c
new file mode 100644
index 000000000..f5fd5f56a
--- /dev/null
+++ b/db/os/os_fsync.c
@@ -0,0 +1,90 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_fsync.c,v 11.9 2000/04/04 23:29:20 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <fcntl.h> /* XXX: Required by __hp3000s900 */
+#include <unistd.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+#ifdef HAVE_VXWORKS
+#include "ioLib.h"
+
+#define fsync(fd) __vx_fsync(fd);
+
+int
+__vx_fsync(fd)
+ int fd;
+{
+ int ret;
+
+ /*
+ * The results of ioctl are driver dependent. Some will return the
+ * number of bytes sync'ed. Only if it returns 'ERROR' should we
+ * flag it.
+ */
+ if ((ret = ioctl(fd, FIOSYNC, 0)) != ERROR)
+ return (0);
+ return (ret);
+}
+#endif
+
+#ifdef __hp3000s900
+#define fsync(fd) __mpe_fsync(fd);
+
+int
+__mpe_fsync(fd)
+ int fd;
+{
+ extern FCONTROL(short, short, void *);
+
+ FCONTROL(_MPE_FILENO(fd), 2, NULL); /* Flush the buffers */
+ FCONTROL(_MPE_FILENO(fd), 6, NULL); /* Write the EOF */
+ return (0);
+}
+#endif
+
+/*
+ * __os_fsync --
+ * Flush a file descriptor.
+ *
+ * PUBLIC: int __os_fsync __P((DB_ENV *, DB_FH *));
+ */
+int
+__os_fsync(dbenv, fhp)
+ DB_ENV *dbenv;
+ DB_FH *fhp;
+{
+ int ret;
+
+ /*
+ * Do nothing if the file descriptor has been marked as not requiring
+ * any sync to disk.
+ */
+ if (F_ISSET(fhp, DB_FH_NOSYNC))
+ return (0);
+
+ ret = __db_jump.j_fsync != NULL ?
+ __db_jump.j_fsync(fhp->fd) : fsync(fhp->fd);
+
+ if (ret != 0) {
+ ret = __os_get_errno();
+ __db_err(dbenv, "fsync %s", strerror(ret));
+ }
+ return (ret);
+}
diff --git a/db/os/os_handle.c b/db/os/os_handle.c
new file mode 100644
index 000000000..b127c5ff5
--- /dev/null
+++ b/db/os/os_handle.c
@@ -0,0 +1,165 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_handle.c,v 11.19 2000/11/30 00:58:42 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+/*
+ * __os_openhandle --
+ * Open a file, using POSIX 1003.1 open flags.
+ *
+ * PUBLIC: int __os_openhandle __P((DB_ENV *, const char *, int, int, DB_FH *));
+ */
+int
+__os_openhandle(dbenv, name, flags, mode, fhp)
+ DB_ENV *dbenv;
+ const char *name;
+ int flags, mode;
+ DB_FH *fhp;
+{
+ int ret, nrepeat;
+#ifdef HAVE_VXWORKS
+ int newflags;
+#endif
+
+ memset(fhp, 0, sizeof(*fhp));
+
+ /* If the application specified an interface, use it. */
+ if (__db_jump.j_open != NULL) {
+ if ((fhp->fd = __db_jump.j_open(name, flags, mode)) == -1)
+ return (__os_get_errno());
+ F_SET(fhp, DB_FH_VALID);
+ return (0);
+ }
+
+ for (ret = 0, nrepeat = 1; nrepeat < 4; ++nrepeat) {
+#ifdef HAVE_VXWORKS
+ /*
+ * VxWorks does not support O_CREAT on open, you have to use
+ * creat() instead. (It does not support O_EXCL or O_TRUNC
+ * either, even though they are defined "for future support".)
+ * If O_EXCL is specified, single thread and try to open the
+ * file. If successful, return EEXIST. Otherwise, call creat
+ * and then end single threading.
+ */
+ if (LF_ISSET(O_CREAT)) {
+ DB_BEGIN_SINGLE_THREAD;
+ newflags = flags & ~(O_CREAT | O_EXCL);
+ if (LF_ISSET(O_EXCL)) {
+ if ((fhp->fd =
+ open(name, newflags, mode)) != -1) {
+ /*
+ * If we get here, we want O_EXCL
+ * create, and it exists. Close and
+ * return EEXISTS.
+ */
+ (void)close(fhp->fd);
+ DB_END_SINGLE_THREAD;
+ return (EEXIST);
+ }
+ /*
+ * XXX
+ * Assume any error means non-existence.
+ * Unfortunately return values (even for
+ * non-existence) are driver specific so
+ * there is no single error we can use to
+ * verify we truly got the equivalent of
+ * ENOENT.
+ */
+ }
+ fhp->fd = creat(name, newflags);
+ DB_END_SINGLE_THREAD;
+ } else
+
+ /* FALLTHROUGH */
+#endif
+#ifdef __VMS
+ /*
+ * !!!
+ * Open with full sharing on VMS.
+ *
+ * We use these flags because they are the ones set by the VMS
+ * CRTL mmap() call when it opens a file, and we have to be
+ * able to open files that mmap() has previously opened, e.g.,
+ * when we're joining already existing DB regions.
+ */
+ fhp->fd = open(name, flags, mode, "shr=get,put,upd,del,upi");
+#else
+ fhp->fd = open(name, flags, mode);
+#endif
+
+ if (fhp->fd == -1) {
+ /*
+ * If it's a "temporary" error, we retry up to 3 times,
+ * waiting up to 12 seconds. While it's not a problem
+ * if we can't open a database, an inability to open a
+ * log file is cause for serious dismay.
+ */
+ ret = __os_get_errno();
+ if (ret == ENFILE || ret == EMFILE || ret == ENOSPC) {
+ (void)__os_sleep(dbenv, nrepeat * 2, 0);
+ continue;
+ }
+ } else {
+#if defined(HAVE_FCNTL_F_SETFD)
+ /* Deny file descriptor access to any child process. */
+ if (fcntl(fhp->fd, F_SETFD, 1) == -1) {
+ ret = __os_get_errno();
+ __db_err(dbenv, "fcntl(F_SETFD): %s",
+ strerror(ret));
+ (void)__os_closehandle(fhp);
+ } else
+#endif
+ F_SET(fhp, DB_FH_VALID);
+ }
+ break;
+ }
+
+ return (ret);
+}
+
+/*
+ * __os_closehandle --
+ * Close a file.
+ *
+ * PUBLIC: int __os_closehandle __P((DB_FH *));
+ */
+int
+__os_closehandle(fhp)
+ DB_FH *fhp;
+{
+ int ret;
+
+ /* Don't close file descriptors that were never opened. */
+ DB_ASSERT(F_ISSET(fhp, DB_FH_VALID) && fhp->fd != -1);
+
+ ret = __db_jump.j_close != NULL ?
+ __db_jump.j_close(fhp->fd) : close(fhp->fd);
+
+ /*
+ * Smash the POSIX file descriptor -- it's never tested, but we want
+ * to catch any mistakes.
+ */
+ fhp->fd = -1;
+ F_CLR(fhp, DB_FH_VALID);
+
+ return (ret == 0 ? 0 : __os_get_errno());
+}
diff --git a/db/os/os_map.c b/db/os/os_map.c
new file mode 100644
index 000000000..bb96a917d
--- /dev/null
+++ b/db/os/os_map.c
@@ -0,0 +1,436 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_map.c,v 11.32 2000/11/30 00:58:42 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#ifdef HAVE_MMAP
+#include <sys/mman.h>
+#endif
+
+#ifdef HAVE_SHMGET
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#endif
+
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_ext.h"
+#include "os_jump.h"
+
+#ifdef HAVE_MMAP
+static int __os_map __P((DB_ENV *, char *, DB_FH *, size_t, int, int, void **));
+#endif
+#ifndef HAVE_SHMGET
+static int __db_nosystemmem __P((DB_ENV *));
+#endif
+
+/*
+ * __os_r_sysattach --
+ * Create/join a shared memory region.
+ *
+ * PUBLIC: int __os_r_sysattach __P((DB_ENV *, REGINFO *, REGION *));
+ */
+int
+__os_r_sysattach(dbenv, infop, rp)
+ DB_ENV *dbenv;
+ REGINFO *infop;
+ REGION *rp;
+{
+ if (F_ISSET(dbenv, DB_ENV_SYSTEM_MEM)) {
+ /*
+ * If the region is in system memory on UNIX, we use shmget(2).
+ *
+ * !!!
+ * There exist spinlocks that don't work in shmget memory, e.g.,
+ * the HP/UX msemaphore interface. If we don't have locks that
+ * will work in shmget memory, we better be private and not be
+ * threaded. If we reach this point, we know we're public, so
+ * it's an error.
+ */
+#if defined(MUTEX_NO_SHMGET_LOCKS)
+ __db_err(dbenv,
+ "architecture does not support locks inside system shared memory");
+ return (EINVAL);
+#endif
+#if defined(HAVE_SHMGET)
+ {
+ key_t segid;
+ int id, ret;
+
+ /*
+ * We could potentially create based on REGION_CREATE_OK, but
+ * that's dangerous -- we might get crammed in sideways if
+ * some of the expected regions exist but others do not. Also,
+ * if the requested size differs from an existing region's
+ * actual size, then all sorts of nasty things can happen.
+ * Basing create solely on REGION_CREATE is much safer -- a
+ * recovery will get us straightened out.
+ */
+ if (F_ISSET(infop, REGION_CREATE)) {
+ /*
+ * The application must give us a base System V IPC key
+ * value. Adjust that value based on the region's ID,
+ * and correct so the user's original value appears in
+ * the ipcs output.
+ */
+ if (dbenv->shm_key == INVALID_REGION_SEGID) {
+ __db_err(dbenv,
+ "no base system shared memory ID specified");
+ return (EINVAL);
+ }
+ segid = (key_t)(dbenv->shm_key + (infop->id - 1));
+
+ /*
+ * If map to an existing region, assume the application
+ * crashed and we're restarting. Delete the old region
+ * and re-try. If that fails, return an error, the
+ * application will have to select a different segment
+ * ID or clean up some other way.
+ */
+ if ((id = shmget(segid, 0, 0)) != -1) {
+ (void)shmctl(id, IPC_RMID, NULL);
+ if ((id = shmget(segid, 0, 0)) != -1) {
+ __db_err(dbenv,
+ "shmget: key: %ld: shared system memory region already exists",
+ (long)segid);
+ return (EAGAIN);
+ }
+ }
+ if ((id =
+ shmget(segid, rp->size, IPC_CREAT | 0600)) == -1) {
+ ret = __os_get_errno();
+ __db_err(dbenv,
+ "shmget: key: %ld: unable to create shared system memory region: %s",
+ (long)segid, strerror(ret));
+ return (ret);
+ }
+ rp->segid = id;
+ } else
+ id = rp->segid;
+
+ if ((infop->addr = shmat(id, NULL, 0)) == (void *)-1) {
+ infop->addr = NULL;
+ ret = __os_get_errno();
+ __db_err(dbenv,
+ "shmat: id %d: unable to attach to shared system memory region: %s",
+ id, strerror(ret));
+ return (ret);
+ }
+
+ return (0);
+ }
+#else
+ return (__db_nosystemmem(dbenv));
+#endif
+ }
+
+#ifdef HAVE_MMAP
+ {
+ DB_FH fh;
+ int ret;
+
+ /*
+ * Try to open/create the shared region file. We DO NOT need to
+ * ensure that multiple threads/processes attempting to
+ * simultaneously create the region are properly ordered,
+ * our caller has already taken care of that.
+ */
+ if ((ret = __os_open(dbenv, infop->name, DB_OSO_REGION |
+ (F_ISSET(infop, REGION_CREATE_OK) ? DB_OSO_CREATE : 0),
+ infop->mode, &fh)) != 0)
+ __db_err(dbenv, "%s: %s", infop->name, db_strerror(ret));
+
+ /*
+ * If we created the file, grow it to its full size before mapping
+ * it in. We really want to avoid touching the buffer cache after
+ * mmap(2) is called, doing anything else confuses the hell out of
+ * systems without merged VM/buffer cache systems, or, more to the
+ * point, *badly* merged VM/buffer cache systems.
+ */
+ if (ret == 0 && F_ISSET(infop, REGION_CREATE))
+ ret = __os_finit(dbenv,
+ &fh, rp->size, DB_GLOBAL(db_region_init));
+
+ /* Map the file in. */
+ if (ret == 0)
+ ret = __os_map(dbenv,
+ infop->name, &fh, rp->size, 1, 0, &infop->addr);
+
+ (void)__os_closehandle(&fh);
+
+ return (ret);
+ }
+#else
+ COMPQUIET(infop, NULL);
+ COMPQUIET(rp, NULL);
+ __db_err(dbenv,
+ "architecture lacks mmap(2), shared environments not possible");
+ return (__db_eopnotsup(dbenv));
+#endif
+}
+
+/*
+ * __os_r_sysdetach --
+ * Detach from a shared memory region.
+ *
+ * PUBLIC: int __os_r_sysdetach __P((DB_ENV *, REGINFO *, int));
+ */
+int
+__os_r_sysdetach(dbenv, infop, destroy)
+ DB_ENV *dbenv;
+ REGINFO *infop;
+ int destroy;
+{
+ REGION *rp;
+
+ rp = infop->rp;
+
+ if (F_ISSET(dbenv, DB_ENV_SYSTEM_MEM)) {
+#ifdef HAVE_SHMGET
+ int ret, segid;
+
+ /*
+ * We may be about to remove the memory referenced by rp,
+ * save the segment ID, and (optionally) wipe the original.
+ */
+ segid = rp->segid;
+ if (destroy)
+ rp->segid = INVALID_REGION_SEGID;
+
+ if (shmdt(infop->addr) != 0) {
+ ret = __os_get_errno();
+ __db_err(dbenv, "shmdt: %s", strerror(ret));
+ return (ret);
+ }
+
+ if (destroy && shmctl(segid, IPC_RMID,
+ NULL) != 0 && (ret = __os_get_errno()) != EINVAL) {
+ __db_err(dbenv,
+ "shmctl: id %ld: unable to delete system shared memory region: %s",
+ segid, strerror(ret));
+ return (ret);
+ }
+
+ return (0);
+#else
+ return (__db_nosystemmem(dbenv));
+#endif
+ }
+
+#ifdef HAVE_MMAP
+#ifdef HAVE_MUNLOCK
+ if (F_ISSET(dbenv, DB_ENV_LOCKDOWN))
+ (void)munlock(infop->addr, rp->size);
+#endif
+ if (munmap(infop->addr, rp->size) != 0) {
+ int ret;
+
+ ret = __os_get_errno();
+ __db_err(dbenv, "munmap: %s", strerror(ret));
+ return (ret);
+ }
+
+ if (destroy && __os_region_unlink(dbenv, infop->name) != 0)
+ return (__os_get_errno());
+
+ return (0);
+#else
+ COMPQUIET(destroy, 0);
+ return (EINVAL);
+#endif
+}
+
+/*
+ * __os_mapfile --
+ * Map in a shared memory file.
+ *
+ * PUBLIC: int __os_mapfile __P((DB_ENV *,
+ * PUBLIC: char *, DB_FH *, size_t, int, void **));
+ */
+int
+__os_mapfile(dbenv, path, fhp, len, is_rdonly, addrp)
+ DB_ENV *dbenv;
+ char *path;
+ DB_FH *fhp;
+ int is_rdonly;
+ size_t len;
+ void **addrp;
+{
+#if defined(HAVE_MMAP) && !defined(HAVE_QNX)
+ return (__os_map(dbenv, path, fhp, len, 0, is_rdonly, addrp));
+#else
+ COMPQUIET(dbenv, NULL);
+ COMPQUIET(path, NULL);
+ COMPQUIET(fhp, NULL);
+ COMPQUIET(is_rdonly, 0);
+ COMPQUIET(len, 0);
+ COMPQUIET(addrp, NULL);
+ return (EINVAL);
+#endif
+}
+
+/*
+ * __os_unmapfile --
+ * Unmap the shared memory file.
+ *
+ * PUBLIC: int __os_unmapfile __P((DB_ENV *, void *, size_t));
+ */
+int
+__os_unmapfile(dbenv, addr, len)
+ DB_ENV *dbenv;
+ void *addr;
+ size_t len;
+{
+ /* If the user replaced the map call, call through their interface. */
+ if (__db_jump.j_unmap != NULL)
+ return (__db_jump.j_unmap(addr, len));
+
+#ifdef HAVE_MMAP
+#ifdef HAVE_MUNLOCK
+ if (F_ISSET(dbenv, DB_ENV_LOCKDOWN))
+ (void)munlock(addr, len);
+#else
+ COMPQUIET(dbenv, NULL);
+#endif
+ return (munmap(addr, len) ? __os_get_errno() : 0);
+#else
+ COMPQUIET(dbenv, NULL);
+
+ return (EINVAL);
+#endif
+}
+
+#ifdef HAVE_MMAP
+/*
+ * __os_map --
+ * Call the mmap(2) function.
+ */
+static int
+__os_map(dbenv, path, fhp, len, is_region, is_rdonly, addrp)
+ DB_ENV *dbenv;
+ char *path;
+ DB_FH *fhp;
+ int is_region, is_rdonly;
+ size_t len;
+ void **addrp;
+{
+ void *p;
+ int flags, prot, ret;
+
+ /* If the user replaced the map call, call through their interface. */
+ if (__db_jump.j_map != NULL)
+ return (__db_jump.j_map
+ (path, len, is_region, is_rdonly, addrp));
+
+ /*
+ * If it's read-only, it's private, and if it's not, it's shared.
+ * Don't bother with an additional parameter.
+ */
+ flags = is_rdonly ? MAP_PRIVATE : MAP_SHARED;
+
+#ifdef MAP_FILE
+ /*
+ * Historically, MAP_FILE was required for mapping regular files,
+ * even though it was the default. Some systems have it, some
+ * don't, some that have it set it to 0.
+ */
+ flags |= MAP_FILE;
+#endif
+
+ /*
+ * I know of no systems that implement the flag to tell the system
+ * that the region contains semaphores, but it's not an unreasonable
+ * thing to do, and has been part of the design since forever. I
+ * don't think anyone will object, but don't set it for read-only
+ * files, it doesn't make sense.
+ */
+#ifdef MAP_HASSEMAPHORE
+ if (is_region && !is_rdonly)
+ flags |= MAP_HASSEMAPHORE;
+#else
+ COMPQUIET(is_region, 0);
+#endif
+
+ prot = PROT_READ | (is_rdonly ? 0 : PROT_WRITE);
+
+ /*
+ * XXX
+ * Work around a bug in the VMS V7.1 mmap() implementation. To map
+ * a file into memory on VMS it needs to be opened in a certain way,
+ * originally. To get the file opened in that certain way, the VMS
+ * mmap() closes the file and re-opens it. When it does this, it
+ * doesn't flush any caches out to disk before closing. The problem
+ * this causes us is that when the memory cache doesn't get written
+ * out, the file isn't big enough to match the memory chunk and the
+ * mmap() call fails. This call to fsync() fixes the problem. DEC
+ * thinks this isn't a bug because of language in XPG5 discussing user
+ * responsibility for on-disk and in-memory synchronization.
+ */
+#ifdef VMS
+ if (__os_fsync(dbenv, fhp) == -1)
+ return (__os_get_errno());
+#endif
+
+ /* MAP_FAILED was not defined in early mmap implementations. */
+#ifndef MAP_FAILED
+#define MAP_FAILED -1
+#endif
+ if ((p = mmap(NULL,
+ len, prot, flags, fhp->fd, (off_t)0)) == (void *)MAP_FAILED) {
+ ret = __os_get_errno();
+ __db_err(dbenv, "mmap: %s", strerror(ret));
+ return (ret);
+ }
+
+#ifdef HAVE_MLOCK
+ /*
+ * If it's a region, we want to make sure that the memory isn't paged.
+ * For example, Solaris will page large mpools because it thinks that
+ * I/O buffer memory is more important than we are. The mlock system
+ * call may or may not succeed (mlock is restricted to the super-user
+ * on some systems). Currently, the only other use of mmap in DB is
+ * to map read-only databases -- we don't want them paged, either, so
+ * the call isn't conditional.
+ */
+ if (F_ISSET(dbenv, DB_ENV_LOCKDOWN) && mlock(p, len) != 0) {
+ ret = __os_get_errno();
+ (void)munmap(p, len);
+ __db_err(dbenv, "mlock: %s", strerror(ret));
+ return (ret);
+ }
+#else
+ COMPQUIET(dbenv, NULL);
+#endif
+
+ *addrp = p;
+ return (0);
+}
+#endif
+
+#ifndef HAVE_SHMGET
+/*
+ * __db_nosystemmem --
+ * No system memory environments error message.
+ */
+static int
+__db_nosystemmem(dbenv)
+ DB_ENV *dbenv;
+{
+ __db_err(dbenv,
+ "architecture doesn't support environments in system memory");
+ return (__db_eopnotsup(dbenv));
+}
+#endif
diff --git a/db/os/os_method.c b/db/os/os_method.c
new file mode 100644
index 000000000..0e2bd3947
--- /dev/null
+++ b/db/os/os_method.c
@@ -0,0 +1,206 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_method.c,v 11.6 2000/11/15 19:25:39 sue Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+struct __db_jumptab __db_jump;
+
+int
+db_env_set_func_close(func_close)
+ int (*func_close) __P((int));
+{
+ __db_jump.j_close = func_close;
+ return (0);
+}
+
+int
+db_env_set_func_dirfree(func_dirfree)
+ void (*func_dirfree) __P((char **, int));
+{
+ __db_jump.j_dirfree = func_dirfree;
+ return (0);
+}
+
+int
+db_env_set_func_dirlist(func_dirlist)
+ int (*func_dirlist) __P((const char *, char ***, int *));
+{
+ __db_jump.j_dirlist = func_dirlist;
+ return (0);
+}
+
+int
+db_env_set_func_exists(func_exists)
+ int (*func_exists) __P((const char *, int *));
+{
+ __db_jump.j_exists = func_exists;
+ return (0);
+}
+
+int
+db_env_set_func_free(func_free)
+ void (*func_free) __P((void *));
+{
+ __db_jump.j_free = func_free;
+ return (0);
+}
+
+int
+db_env_set_func_fsync(func_fsync)
+ int (*func_fsync) __P((int));
+{
+ __db_jump.j_fsync = func_fsync;
+ return (0);
+}
+
+int
+db_env_set_func_ioinfo(func_ioinfo)
+ int (*func_ioinfo)
+ __P((const char *, int, u_int32_t *, u_int32_t *, u_int32_t *));
+{
+ __db_jump.j_ioinfo = func_ioinfo;
+ return (0);
+}
+
+int
+db_env_set_func_malloc(func_malloc)
+ void *(*func_malloc) __P((size_t));
+{
+ __db_jump.j_malloc = func_malloc;
+ return (0);
+}
+
+int
+db_env_set_func_map(func_map)
+ int (*func_map) __P((char *, size_t, int, int, void **));
+{
+ __db_jump.j_map = func_map;
+ return (0);
+}
+
+int
+db_env_set_func_open(func_open)
+ int (*func_open) __P((const char *, int, ...));
+{
+ __db_jump.j_open = func_open;
+ return (0);
+}
+
+int
+db_env_set_func_read(func_read)
+ ssize_t (*func_read) __P((int, void *, size_t));
+{
+ __db_jump.j_read = func_read;
+ return (0);
+}
+
+int
+db_env_set_func_realloc(func_realloc)
+ void *(*func_realloc) __P((void *, size_t));
+{
+ __db_jump.j_realloc = func_realloc;
+ return (0);
+}
+
+int
+db_env_set_func_rename(func_rename)
+ int (*func_rename) __P((const char *, const char *));
+{
+ __db_jump.j_rename = func_rename;
+ return (0);
+}
+
+int
+db_env_set_func_seek(func_seek)
+ int (*func_seek) __P((int, size_t, db_pgno_t, u_int32_t, int, int));
+{
+ __db_jump.j_seek = func_seek;
+ return (0);
+}
+
+int
+db_env_set_func_sleep(func_sleep)
+ int (*func_sleep) __P((u_long, u_long));
+{
+ __db_jump.j_sleep = func_sleep;
+ return (0);
+}
+
+int
+db_env_set_func_unlink(func_unlink)
+ int (*func_unlink) __P((const char *));
+{
+ __db_jump.j_unlink = func_unlink;
+ return (0);
+}
+
+int
+db_env_set_func_unmap(func_unmap)
+ int (*func_unmap) __P((void *, size_t));
+{
+ __db_jump.j_unmap = func_unmap;
+ return (0);
+}
+
+int
+db_env_set_func_write(func_write)
+ ssize_t (*func_write) __P((int, const void *, size_t));
+{
+ __db_jump.j_write = func_write;
+ return (0);
+}
+
+int
+db_env_set_func_yield(func_yield)
+ int (*func_yield) __P((void));
+{
+ __db_jump.j_yield = func_yield;
+ return (0);
+}
+
+int
+db_env_set_pageyield(onoff)
+ int onoff;
+{
+ DB_GLOBAL(db_pageyield) = onoff;
+ return (0);
+}
+
+int
+db_env_set_panicstate(onoff)
+ int onoff;
+{
+ DB_GLOBAL(db_panic) = onoff;
+ return (0);
+}
+
+int
+db_env_set_region_init(onoff)
+ int onoff;
+{
+ DB_GLOBAL(db_region_init) = onoff;
+ return (0);
+}
+
+int
+db_env_set_tas_spins(tas_spins)
+ u_int32_t tas_spins;
+{
+ DB_GLOBAL(db_tas_spins) = tas_spins;
+ return (0);
+}
diff --git a/db/os/os_oflags.c b/db/os/os_oflags.c
new file mode 100644
index 000000000..fd413bdac
--- /dev/null
+++ b/db/os/os_oflags.c
@@ -0,0 +1,106 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_oflags.c,v 11.6 2000/10/27 20:32:02 dda Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <fcntl.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __db_oflags --
+ * Convert open(2) flags to DB flags.
+ *
+ * PUBLIC: u_int32_t __db_oflags __P((int));
+ */
+u_int32_t
+__db_oflags(oflags)
+ int oflags;
+{
+ u_int32_t dbflags;
+
+ dbflags = 0;
+
+ if (oflags & O_CREAT)
+ dbflags |= DB_CREATE;
+
+ if (oflags & O_TRUNC)
+ dbflags |= DB_TRUNCATE;
+
+ /*
+ * !!!
+ * Convert POSIX 1003.1 open(2) mode flags to DB flags. This isn't
+ * an exact science as few POSIX implementations have a flag value
+ * for O_RDONLY, it's simply the lack of a write flag.
+ */
+#ifndef O_ACCMODE
+#define O_ACCMODE (O_RDONLY | O_RDWR | O_WRONLY)
+#endif
+ switch (oflags & O_ACCMODE) {
+ case O_RDWR:
+ case O_WRONLY:
+ break;
+ default:
+ dbflags |= DB_RDONLY;
+ break;
+ }
+ return (dbflags);
+}
+
+/*
+ * __db_omode --
+ * Convert a permission string to the correct open(2) flags.
+ *
+ * PUBLIC: int __db_omode __P((const char *));
+ */
+int
+__db_omode(perm)
+ const char *perm;
+{
+ int mode;
+
+#ifndef S_IRUSR
+#ifdef DB_WIN32
+#define S_IRUSR S_IREAD /* R for owner */
+#define S_IWUSR S_IWRITE /* W for owner */
+#define S_IRGRP 0 /* R for group */
+#define S_IWGRP 0 /* W for group */
+#define S_IROTH 0 /* R for other */
+#define S_IWOTH 0 /* W for other */
+#else
+#define S_IRUSR 0000400 /* R for owner */
+#define S_IWUSR 0000200 /* W for owner */
+#define S_IRGRP 0000040 /* R for group */
+#define S_IWGRP 0000020 /* W for group */
+#define S_IROTH 0000004 /* R for other */
+#define S_IWOTH 0000002 /* W for other */
+#endif /* DB_WIN32 */
+#endif
+ mode = 0;
+ if (perm[0] == 'r')
+ mode |= S_IRUSR;
+ if (perm[1] == 'w')
+ mode |= S_IWUSR;
+ if (perm[2] == 'r')
+ mode |= S_IRGRP;
+ if (perm[3] == 'w')
+ mode |= S_IWGRP;
+ if (perm[4] == 'r')
+ mode |= S_IROTH;
+ if (perm[5] == 'w')
+ mode |= S_IWOTH;
+ return (mode);
+}
diff --git a/db/os/os_open.c b/db/os/os_open.c
new file mode 100644
index 000000000..cdc75cd73
--- /dev/null
+++ b/db/os/os_open.c
@@ -0,0 +1,226 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_open.c,v 11.21 2001/01/11 18:19:53 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <fcntl.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+
+#ifdef HAVE_QNX
+static int __os_region_open __P((DB_ENV *, const char *, int, int, DB_FH *));
+#endif
+
+/*
+ * __os_open --
+ * Open a file.
+ *
+ * PUBLIC: int __os_open __P((DB_ENV *, const char *, u_int32_t, int, DB_FH *));
+ */
+int
+__os_open(dbenv, name, flags, mode, fhp)
+ DB_ENV *dbenv;
+ const char *name;
+ u_int32_t flags;
+ int mode;
+ DB_FH *fhp;
+{
+ int oflags, ret;
+
+ oflags = 0;
+
+#if defined(O_BINARY)
+ /*
+ * If there's a binary-mode open flag, set it, we never want any
+ * kind of translation. Some systems do translations by default,
+ * e.g., with Cygwin, the default mode for an open() is set by the
+ * mode of the mount that underlies the file.
+ */
+ oflags |= O_BINARY;
+#endif
+
+ /*
+ * DB requires the POSIX 1003.1 semantic that two files opened at the
+ * same time with DB_OSO_CREATE/O_CREAT and DB_OSO_EXCL/O_EXCL flags
+ * set return an EEXIST failure in at least one.
+ */
+ if (LF_ISSET(DB_OSO_CREATE))
+ oflags |= O_CREAT;
+
+ if (LF_ISSET(DB_OSO_EXCL))
+ oflags |= O_EXCL;
+
+#if defined(O_DSYNC) && defined(XXX_NEVER_SET)
+ /*
+ * !!!
+ * We should get better performance if we push the log files to disk
+ * immediately instead of waiting for the sync. However, Solaris
+ * (and likely any other system based on the 4BSD filesystem releases),
+ * doesn't implement O_DSYNC correctly, only flushing data blocks and
+ * not inode or indirect blocks.
+ */
+ if (LF_ISSET(DB_OSO_LOG))
+ oflags |= O_DSYNC;
+#endif
+
+ if (LF_ISSET(DB_OSO_RDONLY))
+ oflags |= O_RDONLY;
+ else
+ oflags |= O_RDWR;
+
+ if (LF_ISSET(DB_OSO_TRUNC))
+ oflags |= O_TRUNC;
+
+#ifdef HAVE_QNX
+ if (LF_ISSET(DB_OSO_REGION))
+ return (__os_region_open(dbenv, name, oflags, mode, fhp));
+#endif
+ /* Open the file. */
+ if ((ret = __os_openhandle(dbenv, name, oflags, mode, fhp)) != 0)
+ return (ret);
+
+ /*
+ * Delete any temporary file.
+ *
+ * !!!
+ * There's a race here, where we've created a file and we crash before
+ * we can unlink it. Temporary files aren't common in DB, regardless,
+ * it's not a security problem because the file is empty. There's no
+ * reasonable way to avoid the race (playing signal games isn't worth
+ * the portability nightmare), so we just live with it.
+ */
+ if (LF_ISSET(DB_OSO_TEMP))
+ (void)__os_unlink(dbenv, name);
+
+ return (0);
+}
+
+#ifdef HAVE_QNX
+/*
+ * __os_region_open --
+ * Open a shared memory region file using POSIX shm_open.
+ */
+static int
+__os_region_open(dbenv, name, oflags, mode, fhp)
+ DB_ENV *dbenv;
+ const char *name;
+ int oflags;
+ int mode;
+ DB_FH *fhp;
+{
+ int ret;
+ char *newname;
+
+ if ((ret = __os_shmname(dbenv, name, &newname)) != 0)
+ goto err;
+ memset(fhp, 0, sizeof(*fhp));
+ fhp->fd = shm_open(newname, oflags, mode);
+ if (fhp->fd == -1)
+ ret = __os_get_errno();
+ else {
+#ifdef HAVE_FCNTL_F_SETFD
+ /* Deny file descriptor acces to any child process. */
+ if (fcntl(fhp->fd, F_SETFD, 1) == -1) {
+ ret = __os_get_errno();
+ __db_err(dbenv, "fcntl(F_SETFD): %s", strerror(ret));
+ __os_closehandle(fhp);
+ } else
+#endif
+ F_SET(fhp, DB_FH_VALID);
+ }
+ /*
+ * Once we have created the object, we don't need the name
+ * anymore. Other callers of this will convert themselves.
+ */
+err:
+ if (newname != NULL)
+ __os_free(newname, 0);
+ return (ret);
+}
+
+/*
+ * __os_shmname --
+ * Translate a pathname into a shm_open memory object name.
+ *
+ * PUBLIC: int __os_shmname __P((DB_ENV *, const char *, char **));
+ */
+int
+__os_shmname(dbenv, name, newnamep)
+ DB_ENV *dbenv;
+ const char *name;
+ char **newnamep;
+{
+ int ret;
+ size_t size;
+ char *p, *q, *tmpname;
+
+ *newnamep = NULL;
+
+ /*
+ * POSIX states that the name for a shared memory object
+ * may begin with a slash '/' and support for subsequent
+ * slashes is implementation-dependent. The one implementation
+ * we know of right now, QNX, forbids subsequent slashes.
+ * We don't want to be parsing pathnames for '.' and '..' in
+ * the middle. In order to allow easy conversion, just take
+ * the last component as the shared memory name. This limits
+ * the namespace a bit, but makes our job a lot easier.
+ *
+ * We should not be modifying user memory, so we use our own.
+ * Caller is responsible for freeing the memory we give them.
+ */
+ if ((ret = __os_strdup(dbenv, name, &tmpname)) != 0)
+ return (ret);
+ /*
+ * Skip over filename component.
+ * We set that separator to '\0' so that we can do another
+ * __db_rpath. However, we immediately set it then to ':'
+ * so that we end up with the tailing directory:filename.
+ * We require a home directory component. Return an error
+ * if there isn't one.
+ */
+ p = __db_rpath(tmpname);
+ if (p == NULL)
+ return (EINVAL);
+ if (p != tmpname) {
+ *p = '\0';
+ q = p;
+ p = __db_rpath(tmpname);
+ *q = ':';
+ }
+ if (p != NULL) {
+ /*
+ * If we have a path component, copy and return it.
+ */
+ ret = __os_strdup(dbenv, p, newnamep);
+ __os_free(tmpname, 0);
+ return (ret);
+ }
+
+ /*
+ * We were given just a directory name with no path components.
+ * Add a leading slash, and copy the remainder.
+ */
+ size = strlen(tmpname) + 2;
+ if ((ret = __os_malloc(dbenv, size, NULL, &p)) != 0)
+ return (ret);
+ p[0] = '/';
+ memcpy(&p[1], tmpname, size-1);
+ __os_free(tmpname, 0);
+ *newnamep = p;
+ return (0);
+}
+#endif
diff --git a/db/os/os_region.c b/db/os/os_region.c
new file mode 100644
index 000000000..1e36fc2cb
--- /dev/null
+++ b/db/os/os_region.c
@@ -0,0 +1,116 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_region.c,v 11.9 2000/11/30 00:58:42 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+/*
+ * __os_r_attach --
+ * Attach to a shared memory region.
+ *
+ * PUBLIC: int __os_r_attach __P((DB_ENV *, REGINFO *, REGION *));
+ */
+int
+__os_r_attach(dbenv, infop, rp)
+ DB_ENV *dbenv;
+ REGINFO *infop;
+ REGION *rp;
+{
+ int ret;
+ /* Round off the requested size for the underlying VM. */
+ OS_VMROUNDOFF(rp->size);
+
+#ifdef DB_REGIONSIZE_MAX
+ /* Some architectures have hard limits on the maximum region size. */
+ if (rp->size > DB_REGIONSIZE_MAX) {
+ __db_err(dbenv, "region size %lu is too large; maximum is %lu",
+ (u_long)rp->size, (u_long)DB_REGIONSIZE_MAX);
+ return (EINVAL);
+ }
+#endif
+
+ /*
+ * If a region is private, malloc the memory.
+ *
+ * !!!
+ * If this fails because the region is too large to malloc, mmap(2)
+ * using the MAP_ANON or MAP_ANONYMOUS flags would be an alternative.
+ * I don't know of any architectures (yet!) where malloc is a problem.
+ */
+ if (F_ISSET(dbenv, DB_ENV_PRIVATE)) {
+#if defined(MUTEX_NO_MALLOC_LOCKS)
+ /*
+ * !!!
+ * There exist spinlocks that don't work in malloc memory, e.g.,
+ * the HP/UX msemaphore interface. If we don't have locks that
+ * will work in malloc memory, we better not be private or not
+ * be threaded.
+ */
+ if (F_ISSET(dbenv, DB_ENV_THREAD)) {
+ __db_err(dbenv, "%s",
+ "architecture does not support locks inside process-local (malloc) memory");
+ __db_err(dbenv, "%s",
+ "application may not specify both DB_PRIVATE and DB_THREAD");
+ return (EINVAL);
+ }
+#endif
+ if ((ret =
+ __os_malloc(dbenv, rp->size, NULL, &infop->addr)) != 0)
+ return (ret);
+#if defined(UMRW) && !defined(DIAGNOSTIC)
+ memset(infop->addr, CLEAR_BYTE, rp->size);
+#endif
+ return (0);
+ }
+
+ /* If the user replaced the map call, call through their interface. */
+ if (__db_jump.j_map != NULL)
+ return (__db_jump.j_map(infop->name,
+ rp->size, 1, 0, &infop->addr));
+
+ return (__os_r_sysattach(dbenv, infop, rp));
+}
+
+/*
+ * __os_r_detach --
+ * Detach from a shared memory region.
+ *
+ * PUBLIC: int __os_r_detach __P((DB_ENV *, REGINFO *, int));
+ */
+int
+__os_r_detach(dbenv, infop, destroy)
+ DB_ENV *dbenv;
+ REGINFO *infop;
+ int destroy;
+{
+ REGION *rp;
+
+ rp = infop->rp;
+
+ /* If a region is private, free the memory. */
+ if (F_ISSET(dbenv, DB_ENV_PRIVATE)) {
+ __os_free(infop->addr, rp->size);
+ return (0);
+ }
+
+ /* If the user replaced the map call, call through their interface. */
+ if (__db_jump.j_unmap != NULL)
+ return (__db_jump.j_unmap(infop->addr, rp->size));
+
+ return (__os_r_sysdetach(dbenv, infop, destroy));
+}
diff --git a/db/os/os_rename.c b/db/os/os_rename.c
new file mode 100644
index 000000000..8108bba67
--- /dev/null
+++ b/db/os/os_rename.c
@@ -0,0 +1,46 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_rename.c,v 11.6 2000/04/14 16:56:33 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+/*
+ * __os_rename --
+ * Rename a file.
+ *
+ * PUBLIC: int __os_rename __P((DB_ENV *, const char *, const char *));
+ */
+int
+__os_rename(dbenv, old, new)
+ DB_ENV *dbenv;
+ const char *old, *new;
+{
+ int ret;
+
+ ret = __db_jump.j_rename != NULL ?
+ __db_jump.j_rename(old, new) : rename(old, new);
+
+ if (ret == -1) {
+ ret = __os_get_errno();
+ __db_err(dbenv, "Rename %s %s: %s", old, new, strerror(ret));
+ }
+
+ return (ret);
+}
diff --git a/db/os/os_root.c b/db/os/os_root.c
new file mode 100644
index 000000000..753285c1b
--- /dev/null
+++ b/db/os/os_root.c
@@ -0,0 +1,36 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_root.c,v 11.4 2000/02/14 03:00:05 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * __os_isroot --
+ * Return if user has special permissions.
+ *
+ * PUBLIC: int __os_isroot __P((void));
+ */
+int
+__os_isroot()
+{
+#ifdef HAVE_GETUID
+ return (getuid() == 0);
+#else
+ return (0);
+#endif
+}
diff --git a/db/os/os_rpath.c b/db/os/os_rpath.c
new file mode 100644
index 000000000..75d394ef2
--- /dev/null
+++ b/db/os/os_rpath.c
@@ -0,0 +1,69 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_rpath.c,v 11.5 2000/06/30 13:40:30 sue Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#ifdef HAVE_VXWORKS
+#include "iosLib.h"
+#endif
+
+/*
+ * __db_rpath --
+ * Return the last path separator in the path or NULL if none found.
+ *
+ * PUBLIC: char *__db_rpath __P((const char *));
+ */
+char *
+__db_rpath(path)
+ const char *path;
+{
+ const char *s, *last;
+#ifdef HAVE_VXWORKS
+ DEV_HDR *dummy;
+ char *ptail;
+
+ /*
+ * VxWorks devices can be rooted at any name. We want to
+ * skip over the device name and not take into account any
+ * PATH_SEPARATOR characters that might be in that name.
+ *
+ * XXX [#2393]
+ * VxWorks supports having a filename directly follow a device
+ * name with no separator. I.e. to access a file 'xxx' in
+ * the top level directory of a device mounted at "mydrive"
+ * you could say "mydrivexxx" or "mydrive/xxx" or "mydrive\xxx".
+ * We do not support the first usage here.
+ * XXX
+ */
+ if ((dummy = iosDevFind((char *)path, &ptail)) == NULL)
+ s = path;
+ else
+ s = ptail;
+#else
+ s = path;
+#endif
+
+ last = NULL;
+ if (PATH_SEPARATOR[1] != '\0') {
+ for (; s[0] != '\0'; ++s)
+ if (strchr(PATH_SEPARATOR, s[0]) != NULL)
+ last = s;
+ } else
+ for (; s[0] != '\0'; ++s)
+ if (s[0] == PATH_SEPARATOR[0])
+ last = s;
+ return ((char *)last);
+}
diff --git a/db/os/os_rw.c b/db/os/os_rw.c
new file mode 100644
index 000000000..7e8e1255d
--- /dev/null
+++ b/db/os/os_rw.c
@@ -0,0 +1,147 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_rw.c,v 11.15 2000/11/15 19:25:39 sue Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+/*
+ * __os_io --
+ * Do an I/O.
+ *
+ * PUBLIC: int __os_io __P((DB_ENV *, DB_IO *, int, size_t *));
+ */
+int
+__os_io(dbenv, db_iop, op, niop)
+ DB_ENV *dbenv;
+ DB_IO *db_iop;
+ int op;
+ size_t *niop;
+{
+ int ret;
+
+#if defined(HAVE_PREAD) && defined(HAVE_PWRITE)
+ switch (op) {
+ case DB_IO_READ:
+ if (__db_jump.j_read != NULL)
+ goto slow;
+ *niop = pread(db_iop->fhp->fd, db_iop->buf,
+ db_iop->bytes, (off_t)db_iop->pgno * db_iop->pagesize);
+ break;
+ case DB_IO_WRITE:
+ if (__db_jump.j_write != NULL)
+ goto slow;
+ *niop = pwrite(db_iop->fhp->fd, db_iop->buf,
+ db_iop->bytes, (off_t)db_iop->pgno * db_iop->pagesize);
+ break;
+ }
+ if (*niop == (size_t)db_iop->bytes)
+ return (0);
+slow:
+#endif
+ MUTEX_THREAD_LOCK(dbenv, db_iop->mutexp);
+
+ if ((ret = __os_seek(dbenv, db_iop->fhp,
+ db_iop->pagesize, db_iop->pgno, 0, 0, DB_OS_SEEK_SET)) != 0)
+ goto err;
+ switch (op) {
+ case DB_IO_READ:
+ ret = __os_read(dbenv,
+ db_iop->fhp, db_iop->buf, db_iop->bytes, niop);
+ break;
+ case DB_IO_WRITE:
+ ret = __os_write(dbenv,
+ db_iop->fhp, db_iop->buf, db_iop->bytes, niop);
+ break;
+ }
+
+err: MUTEX_THREAD_UNLOCK(dbenv, db_iop->mutexp);
+
+ return (ret);
+
+}
+
+/*
+ * __os_read --
+ * Read from a file handle.
+ *
+ * PUBLIC: int __os_read __P((DB_ENV *, DB_FH *, void *, size_t, size_t *));
+ */
+int
+__os_read(dbenv, fhp, addr, len, nrp)
+ DB_ENV *dbenv;
+ DB_FH *fhp;
+ void *addr;
+ size_t len;
+ size_t *nrp;
+{
+ size_t offset;
+ ssize_t nr;
+ int ret;
+ u_int8_t *taddr;
+
+ for (taddr = addr,
+ offset = 0; offset < len; taddr += nr, offset += nr) {
+ if ((nr = __db_jump.j_read != NULL ?
+ __db_jump.j_read(fhp->fd, taddr, len - offset) :
+ read(fhp->fd, taddr, len - offset)) < 0) {
+ ret = __os_get_errno();
+ __db_err(dbenv, "read: 0x%x, %lu: %s", taddr,
+ (u_long)len-offset, strerror(ret));
+ return (ret);
+ }
+ if (nr == 0)
+ break;
+ }
+ *nrp = taddr - (u_int8_t *)addr;
+ return (0);
+}
+
+/*
+ * __os_write --
+ * Write to a file handle.
+ *
+ * PUBLIC: int __os_write __P((DB_ENV *, DB_FH *, void *, size_t, size_t *));
+ */
+int
+__os_write(dbenv, fhp, addr, len, nwp)
+ DB_ENV *dbenv;
+ DB_FH *fhp;
+ void *addr;
+ size_t len;
+ size_t *nwp;
+{
+ size_t offset;
+ ssize_t nw;
+ int ret;
+ u_int8_t *taddr;
+
+ for (taddr = addr,
+ offset = 0; offset < len; taddr += nw, offset += nw)
+ if ((nw = __db_jump.j_write != NULL ?
+ __db_jump.j_write(fhp->fd, taddr, len - offset) :
+ write(fhp->fd, taddr, len - offset)) < 0) {
+ ret = __os_get_errno();
+ __db_err(dbenv, "write: 0x%x, %lu: %s", taddr,
+ (u_long)len-offset, strerror(ret));
+ return (ret);
+ }
+ *nwp = len;
+ return (0);
+}
diff --git a/db/os/os_seek.c b/db/os/os_seek.c
new file mode 100644
index 000000000..1c4dc2238
--- /dev/null
+++ b/db/os/os_seek.c
@@ -0,0 +1,76 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_seek.c,v 11.12 2000/11/30 00:58:42 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+/*
+ * __os_seek --
+ * Seek to a page/byte offset in the file.
+ *
+ * PUBLIC: int __os_seek __P((DB_ENV *,
+ * PUBLIC: DB_FH *, size_t, db_pgno_t, u_int32_t, int, DB_OS_SEEK));
+ */
+int
+__os_seek(dbenv, fhp, pgsize, pageno, relative, isrewind, db_whence)
+ DB_ENV *dbenv;
+ DB_FH *fhp;
+ size_t pgsize;
+ db_pgno_t pageno;
+ u_int32_t relative;
+ int isrewind;
+ DB_OS_SEEK db_whence;
+{
+ off_t offset;
+ int ret, whence;
+
+ switch (db_whence) {
+ case DB_OS_SEEK_CUR:
+ whence = SEEK_CUR;
+ break;
+ case DB_OS_SEEK_END:
+ whence = SEEK_END;
+ break;
+ case DB_OS_SEEK_SET:
+ whence = SEEK_SET;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ if (__db_jump.j_seek != NULL)
+ ret = __db_jump.j_seek(fhp->fd,
+ pgsize, pageno, relative, isrewind, whence);
+ else {
+ offset = (off_t)pgsize * pageno + relative;
+ if (isrewind)
+ offset = -offset;
+ ret =
+ lseek(fhp->fd, offset, whence) == -1 ? __os_get_errno() : 0;
+ }
+
+ if (ret != 0)
+ __db_err(dbenv, "seek: %lu %d %d: %s",
+ (u_long)pgsize * pageno + relative,
+ isrewind, db_whence, strerror(ret));
+
+ return (ret);
+}
diff --git a/db/os/os_sleep.c b/db/os/os_sleep.c
new file mode 100644
index 000000000..137cd73b7
--- /dev/null
+++ b/db/os/os_sleep.c
@@ -0,0 +1,77 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_sleep.c,v 11.7 2000/04/07 14:26:36 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+
+#ifdef HAVE_VXWORKS
+#include <sys/times.h>
+#include <time.h>
+#else
+#if TIME_WITH_SYS_TIME
+#include <sys/time.h>
+#include <time.h>
+#else
+#if HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <time.h>
+#endif /* HAVE_SYS_TIME_H */
+#endif /* TIME_WITH SYS_TIME */
+#endif /* HAVE_VXWORKS */
+
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+/*
+ * __os_sleep --
+ * Yield the processor for a period of time.
+ *
+ * PUBLIC: int __os_sleep __P((DB_ENV *, u_long, u_long));
+ */
+int
+__os_sleep(dbenv, secs, usecs)
+ DB_ENV *dbenv;
+ u_long secs, usecs; /* Seconds and microseconds. */
+{
+ struct timeval t;
+ int ret;
+
+ /* Don't require that the values be normalized. */
+ for (; usecs >= 1000000; usecs -= 1000000)
+ ++secs;
+
+ if (__db_jump.j_sleep != NULL)
+ return (__db_jump.j_sleep(secs, usecs));
+
+ /*
+ * It's important that we yield the processor here so that other
+ * processes or threads are permitted to run.
+ */
+ t.tv_sec = secs;
+ t.tv_usec = usecs;
+ ret = select(0, NULL, NULL, NULL, &t) == -1 ? __os_get_errno() : 0;
+
+ if (ret != 0)
+ __db_err(dbenv, "select: %s", strerror(ret));
+
+ return (ret);
+}
diff --git a/db/os/os_spin.c b/db/os/os_spin.c
new file mode 100644
index 000000000..b0800b988
--- /dev/null
+++ b/db/os/os_spin.c
@@ -0,0 +1,109 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_spin.c,v 11.5 2000/03/30 01:46:42 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#if defined(HAVE_PSTAT_GETDYNAMIC)
+#include <sys/pstat.h>
+#endif
+
+#include <limits.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+#if defined(HAVE_PSTAT_GETDYNAMIC)
+/*
+ * __os_pstat_getdynamic --
+ * HP/UX.
+ */
+static int
+__os_pstat_getdynamic()
+{
+ struct pst_dynamic psd;
+
+ return (pstat_getdynamic(&psd,
+ sizeof(psd), (size_t)1, 0) == -1 ? 1 : psd.psd_proc_cnt);
+}
+#endif
+
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
+/*
+ * __os_sysconf --
+ * Solaris, Linux.
+ */
+static int
+__os_sysconf()
+{
+ int nproc;
+
+ return ((nproc = sysconf(_SC_NPROCESSORS_ONLN)) > 1 ? nproc : 1);
+}
+#endif
+
+/*
+ * __os_spin --
+ * Return the number of default spins before blocking.
+ *
+ * PUBLIC: int __os_spin __P((void));
+ */
+int
+__os_spin()
+{
+ /*
+ * If the application specified a value or we've already figured it
+ * out, return it.
+ *
+ * XXX
+ * We don't want to repeatedly call the underlying function because
+ * it can be expensive (e.g., requiring multiple filesystem accesses
+ * under Debian Linux).
+ */
+ if (DB_GLOBAL(db_tas_spins) != 0)
+ return (DB_GLOBAL(db_tas_spins));
+
+ DB_GLOBAL(db_tas_spins) = 1;
+#if defined(HAVE_PSTAT_GETDYNAMIC)
+ DB_GLOBAL(db_tas_spins) = __os_pstat_getdynamic();
+#endif
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
+ DB_GLOBAL(db_tas_spins) = __os_sysconf();
+#endif
+
+ /*
+ * Spin 50 times per processor, we have anecdotal evidence that this
+ * is a reasonable value.
+ */
+ if (DB_GLOBAL(db_tas_spins) != 1)
+ DB_GLOBAL(db_tas_spins) *= 50;
+
+ return (DB_GLOBAL(db_tas_spins));
+}
+
+/*
+ * __os_yield --
+ * Yield the processor.
+ *
+ * PUBLIC: void __os_yield __P((DB_ENV*, u_long));
+ */
+void
+__os_yield(dbenv, usecs)
+ DB_ENV *dbenv;
+ u_long usecs;
+{
+ if (__db_jump.j_yield != NULL && __db_jump.j_yield() == 0)
+ return;
+ __os_sleep(dbenv, 0, usecs);
+}
diff --git a/db/os/os_stat.c b/db/os/os_stat.c
new file mode 100644
index 000000000..1590e8ecd
--- /dev/null
+++ b/db/os/os_stat.c
@@ -0,0 +1,108 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_stat.c,v 11.8 2000/10/27 20:32:02 dda Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+/*
+ * __os_exists --
+ * Return if the file exists.
+ *
+ * PUBLIC: int __os_exists __P((const char *, int *));
+ */
+int
+__os_exists(path, isdirp)
+ const char *path;
+ int *isdirp;
+{
+ struct stat sb;
+
+ if (__db_jump.j_exists != NULL)
+ return (__db_jump.j_exists(path, isdirp));
+
+#ifdef HAVE_VXWORKS
+ if (stat((char *)path, &sb) != 0)
+#else
+ if (stat(path, &sb) != 0)
+#endif
+ return (__os_get_errno());
+
+#if !defined(S_ISDIR) || defined(STAT_MACROS_BROKEN)
+#ifdef DB_WIN32
+#define S_ISDIR(m) (_S_IFDIR & (m))
+#else
+#define S_ISDIR(m) (((m) & 0170000) == 0040000)
+#endif
+#endif
+ if (isdirp != NULL)
+ *isdirp = S_ISDIR(sb.st_mode);
+
+ return (0);
+}
+
+/*
+ * __os_ioinfo --
+ * Return file size and I/O size; abstracted to make it easier
+ * to replace.
+ *
+ * PUBLIC: int __os_ioinfo __P((DB_ENV *, const char *,
+ * PUBLIC: DB_FH *, u_int32_t *, u_int32_t *, u_int32_t *));
+ */
+int
+__os_ioinfo(dbenv, path, fhp, mbytesp, bytesp, iosizep)
+ DB_ENV *dbenv;
+ const char *path;
+ DB_FH *fhp;
+ u_int32_t *mbytesp, *bytesp, *iosizep;
+{
+ int ret;
+ struct stat sb;
+
+ if (__db_jump.j_ioinfo != NULL)
+ return (__db_jump.j_ioinfo(path,
+ fhp->fd, mbytesp, bytesp, iosizep));
+
+ if (fstat(fhp->fd, &sb) == -1) {
+ ret = __os_get_errno();
+ __db_err(dbenv, "fstat: %s", strerror(ret));
+ return (ret);
+ }
+
+ /* Return the size of the file. */
+ if (mbytesp != NULL)
+ *mbytesp = sb.st_size / MEGABYTE;
+ if (bytesp != NULL)
+ *bytesp = sb.st_size % MEGABYTE;
+
+ /*
+ * Return the underlying filesystem blocksize, if available.
+ *
+ * XXX
+ * Check for a 0 size -- the HP MPE/iX architecture has st_blksize,
+ * but it's always 0.
+ */
+#ifdef HAVE_ST_BLKSIZE
+ if (iosizep != NULL && (*iosizep = sb.st_blksize) == 0)
+ *iosizep = DB_DEF_IOSIZE;
+#else
+ if (iosizep != NULL)
+ *iosizep = DB_DEF_IOSIZE;
+#endif
+ return (0);
+}
diff --git a/db/os/os_tmpdir.c b/db/os/os_tmpdir.c
new file mode 100644
index 000000000..0dff5c5b7
--- /dev/null
+++ b/db/os/os_tmpdir.c
@@ -0,0 +1,119 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_tmpdir.c,v 11.16 2001/01/08 20:42:06 bostic Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdlib.h>
+#endif
+
+#include "db_int.h"
+
+#ifdef macintosh
+#include <TFileSpec.h>
+#endif
+
+/*
+ * __os_tmpdir --
+ * Set the temporary directory path.
+ *
+ * The order of items in the list structure and the order of checks in
+ * the environment are documented.
+ *
+ * PUBLIC: int __os_tmpdir __P((DB_ENV *, u_int32_t));
+ */
+int
+__os_tmpdir(dbenv, flags)
+ DB_ENV *dbenv;
+ u_int32_t flags;
+{
+ /*
+ * !!!
+ * Don't change this to:
+ *
+ * static const char * const list[]
+ *
+ * because it creates a text relocation in position independent code.
+ */
+ static const char * list[] = {
+ "/var/tmp",
+ "/usr/tmp",
+ "/temp", /* Windows. */
+ "/tmp",
+ "C:/temp", /* Windows. */
+ "C:/tmp", /* Windows. */
+ NULL
+ };
+ const char * const *lp, *p;
+
+ /* Use the environment if it's permitted and initialized. */
+ if (LF_ISSET(DB_USE_ENVIRON) ||
+ (LF_ISSET(DB_USE_ENVIRON_ROOT) && __os_isroot())) {
+ if ((p = getenv("TMPDIR")) != NULL && p[0] == '\0') {
+ __db_err(dbenv, "illegal TMPDIR environment variable");
+ return (EINVAL);
+ }
+ /* Windows */
+ if (p == NULL && (p = getenv("TEMP")) != NULL && p[0] == '\0') {
+ __db_err(dbenv, "illegal TEMP environment variable");
+ return (EINVAL);
+ }
+ /* Windows */
+ if (p == NULL && (p = getenv("TMP")) != NULL && p[0] == '\0') {
+ __db_err(dbenv, "illegal TMP environment variable");
+ return (EINVAL);
+ }
+ /* Macintosh */
+ if (p == NULL &&
+ (p = getenv("TempFolder")) != NULL && p[0] == '\0') {
+ __db_err(dbenv,
+ "illegal TempFolder environment variable");
+ return (EINVAL);
+ }
+ if (p != NULL)
+ return (__os_strdup(dbenv, p, &dbenv->db_tmp_dir));
+ }
+
+#ifdef macintosh
+ /* Get the path to the temporary folder. */
+ {FSSpec spec;
+
+ if (!Special2FSSpec(kTemporaryFolderType,
+ kOnSystemDisk, 0, &spec))
+ return (__os_strdup(dbenv,
+ FSp2FullPath(&spec), &dbenv->db_tmp_dir));
+ }
+#endif
+#ifdef DB_WIN32
+ /* Get the path to the temporary directory. */
+ {int isdir, len;
+ char *eos, temp[MAXPATHLEN + 1];
+
+ if ((len = GetTempPath(sizeof(temp) - 1, temp)) > 2) {
+ eos = &temp[len];
+ *eos-- = '\0';
+ if (*eos == '\\' || *eos == '/')
+ *eos = '\0';
+ if (__os_exists(temp, &isdir) == 0 && isdir != 0)
+ return (__os_strdup(dbenv,
+ temp, &dbenv->db_tmp_dir));
+ }
+ }
+#endif
+
+ /* Step through the static list looking for a possibility. */
+ for (lp = list; *lp != NULL; ++lp)
+ if (__os_exists(*lp, NULL) == 0)
+ return (__os_strdup(dbenv, *lp, &dbenv->db_tmp_dir));
+ return (0);
+}
diff --git a/db/os/os_unlink.c b/db/os/os_unlink.c
new file mode 100644
index 000000000..56c401fe3
--- /dev/null
+++ b/db/os/os_unlink.c
@@ -0,0 +1,106 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997, 1998, 1999, 2000
+ * Sleepycat Software. All rights reserved.
+ */
+
+#include "db_config.h"
+
+#ifndef lint
+static const char revid[] = "$Id: os_unlink.c,v 11.13 2000/11/30 00:58:42 ubell Exp $";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_jump.h"
+
+/*
+ * __os_unlink --
+ * Remove a file.
+ *
+ * PUBLIC: int __os_unlink __P((DB_ENV *, const char *));
+ */
+int
+__os_unlink(dbenv, path)
+ DB_ENV *dbenv;
+ const char *path;
+{
+ int ret;
+
+ ret = __db_jump.j_unlink != NULL ?
+ __db_jump.j_unlink(path) :
+#ifdef HAVE_VXWORKS
+ unlink((char *)path);
+#else
+ unlink(path);
+#endif
+ if (ret == -1) {
+ ret = __os_get_errno();
+ /*
+ * XXX
+ * We really shouldn't be looking at this value ourselves,
+ * but ENOENT usually signals that a file is missing, and
+ * we attempt to unlink things (such as v. 2.x environment
+ * regions, in DB_ENV->remove) that we're expecting not to
+ * be there. Reporting errors in these cases is annoying.
+ */
+#ifdef HAVE_VXWORKS
+ /*
+ * XXX
+ * The results of unlink are file system driver specific
+ * on VxWorks. In the case of removing a file that did
+ * not exist, some, at least, return an error, but with
+ * an errno of 0, not ENOENT.
+ *
+ * Code below falls through to original if-statement only
+ * we didn't get a "successful" error.
+ */
+ if (ret != 0)
+ /* FALLTHROUGH */
+#endif
+ if (ret != ENOENT)
+ __db_err(dbenv, "Unlink: %s: %s", path, strerror(ret));
+ }
+
+ return (ret);
+}
+
+/*
+ * __os_region_unlink --
+ * Remove a shared memory object file.
+ *
+ * PUBLIC: int __os_region_unlink __P((DB_ENV *, const char *));
+ */
+int
+__os_region_unlink(dbenv, path)
+ DB_ENV *dbenv;
+ const char *path;
+{
+#ifdef HAVE_QNX
+ int ret;
+ char *newname;
+
+ if ((ret = __os_shmname(dbenv, path, &newname)) != 0)
+ goto err;
+
+ if ((ret = shm_unlink(newname)) != 0) {
+ ret = __os_get_errno();
+ if (ret != ENOENT)
+ __db_err(dbenv, "Shm_unlink: %s: %s",
+ newname, strerror(ret));
+ }
+err:
+ if (newname != NULL)
+ __os_free(newname, 0);
+ return (ret);
+#else
+ return (__os_unlink(dbenv, path));
+#endif
+}