summaryrefslogtreecommitdiff
path: root/daemons
diff options
context:
space:
mode:
authorAnas Nashif <anas.nashif@intel.com>2013-03-05 01:47:43 -0800
committerAnas Nashif <anas.nashif@intel.com>2013-03-05 01:47:43 -0800
commit44a3c2255bc480c82f34db156553a595606d8a0b (patch)
tree5e6df96a6c6e40207cb3a711860e16b543918c0d /daemons
parent8bd28eea831fd5215c12e6fcecc8e9a772398ed9 (diff)
downloaddevice-mapper-44a3c2255bc480c82f34db156553a595606d8a0b.tar.gz
device-mapper-44a3c2255bc480c82f34db156553a595606d8a0b.tar.bz2
device-mapper-44a3c2255bc480c82f34db156553a595606d8a0b.zip
Imported Upstream version 2.02.98upstream/2.02.98upstream/1.02.77
Diffstat (limited to 'daemons')
-rw-r--r--daemons/Makefile.in12
-rw-r--r--daemons/clvmd/Makefile.in18
-rw-r--r--daemons/clvmd/clvm.h28
-rw-r--r--daemons/clvmd/clvmd-cman.c14
-rw-r--r--daemons/clvmd/clvmd-command.c130
-rw-r--r--daemons/clvmd/clvmd-comms.h10
-rw-r--r--daemons/clvmd/clvmd-corosync.c78
-rw-r--r--daemons/clvmd/clvmd-gulm.c1010
-rw-r--r--daemons/clvmd/clvmd-gulm.h9
-rw-r--r--daemons/clvmd/clvmd-openais.c17
-rw-r--r--daemons/clvmd/clvmd-singlenode.c167
-rw-r--r--daemons/clvmd/clvmd.c498
-rw-r--r--daemons/clvmd/clvmd.h5
-rw-r--r--daemons/clvmd/lvm-functions.c264
-rw-r--r--daemons/clvmd/lvm-functions.h3
-rw-r--r--daemons/clvmd/refresh_clvmd.c63
-rw-r--r--daemons/clvmd/tcp-comms.c502
-rw-r--r--daemons/clvmd/tcp-comms.h13
-rw-r--r--daemons/cmirrord/clogd.c28
-rw-r--r--daemons/cmirrord/cluster.c173
-rw-r--r--daemons/cmirrord/functions.c64
-rw-r--r--daemons/cmirrord/link_mon.c4
-rw-r--r--daemons/cmirrord/local.c22
-rw-r--r--daemons/dmeventd/.exported_symbols1
-rw-r--r--daemons/dmeventd/Makefile.in8
-rw-r--r--daemons/dmeventd/dmeventd.c373
-rw-r--r--daemons/dmeventd/dmeventd.h9
-rw-r--r--daemons/dmeventd/libdevmapper-event.c142
-rw-r--r--daemons/dmeventd/libdevmapper-event.h4
-rw-r--r--daemons/dmeventd/plugins/Makefile.in29
-rw-r--r--daemons/dmeventd/plugins/lvm2/.exported_symbols1
-rw-r--r--daemons/dmeventd/plugins/lvm2/Makefile.in6
-rw-r--r--daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c36
-rw-r--r--daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h3
-rw-r--r--daemons/dmeventd/plugins/mirror/Makefile.in6
-rw-r--r--daemons/dmeventd/plugins/mirror/dmeventd_mirror.c42
-rw-r--r--daemons/dmeventd/plugins/raid/.exported_symbols3
-rw-r--r--daemons/dmeventd/plugins/raid/Makefile.in36
-rw-r--r--daemons/dmeventd/plugins/raid/dmeventd_raid.c172
-rw-r--r--daemons/dmeventd/plugins/snapshot/Makefile.in6
-rw-r--r--daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c99
-rw-r--r--daemons/dmeventd/plugins/thin/.exported_symbols3
-rw-r--r--daemons/dmeventd/plugins/thin/Makefile.in36
-rw-r--r--daemons/dmeventd/plugins/thin/dmeventd_thin.c447
-rw-r--r--daemons/lvmetad/Makefile.in59
-rw-r--r--daemons/lvmetad/lvmetad-client.h81
-rw-r--r--daemons/lvmetad/lvmetad-core.c1204
-rwxr-xr-xdaemons/lvmetad/test.sh16
-rw-r--r--daemons/lvmetad/testclient.c127
49 files changed, 3662 insertions, 2419 deletions
diff --git a/daemons/Makefile.in b/daemons/Makefile.in
index ce400d7..c83e0da 100644
--- a/daemons/Makefile.in
+++ b/daemons/Makefile.in
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -15,10 +15,14 @@ srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
-.PHONY: dmeventd clvmd cmirrord
+ifeq ("@BUILD_LVMETAD@", "yes")
+ SUBDIRS += lvmetad
+endif
+
+.PHONY: dmeventd clvmd cmirrord lvmetad
ifneq ("@CLVMD@", "none")
- SUBDIRS = clvmd
+ SUBDIRS += clvmd
endif
ifeq ("@BUILD_CMIRRORD@", "yes")
@@ -33,7 +37,7 @@ endif
endif
ifeq ($(MAKECMDGOALS),distclean)
- SUBDIRS = clvmd cmirrord dmeventd
+ SUBDIRS = clvmd cmirrord dmeventd lvmetad
endif
include $(top_builddir)/make.tmpl
diff --git a/daemons/clvmd/Makefile.in b/daemons/clvmd/Makefile.in
index ee19e6c..9ca11ba 100644
--- a/daemons/clvmd/Makefile.in
+++ b/daemons/clvmd/Makefile.in
@@ -15,18 +15,16 @@ srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
-CCS_LIBS = @CCS_LIBS@
-CCS_CFLAGS = @CCS_CFLAGS@
CMAN_LIBS = @CMAN_LIBS@
CMAN_CFLAGS = @CMAN_CFLAGS@
+CMAP_LIBS = @CMAP_LIBS@
+CMAP_CFLAGS = @CMAP_CFLAGS@
CONFDB_LIBS = @CONFDB_LIBS@
CONFDB_CFLAGS = @CONFDB_CFLAGS@
CPG_LIBS = @CPG_LIBS@
CPG_CFLAGS = @CPG_CFLAGS@
DLM_LIBS = @DLM_LIBS@
DLM_CFLAGS = @DLM_CFLAGS@
-GULM_LIBS = @GULM_LIBS@
-GULM_CFLAGS = @GULM_CFLAGS@
QUORUM_LIBS = @QUORUM_LIBS@
QUORUM_CFLAGS = @QUORUM_CFLAGS@
SALCK_LIBS = @SALCK_LIBS@
@@ -42,13 +40,6 @@ ifeq ("@DEBUG@", "yes")
DEFS += -DDEBUG
endif
-ifneq (,$(findstring gulm,, "@CLVMD@,"))
- SOURCES += clvmd-gulm.c tcp-comms.c
- LMLIBS += $(CCS_LIBS) $(GULM_LIBS)
- CFLAGS += $(CCS_CFLAGS) $(GULM_CFLAGS)
- DEFS += -DUSE_GULM
-endif
-
ifneq (,$(findstring cman,, "@CLVMD@,"))
SOURCES += clvmd-cman.c
LMLIBS += $(CMAN_LIBS) $(CONFDB_LIBS) $(DLM_LIBS)
@@ -65,8 +56,8 @@ endif
ifneq (,$(findstring corosync,, "@CLVMD@,"))
SOURCES += clvmd-corosync.c
- LMLIBS += $(CONFDB_LIBS) $(CPG_LIBS) $(DLM_LIBS) $(QUORUM_LIBS)
- CFLAGS += $(CONFDB_CFLAGS) $(CPG_CFLAGS) $(DLM_CFLAGS) $(QUORUM_CFLAGS)
+ LMLIBS += $(CMAP_LIBS) $(CONFDB_LIBS) $(CPG_LIBS) $(DLM_LIBS) $(QUORUM_LIBS)
+ CFLAGS += $(CMAP_CFLAGS) $(CONFDB_CFLAGS) $(CPG_CFLAGS) $(DLM_CFLAGS) $(QUORUM_CFLAGS)
DEFS += -DUSE_COROSYNC
endif
@@ -76,7 +67,6 @@ ifneq (,$(findstring singlenode,, &quot;@CLVMD@,&quot;))
endif
ifeq ($(MAKECMDGOALS),distclean)
- SOURCES += clvmd-gulm.c tcp-comms.c
SOURCES += clvmd-cman.c
SOURCES += clvmd-openais.c
SOURCES += clvmd-corosync.c
diff --git a/daemons/clvmd/clvm.h b/daemons/clvmd/clvm.h
index c9ea10c..8e24f15 100644
--- a/daemons/clvmd/clvm.h
+++ b/daemons/clvmd/clvm.h
@@ -30,22 +30,23 @@ struct clvm_header {
uint16_t xid; /* Transaction ID */
uint32_t clientid; /* Only used in Daemon->Daemon comms */
int32_t status; /* For replies, whether request succeeded */
- uint32_t arglen; /* Length of argument below.
- If >1500 then it will be passed
+ uint32_t arglen; /* Length of argument below.
+ If >1500 then it will be passed
around the cluster in the system LV */
char node[1]; /* Actually a NUL-terminated string, node name.
- If this is empty then the command is
- forwarded to all cluster nodes unless
- FLAG_LOCAL is also set. */
- char args[1]; /* Arguments for the command follow the
+ If this is empty then the command is
+ forwarded to all cluster nodes unless
+ FLAG_LOCAL or FLAG_REMOTE is also set. */
+ char args[1]; /* Arguments for the command follow the
node name, This member is only
valid if the node name is empty */
} __attribute__ ((packed));
/* Flags */
-#define CLVMD_FLAG_LOCAL 1 /* Only do this on the local node */
-#define CLVMD_FLAG_SYSTEMLV 2 /* Data in system LV under my node name */
-#define CLVMD_FLAG_NODEERRS 4 /* Reply has errors in node-specific portion */
+#define CLVMD_FLAG_LOCAL 1 /* Only do this on the local node */
+#define CLVMD_FLAG_SYSTEMLV 2 /* Data in system LV under my node name */
+#define CLVMD_FLAG_NODEERRS 4 /* Reply has errors in node-specific portion */
+#define CLVMD_FLAG_REMOTE 8 /* Do this on all nodes except for the local node */
/* Name of the local socket to communicate between lvm and clvmd */
static const char CLVMD_SOCKNAME[]= DEFAULT_RUN_DIR "/clvmd.sock";
@@ -53,7 +54,7 @@ static const char CLVMD_SOCKNAME[]= DEFAULT_RUN_DIR "/clvmd.sock";
/* Internal commands & replies */
#define CLVMD_CMD_REPLY 1
#define CLVMD_CMD_VERSION 2 /* Send version around cluster when we start */
-#define CLVMD_CMD_GOAWAY 3 /* Die if received this - we are running
+#define CLVMD_CMD_GOAWAY 3 /* Die if received this - we are running
an incompatible version */
#define CLVMD_CMD_TEST 4 /* Just for mucking about */
@@ -71,4 +72,11 @@ static const char CLVMD_SOCKNAME[]= DEFAULT_RUN_DIR "/clvmd.sock";
#define CLVMD_CMD_SET_DEBUG 42
#define CLVMD_CMD_VG_BACKUP 43
#define CLVMD_CMD_RESTART 44
+#define CLVMD_CMD_SYNC_NAMES 45
+
+/* Used internally by some callers, but not part of the protocol.*/
+#define NODE_ALL "*"
+#define NODE_LOCAL "."
+#define NODE_REMOTE "^"
+
#endif
diff --git a/daemons/clvmd/clvmd-cman.c b/daemons/clvmd/clvmd-cman.c
index 52da2ac..7e76dc4 100644
--- a/daemons/clvmd/clvmd-cman.c
+++ b/daemons/clvmd/clvmd-cman.c
@@ -89,16 +89,17 @@ static int _init_cluster(void)
DEBUGLOG("CMAN initialisation complete\n");
/* Create a lockspace for LV & VG locks to live in */
- lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
+ lockspace = dlm_open_lockspace(LOCKSPACE_NAME);
if (!lockspace) {
- if (errno == EEXIST) {
- lockspace = dlm_open_lockspace(LOCKSPACE_NAME);
- }
+ lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
if (!lockspace) {
- syslog(LOG_ERR, "Unable to create lockspace for CLVM: %m");
+ syslog(LOG_ERR, "Unable to create DLM lockspace for CLVM: %m");
return -1;
}
- }
+ DEBUGLOG("Created DLM lockspace for CLVMD.\n");
+ } else
+ DEBUGLOG("Opened existing DLM lockspace for CLVMD.\n");
+
dlm_ls_pthread_init(lockspace);
DEBUGLOG("DLM initialisation complete\n");
return 0;
@@ -478,6 +479,7 @@ static int _get_cluster_name(char *buf, int buflen)
}
static struct cluster_ops _cluster_cman_ops = {
+ .name = "cman",
.cluster_init_completed = _cluster_init_completed,
.cluster_send_message = _cluster_send_message,
.name_from_csid = _name_from_csid,
diff --git a/daemons/clvmd/clvmd-command.c b/daemons/clvmd/clvmd-command.c
index 49f1197..c5cd461 100644
--- a/daemons/clvmd/clvmd-command.c
+++ b/daemons/clvmd/clvmd-command.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -51,19 +51,16 @@
*/
#include "clvmd-common.h"
-
-#include <pthread.h>
-
#include "clvmd-comms.h"
#include "clvm.h"
#include "clvmd.h"
+#include "lvm-globals.h"
#include "lvm-functions.h"
#include "locking.h"
#include <sys/utsname.h>
-extern debug_t debug;
extern struct cluster_ops *clops;
static int restart_clvmd(void);
@@ -81,6 +78,9 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen,
unsigned char lock_cmd;
unsigned char lock_flags;
+ /* Reset test mode before we start */
+ init_test(0);
+
/* Do the command */
switch (msg->cmd) {
/* Just a test message */
@@ -96,7 +96,9 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen,
*buf = new_buf;
}
if (*buf) {
- uname(&nodeinfo);
+ if (uname(&nodeinfo))
+ memset(&nodeinfo, 0, sizeof(nodeinfo));
+
*retlen = 1 + dm_snprintf(*buf, buflen,
"TEST from %s: %s v%s",
nodeinfo.nodename, args,
@@ -110,14 +112,18 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen,
lockname = &args[2];
/* Check to see if the VG is in use by LVM1 */
status = do_check_lvm1(lockname);
+ if (lock_flags & LCK_TEST_MODE)
+ init_test(1);
do_lock_vg(lock_cmd, lock_flags, lockname);
break;
case CLVMD_CMD_LOCK_LV:
/* This is the biggie */
- lock_cmd = args[0] & (LCK_NONBLOCK | LCK_HOLD | LCK_SCOPE_MASK | LCK_TYPE_MASK);
+ lock_cmd = args[0];
lock_flags = args[1];
lockname = &args[2];
+ if (lock_flags & LCK_TEST_MODE)
+ init_test(1);
status = do_lock_lv(lock_cmd, lock_flags, lockname);
/* Replace EIO with something less scary */
if (status == EIO) {
@@ -139,12 +145,16 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen,
do_refresh_cache();
break;
+ case CLVMD_CMD_SYNC_NAMES:
+ lvm_do_fs_unlock();
+ break;
+
case CLVMD_CMD_SET_DEBUG:
- debug = args[0];
+ clvmd_set_debug((debug_t) args[0]);
break;
case CLVMD_CMD_RESTART:
- restart_clvmd();
+ status = restart_clvmd();
break;
case CLVMD_CMD_GET_CLUSTERNAME:
@@ -182,7 +192,6 @@ static int lock_vg(struct local_client *client)
struct clvm_header *header =
(struct clvm_header *) client->bits.localsock.cmd;
unsigned char lock_cmd;
- unsigned char lock_flags;
int lock_mode;
char *args = header->node + strlen(header->node) + 1;
int lkid;
@@ -204,7 +213,7 @@ static int lock_vg(struct local_client *client)
lock_cmd = args[0] & (LCK_NONBLOCK | LCK_HOLD | LCK_SCOPE_MASK | LCK_TYPE_MASK);
lock_mode = ((int)lock_cmd & LCK_TYPE_MASK);
- lock_flags = args[1];
+ /* lock_flags = args[1]; */
lockname = &args[2];
DEBUGLOG("doing PRE command LOCK_VG '%s' at %x (client=%p)\n", lockname, lock_cmd, client);
@@ -228,7 +237,8 @@ static int lock_vg(struct local_client *client)
if (status)
status = errno;
else
- dm_hash_insert(lock_hash, lockname, (void *)(long)lkid);
+ if (!dm_hash_insert(lock_hash, lockname, (void *)(long)lkid))
+ return ENOMEM;
}
return status;
@@ -245,10 +255,11 @@ int do_pre_command(struct local_client *client)
unsigned char lock_cmd;
unsigned char lock_flags;
char *args = header->node + strlen(header->node) + 1;
- int lockid;
+ int lockid = 0;
int status = 0;
char *lockname;
+ init_test(0);
switch (header->cmd) {
case CLVMD_CMD_TEST:
status = sync_lock("CLVMD_TEST", LCK_EXCL, 0, &lockid);
@@ -267,6 +278,8 @@ int do_pre_command(struct local_client *client)
lock_cmd = args[0];
lock_flags = args[1];
lockname = &args[2];
+ if (lock_flags & LCK_TEST_MODE)
+ init_test(1);
status = pre_lock_lv(lock_cmd, lock_flags, lockname);
break;
@@ -274,6 +287,7 @@ int do_pre_command(struct local_client *client)
case CLVMD_CMD_GET_CLUSTERNAME:
case CLVMD_CMD_SET_DEBUG:
case CLVMD_CMD_VG_BACKUP:
+ case CLVMD_CMD_SYNC_NAMES:
case CLVMD_CMD_LOCK_QUERY:
case CLVMD_CMD_RESTART:
break;
@@ -297,6 +311,7 @@ int do_post_command(struct local_client *client)
char *args = header->node + strlen(header->node) + 1;
char *lockname;
+ init_test(0);
switch (header->cmd) {
case CLVMD_CMD_TEST:
status =
@@ -304,18 +319,18 @@ int do_post_command(struct local_client *client)
client->bits.localsock.private = 0;
break;
- case CLVMD_CMD_LOCK_VG:
- case CLVMD_CMD_VG_BACKUP:
- case CLVMD_CMD_LOCK_QUERY:
- /* Nothing to do here */
- break;
-
case CLVMD_CMD_LOCK_LV:
lock_cmd = args[0];
lock_flags = args[1];
lockname = &args[2];
+ if (lock_flags & LCK_TEST_MODE)
+ init_test(1);
status = post_lock_lv(lock_cmd, lock_flags, lockname);
break;
+
+ default:
+ /* Nothing to do here */
+ break;
}
return status;
}
@@ -324,90 +339,93 @@ int do_post_command(struct local_client *client)
/* Called when the client is about to be deleted */
void cmd_client_cleanup(struct local_client *client)
{
- if (client->bits.localsock.private) {
-
struct dm_hash_node *v;
- struct dm_hash_table *lock_hash =
- (struct dm_hash_table *)client->bits.localsock.private;
+ struct dm_hash_table *lock_hash;
+ int lkid;
+ char *lockname;
- dm_hash_iterate(v, lock_hash) {
- int lkid = (int)(long)dm_hash_get_data(lock_hash, v);
- char *lockname = dm_hash_get_key(lock_hash, v);
+ if (!client->bits.localsock.private)
+ return;
+ lock_hash = (struct dm_hash_table *)client->bits.localsock.private;
+
+ dm_hash_iterate(v, lock_hash) {
+ lkid = (int)(long)dm_hash_get_data(lock_hash, v);
+ lockname = dm_hash_get_key(lock_hash, v);
DEBUGLOG("cleanup: Unlocking lock %s %x\n", lockname, lkid);
- sync_unlock(lockname, lkid);
+ (void) sync_unlock(lockname, lkid);
}
dm_hash_destroy(lock_hash);
client->bits.localsock.private = 0;
- }
}
static int restart_clvmd(void)
{
- char **argv = NULL;
- char *debug_arg = NULL, *lv_name;
- int i, argc = 0, max_locks = 0;
+ const char **argv;
+ char *lv_name;
+ int argc = 0, max_locks = 0;
struct dm_hash_node *hn = NULL;
+ char debug_arg[16];
+ const char *clvmd = getenv("LVM_CLVMD_BINARY") ? : CLVMD_PATH;
DEBUGLOG("clvmd restart requested\n");
/* Count exclusively-open LVs */
- hn = NULL;
do {
hn = get_next_excl_lock(hn, &lv_name);
- if (lv_name)
+ if (lv_name) {
max_locks++;
- } while (hn && *lv_name);
+ if (!*lv_name)
+ break; /* FIXME: Is this error ? */
+ }
+ } while (hn);
/* clvmd + locks (-E uuid) + debug (-d X) + NULL */
- argv = malloc((max_locks * 2 + 4) * sizeof(*argv));
- if (!argv)
+ if (!(argv = malloc((max_locks * 2 + 5) * sizeof(*argv))))
goto_out;
/*
* Build the command-line
*/
- argv[argc++] = strdup("clvmd");
- if (!argv[0])
- goto_out;
+ argv[argc++] = "clvmd";
/* Propogate debug options */
- if (debug) {
- if (!(debug_arg = malloc(16)) ||
- dm_snprintf(debug_arg, 16, "-d%d", (int)debug) < 0)
+ if (clvmd_get_debug()) {
+ if (dm_snprintf(debug_arg, sizeof(debug_arg), "-d%u", clvmd_get_debug()) < 0)
goto_out;
argv[argc++] = debug_arg;
}
+ argv[argc++] = "-I";
+ argv[argc++] = clops->name;
+
/* Now add the exclusively-open LVs */
+ hn = NULL;
do {
hn = get_next_excl_lock(hn, &lv_name);
if (lv_name) {
- argv[argc] = strdup("-E");
- if (!argv[argc++])
- goto_out;
- argv[argc] = strdup(lv_name);
- if (!argv[argc++])
- goto_out;
-
+ if (!*lv_name)
+ break; /* FIXME: Is this error ? */
+ argv[argc++] = "-E";
+ argv[argc++] = lv_name;
DEBUGLOG("excl lock: %s\n", lv_name);
- hn = get_next_excl_lock(hn, &lv_name);
}
- } while (hn && *lv_name);
- argv[argc++] = NULL;
+ } while (hn);
+ argv[argc] = NULL;
/* Exec new clvmd */
+ DEBUGLOG("--- Restarting %s ---\n", clvmd);
+ for (argc = 1; argv[argc]; argc++) DEBUGLOG("--- %d: %s\n", argc, argv[argc]);
+
/* NOTE: This will fail when downgrading! */
- execve(CLVMD_PATH, argv, NULL);
+ execvp(clvmd, (char **)argv);
out:
/* We failed */
DEBUGLOG("Restart of clvmd failed.\n");
- for (i = 0; i < argc && argv[i]; i++)
- free(argv[i]);
free(argv);
- return 0;
+ return EIO;
}
diff --git a/daemons/clvmd/clvmd-comms.h b/daemons/clvmd/clvmd-comms.h
index fbcfe8b..7207334 100644
--- a/daemons/clvmd/clvmd-comms.h
+++ b/daemons/clvmd/clvmd-comms.h
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -23,6 +23,7 @@
struct local_client;
struct cluster_ops {
+ const char *name;
void (*cluster_init_completed) (void);
int (*cluster_send_message) (const void *buf, int msglen,
@@ -54,13 +55,6 @@ struct cluster_ops {
};
-#ifdef USE_GULM
-# include "tcp-comms.h"
-struct cluster_ops *init_gulm_cluster(void);
-#define MAX_CSID_LEN GULM_MAX_CSID_LEN
-#define MAX_CLUSTER_MEMBER_NAME_LEN GULM_MAX_CLUSTER_MEMBER_NAME_LEN
-#endif
-
#ifdef USE_CMAN
# include <netinet/in.h>
# include "libcman.h"
diff --git a/daemons/clvmd/clvmd-corosync.c b/daemons/clvmd/clvmd-corosync.c
index cfe7150..d85ec1e 100644
--- a/daemons/clvmd/clvmd-corosync.c
+++ b/daemons/clvmd/clvmd-corosync.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2009-2012 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -30,7 +30,15 @@
#include <corosync/cpg.h>
#include <corosync/quorum.h>
-#include <corosync/confdb.h>
+
+#ifdef HAVE_COROSYNC_CONFDB_H
+# include <corosync/confdb.h>
+#elif defined HAVE_COROSYNC_CMAP_H
+# include <corosync/cmap.h>
+#else
+# error "Either HAVE_COROSYNC_CONFDB_H or HAVE_COROSYNC_CMAP_H must be defined."
+#endif
+
#include <libdlm.h>
#include <syslog.h>
@@ -274,6 +282,10 @@ static int _init_cluster(void)
{
cs_error_t err;
+#ifdef QUORUM_SET /* corosync/quorum.h */
+ uint32_t quorum_type;
+#endif
+
node_hash = dm_hash_create(100);
err = cpg_initialize(&cpg_handle,
@@ -285,8 +297,21 @@ static int _init_cluster(void)
return cs_to_errno(err);
}
+#ifdef QUORUM_SET
+ err = quorum_initialize(&quorum_handle,
+ &quorum_callbacks,
+ &quorum_type);
+
+ if (quorum_type != QUORUM_SET) {
+ syslog(LOG_ERR, "Corosync quorum service is not configured");
+ DEBUGLOG("Corosync quorum service is not configured");
+ return EINVAL;
+ }
+#else
err = quorum_initialize(&quorum_handle,
&quorum_callbacks);
+#endif
+
if (err != CS_OK) {
syslog(LOG_ERR, "Cannot initialise Corosync quorum service: %d",
err);
@@ -294,19 +319,18 @@ static int _init_cluster(void)
return cs_to_errno(err);
}
-
/* Create a lockspace for LV & VG locks to live in */
- lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
+ lockspace = dlm_open_lockspace(LOCKSPACE_NAME);
if (!lockspace) {
- if (errno == EEXIST) {
- lockspace = dlm_open_lockspace(LOCKSPACE_NAME);
- }
+ lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
if (!lockspace) {
- syslog(LOG_ERR, "Unable to create lockspace for CLVM: %m");
- quorum_finalize(quorum_handle);
+ syslog(LOG_ERR, "Unable to create DLM lockspace for CLVM: %m");
return -1;
}
- }
+ DEBUGLOG("Created DLM lockspace for CLVMD.\n");
+ } else
+ DEBUGLOG("Opened existing DLM lockspace for CLVMD.\n");
+
dlm_ls_pthread_init(lockspace);
DEBUGLOG("DLM initialisation complete\n");
@@ -552,6 +576,7 @@ static int _cluster_send_message(const void *buf, int msglen, const char *csid,
return cs_to_errno(err);
}
+#ifdef HAVE_COROSYNC_CONFDB_H
/*
* We are not necessarily connected to a Red Hat Cluster system,
* but if we are, this returns the cluster name from cluster.conf.
@@ -598,7 +623,40 @@ out:
return 0;
}
+#elif defined HAVE_COROSYNC_CMAP_H
+
+static int _get_cluster_name(char *buf, int buflen)
+{
+ cmap_handle_t cmap_handle = 0;
+ int result;
+ char *name = NULL;
+
+ /* This is a default in case everything else fails */
+ strncpy(buf, "Corosync", buflen);
+
+ /* Look for a cluster name in cmap */
+ result = cmap_initialize(&cmap_handle);
+ if (result != CS_OK)
+ return 0;
+
+ result = cmap_get_string(cmap_handle, "totem.cluster_name", &name);
+ if (result != CS_OK)
+ goto out;
+
+ memset(buf, 0, buflen);
+ strncpy(buf, name, buflen - 1);
+
+out:
+ if (name)
+ free(name);
+ cmap_finalize(cmap_handle);
+ return 0;
+}
+
+#endif
+
static struct cluster_ops _cluster_corosync_ops = {
+ .name = "corosync",
.cluster_init_completed = NULL,
.cluster_send_message = _cluster_send_message,
.name_from_csid = _name_from_csid,
diff --git a/daemons/clvmd/clvmd-gulm.c b/daemons/clvmd/clvmd-gulm.c
deleted file mode 100644
index 3561004..0000000
--- a/daemons/clvmd/clvmd-gulm.c
+++ /dev/null
@@ -1,1010 +0,0 @@
-/*
- * Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
- *
- * This file is part of LVM2.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU Lesser General Public License v.2.1.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/*
- * This provides the interface between clvmd and gulm as the cluster
- * and lock manager.
- *
- * It also provides the "liblm" functions too as it's hard (and pointless)
- * to seperate them out when using gulm.
- *
- * What it does /not/ provide is the communications between clvmd daemons
- * on the cluster nodes. That is done in tcp-comms.c
- */
-
-#include "clvmd-common.h"
-
-#include <pthread.h>
-#include <sys/utsname.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/file.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <stdint.h>
-#include <signal.h>
-#include <fcntl.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <utmpx.h>
-#include <syslog.h>
-#include <assert.h>
-#include <ccs.h>
-#include <libgulm.h>
-
-#include "locking.h"
-#include "clvm.h"
-#include "clvmd-comms.h"
-#include "lvm-functions.h"
-#include "clvmd.h"
-#include "clvmd-gulm.h"
-
-/* Hash list of nodes in the cluster */
-static struct dm_hash_table *node_hash;
-
-/* hash list of outstanding lock requests */
-static struct dm_hash_table *lock_hash;
-
-/* Copy of the current quorate state */
-static uint8_t gulm_quorate = 0;
-static enum {INIT_NOTDONE, INIT_DONE, INIT_WAITQUORATE} init_state = INIT_NOTDONE;
-
-/* Number of active nodes */
-static int num_nodes;
-
-static char *cluster_name;
-static int in_shutdown = 0;
-
-static pthread_mutex_t lock_start_mutex;
-static volatile int lock_start_flag;
-
-struct node_info
-{
- enum {NODE_UNKNOWN, NODE_DOWN, NODE_UP, NODE_CLVMD} state;
- char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN];
-};
-
-struct lock_wait
-{
- pthread_cond_t cond;
- pthread_mutex_t mutex;
- int status;
-};
-
-/* Forward */
-static int read_from_core_sock(struct local_client *client, char *buf, int len, const char *csid,
- struct local_client **new_client);
-static int read_from_lock_sock(struct local_client *client, char *buf, int len, const char *csid,
- struct local_client **new_client);
-static int get_all_cluster_nodes(void);
-static int _csid_from_name(char *csid, const char *name);
-static void _cluster_closedown(void);
-
-/* In tcp-comms.c */
-extern struct dm_hash_table *sock_hash;
-
-static int add_internal_client(int fd, fd_callback_t callback)
-{
- struct local_client *client;
-
- DEBUGLOG("Add_internal_client, fd = %d\n", fd);
-
- /* Add a GULM file descriptor it to the main loop */
- client = malloc(sizeof(struct local_client));
- if (!client)
- {
- DEBUGLOG("malloc failed\n");
- return -1;
- }
-
- memset(client, 0, sizeof(struct local_client));
- client->fd = fd;
- client->type = CLUSTER_INTERNAL;
- client->callback = callback;
- add_client(client);
-
- /* Set Close-on-exec */
- fcntl(fd, F_SETFD, 1);
-
- return 0;
-}
-
-/* Gulm library handle */
-static gulm_interface_p gulm_if;
-static lg_core_callbacks_t core_callbacks;
-static lg_lockspace_callbacks_t lock_callbacks;
-
-static void badsig_handler(int sig)
-{
- DEBUGLOG("got sig %d\n", sig);
- _cluster_closedown();
- exit(0);
-}
-
-static void _reread_config(void)
-{
- /* Re-read CCS node list */
- DEBUGLOG("Re-reading CCS config\n");
- get_all_cluster_nodes();
-}
-
-static int _init_cluster(void)
-{
- int status;
- int ccs_h;
- int port = 0;
- char *portstr;
-
- /* Get cluster name from CCS */
- ccs_h = ccs_force_connect(NULL, 0);
- if (ccs_h < 0)
- {
- syslog(LOG_ERR, "Cannot login in to CCSD server\n");
- return -1;
- }
-
- ccs_get(ccs_h, "//cluster/@name", &cluster_name);
- DEBUGLOG("got cluster name %s\n", cluster_name);
-
- if (!ccs_get(ccs_h, "//cluster/clvm/@port", &portstr))
- {
- port = atoi(portstr);
- free(portstr);
- DEBUGLOG("got port number %d\n", port);
-
- if (port <= 0 && port >= 65536)
- port = 0;
- }
-
- ccs_disconnect(ccs_h);
-
- /* Block locking until we are logged in */
- pthread_mutex_init(&lock_start_mutex, NULL);
- pthread_mutex_lock(&lock_start_mutex);
- lock_start_flag = 1;
-
- node_hash = dm_hash_create(100);
- lock_hash = dm_hash_create(10);
-
- /* Get all nodes from CCS */
- if (get_all_cluster_nodes())
- return -1;
-
- /* Initialise GULM library */
- status = lg_initialize(&gulm_if, cluster_name, "clvmd");
- if (status)
- {
- DEBUGLOG("lg_initialize failed: %d\n", status);
- return status;
- }
-
- /* Connect to core - we are not "important" :-) */
- status = lg_core_login(gulm_if, 0);
- if (status)
- {
- DEBUGLOG("lg_core_login failed: %d\n", status);
- return status;
- }
-
- /* Initialise the inter-node comms */
- status = init_comms(port);
- if (status)
- return status;
-
- /* Add core FD to the list */
- status = add_internal_client(lg_core_selector(gulm_if), read_from_core_sock);
- if (status)
- {
- DEBUGLOG("can't allocate client space\n");
- return status;
- }
-
- /* Connect to the lock server */
- if (lg_lock_login(gulm_if, "CLVM"))
- {
- syslog(LOG_ERR, "Cannot login in to LOCK server\n");
- DEBUGLOG("Cannot login in to LOCK server\n");
- exit(88);
- }
-
- /* Add lockspace FD to the list */
- status = add_internal_client(lg_lock_selector(gulm_if), read_from_lock_sock);
- if (status)
- {
- DEBUGLOG("can't allocate client space\n");
- exit(status);
- }
-
- /* Request a list of nodes, we can't really do anything until
- this comes back */
- status = lg_core_nodelist(gulm_if);
- if (status)
- {
- DEBUGLOG("lg_core_nodelist failed: %d\n", status);
- return status;
- }
-
- /* So I can kill it without taking GULM down too */
- signal(SIGINT, badsig_handler);
- signal(SIGTERM, badsig_handler);
-
- return 0;
-}
-
-static void _cluster_closedown(void)
-{
- DEBUGLOG("cluster_closedown\n");
- in_shutdown = 1;
- destroy_lvhash();
- lg_lock_logout(gulm_if);
- lg_core_logout(gulm_if);
- lg_release(gulm_if);
-}
-
-/* Expire locks for a named node, or us */
-#define GIO_KEY_SIZE 46
-static void drop_expired_locks(char *nodename)
-{
- struct utsname nodeinfo;
- uint8_t mask[GIO_KEY_SIZE];
-
- DEBUGLOG("Dropping expired locks for %s\n", nodename?nodename:"(null)");
- memset(mask, 0xff, GIO_KEY_SIZE);
-
- if (!nodename)
- {
- uname(&nodeinfo);
- nodename = nodeinfo.nodename;
- }
-
- if (lg_lock_drop_exp(gulm_if, nodename, mask, GIO_KEY_SIZE))
- {
- DEBUGLOG("Error calling lg_lock_drop_exp()\n");
- }
-}
-
-
-static int read_from_core_sock(struct local_client *client, char *buf, int len, const char *csid,
- struct local_client **new_client)
-{
- int status;
-
- *new_client = NULL;
- status = lg_core_handle_messages(gulm_if, &core_callbacks, NULL);
- return status<0 ? status : 1;
-}
-
-static int read_from_lock_sock(struct local_client *client, char *buf, int len, const char *csid,
- struct local_client **new_client)
-{
- int status;
-
- *new_client = NULL;
- status = lg_lock_handle_messages(gulm_if, &lock_callbacks, NULL);
- return status<0 ? status : 1;
-}
-
-
-/* CORE callback routines */
-static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t rank, uint8_t corestate)
-{
- DEBUGLOG("CORE Got a Login reply. gen:%lld err:%d rank:%d corestate:%d\n",
- gen, error, rank, corestate);
-
- if (error)
- exit(error);
-
- /* Get the current core state (for quorum) */
- lg_core_corestate(gulm_if);
-
- return 0;
-}
-
-static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate)
-{
- if (nodestate == lg_core_Logged_in)
- {
- /* Don't clobber NODE_CLVMD state */
- if (ninfo->state != NODE_CLVMD)
- {
- if (ninfo->state == NODE_UNKNOWN ||
- ninfo->state == NODE_DOWN)
- num_nodes++;
-
- ninfo->state = NODE_UP;
- }
- }
- else
- {
- if (nodestate == lg_core_Expired ||
- nodestate == lg_core_Fenced ||
- nodestate == lg_core_Logged_out)
- {
- if (ninfo->state != NODE_DOWN)
- num_nodes--;
- ninfo->state = NODE_DOWN;
- }
- }
- /* Gulm doesn't always send node DOWN events, so even if this a a node UP we must
- * assume (ahem) that it prevously went down at some time. So we close
- * the sockets here to make sure that we don't have any dead connections
- * to that node.
- */
- tcp_remove_client(csid);
-
- DEBUGLOG("set_node_state, '%s' state = %d num_nodes=%d\n",
- ninfo->name, ninfo->state, num_nodes);
-}
-
-static struct node_info *add_or_set_node(char *name, struct in6_addr *ip, uint8_t state)
-{
- struct node_info *ninfo;
-
- ninfo = dm_hash_lookup_binary(node_hash, (char *)ip, GULM_MAX_CSID_LEN);
- if (!ninfo)
- {
- /* If we can't find that node then re-read the config file in case it
- was added after we were started */
- DEBUGLOG("Node %s not found, re-reading config file\n", name);
- get_all_cluster_nodes();
-
- /* Now try again */
- ninfo = dm_hash_lookup_binary(node_hash, (char *)ip, GULM_MAX_CSID_LEN);
- if (!ninfo)
- {
- DEBUGLOG("Ignoring node %s, not part of the SAN cluster\n", name);
- return NULL;
- }
- }
-
- set_node_state(ninfo, (char *)ip, state);
-
- return ninfo;
-}
-
-static void _get_our_csid(char *csid)
-{
- get_our_gulm_csid(csid);
-}
-
-static int core_nodelist(void *misc, lglcb_t type, char *name, struct in6_addr *ip, uint8_t state)
-{
- DEBUGLOG("CORE nodelist\n");
-
- if (type == lglcb_start)
- {
- DEBUGLOG("Got Nodelist, start\n");
- }
- else
- {
- if (type == lglcb_item)
- {
- DEBUGLOG("Got nodelist, item: %s, %#x\n", name, state);
-
- add_or_set_node(name, ip, state);
- }
- else
- {
- if (type == lglcb_stop)
- {
- char ourcsid[GULM_MAX_CSID_LEN];
-
- DEBUGLOG("Got Nodelist, stop\n");
- if (gulm_quorate)
- {
- clvmd_cluster_init_completed();
- init_state = INIT_DONE;
- }
- else
- {
- if (init_state == INIT_NOTDONE)
- init_state = INIT_WAITQUORATE;
- }
-
- /* Mark ourself as up */
- _get_our_csid(ourcsid);
- gulm_add_up_node(ourcsid);
- }
- else
- {
- DEBUGLOG("Unknown lglcb_t %#x\n", type);
- }
- }
- }
-
- return 0;
-}
-
-static int core_statechange(void *misc, uint8_t corestate, uint8_t quorate, struct in6_addr *masterip, char *mastername)
-{
- DEBUGLOG("CORE Got statechange. quorate:%d, corestate:%x mastername:%s\n",
- quorate, corestate, mastername);
-
- gulm_quorate = quorate;
- if (quorate && init_state == INIT_WAITQUORATE)
- {
- clvmd_cluster_init_completed();
- init_state = INIT_DONE;
- }
- return 0;
-}
-
-static int core_nodechange(void *misc, char *nodename, struct in6_addr *nodeip, uint8_t nodestate)
-{
- struct node_info *ninfo;
-
- DEBUGLOG("CORE node change, name=%s, state = %d\n", nodename, nodestate);
-
- /* If we don't get nodeip here, try a lookup by name */
- if (!nodeip)
- _csid_from_name((char *)nodeip, nodename);
- if (!nodeip)
- return 0;
-
- ninfo = add_or_set_node(nodename, nodeip, nodestate);
- if (!ninfo)
- return 0;
-
- /* Check if we need to drop any expired locks */
- if (ninfo->state == NODE_DOWN)
- {
- drop_expired_locks(nodename);
- }
-
- return 0;
-}
-static int core_error(void *misc, uint32_t err)
-{
- DEBUGLOG("CORE error: %d\n", err);
- // Not sure what happens here
- return 0;
-}
-
-/* LOCK callback routines */
-static int lock_login_reply(void *misc, uint32_t error, uint8_t which)
-{
- DEBUGLOG("LOCK Got a Login reply. err:%d which:%d\n",
- error, which);
-
- if (error)
- exit(error);
-
- /* Drop any expired locks for us that might be hanging around */
- drop_expired_locks(NULL);
-
- /* Enable locking operations in other threads */
- if (lock_start_flag)
- {
- lock_start_flag = 0;
- pthread_mutex_unlock(&lock_start_mutex);
- }
-
- return 0;
-}
-
-static int lock_lock_state(void *misc, uint8_t *key, uint16_t keylen,
- uint64_t subid, uint64_t start, uint64_t stop,
- uint8_t state, uint32_t flags, uint32_t error,
- uint8_t *LVB, uint16_t LVBlen)
-{
- struct lock_wait *lwait;
-
- DEBUGLOG("LOCK lock state: %s, error = %d\n", key, error);
-
- /* No waiting process to wake up when we are shutting down */
- if (in_shutdown)
- return 0;
-
- lwait = dm_hash_lookup(lock_hash, key);
- if (!lwait)
- {
- DEBUGLOG("Can't find hash entry for resource %s\n", key);
- return 0;
- }
- lwait->status = error;
- pthread_mutex_lock(&lwait->mutex);
- pthread_cond_signal(&lwait->cond);
- pthread_mutex_unlock(&lwait->mutex);
-
- return 0;
-}
-static int lock_error(void *misc, uint32_t err)
-{
- DEBUGLOG("LOCK error: %d\n", err);
- // Not sure what happens here
- return 0;
-}
-
-
-/* CORE callbacks */
-static lg_core_callbacks_t core_callbacks = {
- .login_reply = core_login_reply,
- .nodelist = core_nodelist,
- .statechange = core_statechange,
- .nodechange = core_nodechange,
- .error = core_error,
-};
-
-/* LOCK callbacks */
-static lg_lockspace_callbacks_t lock_callbacks = {
- .login_reply = lock_login_reply,
- .lock_state = lock_lock_state,
- .error = lock_error,
-};
-
-/* Allow tcp-comms to loop round the list of active nodes */
-int get_next_node_csid(void **context, char *csid)
-{
- struct node_info *ninfo = NULL;
-
- /* First node */
- if (!*context)
- {
- *context = dm_hash_get_first(node_hash);
- }
- else
- {
- *context = dm_hash_get_next(node_hash, *context);
- }
- if (*context)
- ninfo = dm_hash_get_data(node_hash, *context);
-
- /* Find a node that is UP */
- while (*context && ninfo->state == NODE_DOWN)
- {
- *context = dm_hash_get_next(node_hash, *context);
- if (*context)
- {
- ninfo = dm_hash_get_data(node_hash, *context);
- }
- }
-
- if (!*context || ninfo->state == NODE_DOWN)
- {
- return 0;
- }
-
- memcpy(csid, dm_hash_get_key(node_hash, *context), GULM_MAX_CSID_LEN);
- return 1;
-}
-
-int gulm_name_from_csid(const char *csid, char *name)
-{
- struct node_info *ninfo;
-
- ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
- if (!ninfo)
- {
- sprintf(name, "UNKNOWN %s", print_csid(csid));
- return -1;
- }
-
- strcpy(name, ninfo->name);
- return 0;
-}
-
-
-static int _csid_from_name(char *csid, const char *name)
-{
- struct dm_hash_node *hn;
- struct node_info *ninfo;
-
- dm_hash_iterate(hn, node_hash)
- {
- ninfo = dm_hash_get_data(node_hash, hn);
- if (strcmp(ninfo->name, name) == 0)
- {
- memcpy(csid, dm_hash_get_key(node_hash, hn), GULM_MAX_CSID_LEN);
- return 0;
- }
- }
- return -1;
-}
-
-static int _get_num_nodes()
-{
- DEBUGLOG("num_nodes = %d\n", num_nodes);
- return num_nodes;
-}
-
-/* Node is now known to be running a clvmd */
-void gulm_add_up_node(const char *csid)
-{
- struct node_info *ninfo;
-
- ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
- if (!ninfo) {
- DEBUGLOG("gulm_add_up_node no node_hash entry for csid %s\n", print_csid(csid));
- return;
- }
-
- DEBUGLOG("gulm_add_up_node %s\n", ninfo->name);
-
- if (ninfo->state == NODE_DOWN)
- num_nodes++;
- ninfo->state = NODE_CLVMD;
-
- return;
-
-}
-/* Node is now known to be NOT running a clvmd */
-void add_down_node(char *csid)
-{
- struct node_info *ninfo;
-
- ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
- if (!ninfo)
- return;
-
- /* Only set it to UP if it was previously known to be
- running clvmd - gulm may set it DOWN quite soon */
- if (ninfo->state == NODE_CLVMD)
- ninfo->state = NODE_UP;
- drop_expired_locks(ninfo->name);
- return;
-
-}
-
-/* Call a callback for each node, so the caller knows whether it's up or down */
-static int _cluster_do_node_callback(struct local_client *master_client,
- void (*callback)(struct local_client *, const char *csid, int node_up))
-{
- struct dm_hash_node *hn;
- struct node_info *ninfo;
- int somedown = 0;
-
- dm_hash_iterate(hn, node_hash)
- {
- char csid[GULM_MAX_CSID_LEN];
- struct local_client *client;
-
- ninfo = dm_hash_get_data(node_hash, hn);
- memcpy(csid, dm_hash_get_key(node_hash, hn), GULM_MAX_CSID_LEN);
-
- DEBUGLOG("down_callback. node %s, state = %d\n", ninfo->name, ninfo->state);
-
- client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
- if (!client)
- {
- /* If it's up but not connected, try to make contact */
- if (ninfo->state == NODE_UP)
- gulm_connect_csid(csid, &client);
-
- client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
-
- }
- DEBUGLOG("down_callback2. node %s, state = %d\n", ninfo->name, ninfo->state);
- if (ninfo->state != NODE_DOWN)
- callback(master_client, csid, ninfo->state == NODE_CLVMD);
-
- if (ninfo->state != NODE_CLVMD)
- somedown = -1;
- }
- return somedown;
-}
-
-/* Convert gulm error codes to unix errno numbers */
-static int gulm_to_errno(int gulm_ret)
-{
- switch (gulm_ret)
- {
- case lg_err_TryFailed:
- case lg_err_AlreadyPend:
- errno = EAGAIN;
- break;
-
- /* More?? */
- default:
- errno = EINVAL;
- }
-
- return gulm_ret ? -1 : 0;
-}
-
-/* Real locking */
-static int _lock_resource(char *resource, int mode, int flags, int *lockid)
-{
- int status;
- struct lock_wait lwait;
-
- /* Wait until the lock module is ready */
- if (lock_start_flag)
- {
- pthread_mutex_lock(&lock_start_mutex);
- pthread_mutex_unlock(&lock_start_mutex);
- }
-
- pthread_cond_init(&lwait.cond, NULL);
- pthread_mutex_init(&lwait.mutex, NULL);
- pthread_mutex_lock(&lwait.mutex);
-
- /* This needs to be converted from DLM/LVM2 value for GULM */
- if (flags & LCKF_NOQUEUE) flags = lg_lock_flag_Try;
-
- dm_hash_insert(lock_hash, resource, &lwait);
- DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode);
-
- status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1,
- 0, 0, 0,
- mode, flags, NULL, 0);
- if (status)
- {
- DEBUGLOG("lg_lock_state returned %d\n", status);
- return status;
- }
-
- /* Wait for it to complete */
- pthread_cond_wait(&lwait.cond, &lwait.mutex);
- pthread_mutex_unlock(&lwait.mutex);
-
- dm_hash_remove(lock_hash, resource);
- DEBUGLOG("lock-resource returning %d\n", lwait.status);
-
- return gulm_to_errno(lwait.status);
-}
-
-
-static int _unlock_resource(char *resource, int lockid)
-{
- int status;
- struct lock_wait lwait;
-
- pthread_cond_init(&lwait.cond, NULL);
- pthread_mutex_init(&lwait.mutex, NULL);
- pthread_mutex_lock(&lwait.mutex);
-
- dm_hash_insert(lock_hash, resource, &lwait);
-
- DEBUGLOG("unlock_resource %s\n", resource);
- status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1,
- 0, 0, 0,
- lg_lock_state_Unlock, 0, NULL, 0);
-
- if (status)
- {
- DEBUGLOG("lg_lock_state(unlock) returned %d\n", status);
- return status;
- }
-
- /* When we are shutting down, don't wait for unlocks
- to be acknowledged, just do it. */
- if (in_shutdown)
- return status;
-
- /* Wait for it to complete */
-
- pthread_cond_wait(&lwait.cond, &lwait.mutex);
- pthread_mutex_unlock(&lwait.mutex);
-
- dm_hash_remove(lock_hash, resource);
-
- return gulm_to_errno(lwait.status);
-}
-
-
-/* These two locking functions MUST be called in a seperate thread from
- the clvmd main loop because they expect to be woken up by it.
-
- These are abstractions around the real locking functions (above)
- as we need to emulate the DLM's EX/PW/CW interaction with GULM using
- two locks.
- To aid unlocking, we store the lock mode in the lockid (as GULM
- doesn't use this).
-*/
-static int _sync_lock(const char *resource, int mode, int flags, int *lockid)
-{
- int status;
- char lock1[strlen(resource)+3];
- char lock2[strlen(resource)+3];
-
- snprintf(lock1, sizeof(lock1), "%s-1", resource);
- snprintf(lock2, sizeof(lock2), "%s-2", resource);
-
- switch (mode)
- {
- case LCK_EXCL:
- status = _lock_resource(lock1, lg_lock_state_Exclusive, flags, lockid);
- if (status)
- goto out;
-
- /* If we can't get this lock too then bail out */
- status = _lock_resource(lock2, lg_lock_state_Exclusive, LCK_NONBLOCK, lockid);
- if (status == lg_err_TryFailed)
- {
- _unlock_resource(lock1, *lockid);
- status = -1;
- errno = EAGAIN;
- }
- break;
-
- case LCK_PREAD:
- case LCK_READ:
- status = _lock_resource(lock1, lg_lock_state_Shared, flags, lockid);
- if (status)
- goto out;
- status = _unlock_resource(lock2, *lockid);
- break;
-
- case LCK_WRITE:
- status = _lock_resource(lock2, lg_lock_state_Exclusive, flags, lockid);
- if (status)
- goto out;
- status = _unlock_resource(lock1, *lockid);
- break;
-
- default:
- status = -1;
- errno = EINVAL;
- break;
- }
- out:
- *lockid = mode;
- return status;
-}
-
-static int _sync_unlock(const char *resource, int lockid)
-{
- int status = 0;
- char lock1[strlen(resource)+3];
- char lock2[strlen(resource)+3];
-
- snprintf(lock1, sizeof(lock1), "%s-1", resource);
- snprintf(lock2, sizeof(lock2), "%s-2", resource);
-
- /* The held lock mode is in the lock id */
- assert(lockid == LCK_EXCL ||
- lockid == LCK_READ ||
- lockid == LCK_PREAD ||
- lockid == LCK_WRITE);
-
- status = _unlock_resource(lock1, lockid);
- if (!status)
- status = _unlock_resource(lock2, lockid);
-
- return status;
-}
-
-static int _is_quorate()
-{
- return gulm_quorate;
-}
-
-/* Get all the cluster node names & IPs from CCS and
- add them to our node list so we know who to talk to.
- Called when we start up and if we get sent SIGHUP.
-*/
-static int get_all_cluster_nodes()
-{
- int ctree;
- char *nodename;
- int error;
- int i;
-
- /* Open the config file */
- ctree = ccs_force_connect(NULL, 1);
- if (ctree < 0)
- {
- log_error("Error connecting to CCS");
- return -1;
- }
-
- for (i=1;;i++)
- {
- char nodekey[256];
- char nodeip[GULM_MAX_CSID_LEN];
- int clvmflag = 1;
- char *clvmflagstr;
- char key[256];
-
- sprintf(nodekey, "//cluster/clusternodes/clusternode[%d]/@name", i);
- error = ccs_get(ctree, nodekey, &nodename);
- if (error)
- break;
-
- sprintf(key, "//cluster/clusternodes/clusternode[@name=\"%s\"]/clvm", nodename);
- if (!ccs_get(ctree, key, &clvmflagstr))
- {
- clvmflag = atoi(clvmflagstr);
- free(clvmflagstr);
- }
-
- DEBUGLOG("Got node %s from ccs(clvmflag = %d)\n", nodename, clvmflag);
- if ((get_ip_address(nodename, nodeip) == 0) && clvmflag)
- {
- struct node_info *ninfo;
-
- /* If it's not in the list, then add it */
- ninfo = dm_hash_lookup_binary(node_hash, nodeip, GULM_MAX_CSID_LEN);
- if (!ninfo)
- {
- ninfo = malloc(sizeof(struct node_info));
- if (!ninfo)
- {
- syslog(LOG_ERR, "Cannot alloc memory for node info\n");
- ccs_disconnect(ctree);
- return -1;
- }
- strcpy(ninfo->name, nodename);
-
- ninfo->state = NODE_DOWN;
- dm_hash_insert_binary(node_hash, nodeip, GULM_MAX_CSID_LEN, ninfo);
- }
- }
- else
- {
- if (!clvmflag) {
- DEBUGLOG("node %s has clvm disabled\n", nodename);
- }
- else {
- DEBUGLOG("Cannot resolve host name %s\n", nodename);
- log_error("Cannot resolve host name %s\n", nodename);
- }
- }
- free(nodename);
- }
-
- /* Finished with config file */
- ccs_disconnect(ctree);
-
- return 0;
-}
-
-static int _get_main_cluster_fd(void)
-{
- return get_main_gulm_cluster_fd();
-}
-
-static int _cluster_fd_callback(struct local_client *fd, char *buf, int len, const char *csid, struct local_client **new_client)
-{
- return cluster_fd_gulm_callback(fd, buf, len, csid, new_client);
-}
-
-static int _cluster_send_message(const void *buf, int msglen, const char *csid, const char *errtext)
-{
- return gulm_cluster_send_message((char *)buf, msglen, csid, errtext);
-}
-
-static int _get_cluster_name(char *buf, int buflen)
-{
- strncpy(buf, cluster_name, buflen);
- return 0;
-}
-
-static struct cluster_ops _cluster_gulm_ops = {
- .cluster_init_completed = NULL,
- .cluster_send_message = _cluster_send_message,
- .name_from_csid = gulm_name_from_csid,
- .csid_from_name = _csid_from_name,
- .get_num_nodes = _get_num_nodes,
- .cluster_fd_callback = _cluster_fd_callback,
- .get_main_cluster_fd = _get_main_cluster_fd,
- .cluster_do_node_callback = _cluster_do_node_callback,
- .is_quorate = _is_quorate,
- .get_our_csid = _get_our_csid,
- .add_up_node = gulm_add_up_node,
- .reread_config = _reread_config,
- .cluster_closedown = _cluster_closedown,
- .get_cluster_name = _get_cluster_name,
- .sync_lock = _sync_lock,
- .sync_unlock = _sync_unlock,
-};
-
-struct cluster_ops *init_gulm_cluster(void)
-{
- if (!_init_cluster())
- return &_cluster_gulm_ops;
- else
- return NULL;
-}
diff --git a/daemons/clvmd/clvmd-gulm.h b/daemons/clvmd/clvmd-gulm.h
deleted file mode 100644
index 9416f5c..0000000
--- a/daemons/clvmd/clvmd-gulm.h
+++ /dev/null
@@ -1,9 +0,0 @@
-extern int get_next_node_csid(void **context, char *csid);
-extern void add_down_node(char *csid);
-extern int gulm_fd(void);
-extern int get_ip_address(const char *node, char *addr);
-extern void tcp_remove_client(const char *csid);
-extern int alloc_client(int fd, const char *csid, struct local_client **new_client);
-
-void gulm_add_up_node(const char *csid);
-int gulm_name_from_csid(const char *csid, char *name);
diff --git a/daemons/clvmd/clvmd-openais.c b/daemons/clvmd/clvmd-openais.c
index fc98b50..9ce73d6 100644
--- a/daemons/clvmd/clvmd-openais.c
+++ b/daemons/clvmd/clvmd-openais.c
@@ -197,14 +197,13 @@ static int add_internal_client(int fd, fd_callback_t callback)
DEBUGLOG("Add_internal_client, fd = %d\n", fd);
- client = malloc(sizeof(struct local_client));
+ client = calloc(1, sizeof(struct local_client));
if (!client)
{
DEBUGLOG("malloc failed\n");
return -1;
}
- memset(client, 0, sizeof(struct local_client));
client->fd = fd;
client->type = CLUSTER_INTERNAL;
client->callback = callback;
@@ -227,7 +226,7 @@ static void openais_cpg_deliver_callback (cpg_handle_t handle,
memcpy(&target_nodeid, msg, OPENAIS_CSID_LEN);
- DEBUGLOG("%u got message from nodeid %d for %d. len %d\n",
+ DEBUGLOG("%u got message from nodeid %d for %d. len %" PRIsize_t "\n",
our_nodeid, nodeid, target_nodeid, msg_len-4);
if (nodeid != our_nodeid)
@@ -245,7 +244,8 @@ static void openais_cpg_confchg_callback(cpg_handle_t handle,
int i;
struct node_info *ninfo;
- DEBUGLOG("confchg callback. %d joined, %d left, %d members\n",
+ DEBUGLOG("confchg callback. %" PRIsize_t " joined, "
+ "%" PRIsize_t " left, %" PRIsize_t " members\n",
joined_list_entries, left_list_entries, member_list_entries);
for (i=0; i<joined_list_entries; i++) {
@@ -505,11 +505,11 @@ static int _lock_resource(char *resource, int mode, int flags, int *lockid)
saLckResourceClose(res_handle);
return ais_to_errno(err);
}
-
+
/* Wait for it to complete */
- DEBUGLOG("lock_resource returning %d, lock_id=%llx\n", err,
- lock_id);
+ DEBUGLOG("lock_resource returning %d, lock_id=%" PRIx64 "\n",
+ err, lock_id);
linfo->lock_id = lock_id;
linfo->res_handle = res_handle;
@@ -530,7 +530,7 @@ static int _unlock_resource(char *resource, int lockid)
if (!linfo)
return 0;
- DEBUGLOG("unlock_resource: lockid: %llx\n", linfo->lock_id);
+ DEBUGLOG("unlock_resource: lockid: %" PRIx64 "\n", linfo->lock_id);
err = saLckResourceUnlock(linfo->lock_id, SA_TIME_END);
if (err != SA_AIS_OK)
{
@@ -666,6 +666,7 @@ static int _get_cluster_name(char *buf, int buflen)
}
static struct cluster_ops _cluster_openais_ops = {
+ .name = "openais",
.cluster_init_completed = NULL,
.cluster_send_message = _cluster_send_message,
.name_from_csid = _name_from_csid,
diff --git a/daemons/clvmd/clvmd-singlenode.c b/daemons/clvmd/clvmd-singlenode.c
index 335cb98..3b35bf5 100644
--- a/daemons/clvmd/clvmd-singlenode.c
+++ b/daemons/clvmd/clvmd-singlenode.c
@@ -29,6 +29,15 @@
static const char SINGLENODE_CLVMD_SOCKNAME[] = DEFAULT_RUN_DIR "/clvmd_singlenode.sock";
static int listen_fd = -1;
+static struct dm_hash_table *_locks;
+static int _lockid;
+
+struct lock {
+ int lockid;
+ int mode;
+ int excl;
+};
+
static void close_comms(void)
{
if (listen_fd != -1 && close(listen_fd))
@@ -39,8 +48,15 @@ static void close_comms(void)
static int init_comms(void)
{
- struct sockaddr_un addr;
mode_t old_mask;
+ struct sockaddr_un addr = { .sun_family = AF_UNIX };
+
+ if (!dm_strncpy(addr.sun_path, SINGLENODE_CLVMD_SOCKNAME,
+ sizeof(addr.sun_path))) {
+ DEBUGLOG("%s: singlenode socket name too long.",
+ SINGLENODE_CLVMD_SOCKNAME);
+ return -1;
+ }
close_comms();
@@ -53,12 +69,10 @@ static int init_comms(void)
goto error;
}
/* Set Close-on-exec */
- fcntl(listen_fd, F_SETFD, 1);
-
- memset(&addr, 0, sizeof(addr));
- memcpy(addr.sun_path, SINGLENODE_CLVMD_SOCKNAME,
- sizeof(SINGLENODE_CLVMD_SOCKNAME));
- addr.sun_family = AF_UNIX;
+ if (fcntl(listen_fd, F_SETFD, 1)) {
+ DEBUGLOG("Setting CLOEXEC on client fd failed: %s\n", strerror(errno));
+ goto error;
+ }
if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
DEBUGLOG("Can't bind local socket: %s\n", strerror(errno));
@@ -83,9 +97,16 @@ static int _init_cluster(void)
{
int r;
+ if (!(_locks = dm_hash_create(128))) {
+ DEBUGLOG("Failed to allocate single-node hash table.\n");
+ return 1;
+ }
+
r = init_comms();
- if (r)
+ if (r) {
+ dm_hash_destroy(_locks);
return r;
+ }
DEBUGLOG("Single-node cluster initialised.\n");
return 0;
@@ -97,6 +118,9 @@ static void _cluster_closedown(void)
DEBUGLOG("cluster_closedown\n");
destroy_lvhash();
+ dm_hash_destroy(_locks);
+ _locks = NULL;
+ _lockid = 0;
}
static void _get_our_csid(char *csid)
@@ -136,95 +160,101 @@ static int _cluster_do_node_callback(struct local_client *master_client,
int _lock_file(const char *file, uint32_t flags);
-static int *_locks = NULL;
-static char **_resources = NULL;
-static int _lock_max = 1;
static pthread_mutex_t _lock_mutex = PTHREAD_MUTEX_INITIALIZER;
+/* Using one common condition for all locks for simplicity */
+static pthread_cond_t _lock_cond = PTHREAD_COND_INITIALIZER;
/* Real locking */
static int _lock_resource(const char *resource, int mode, int flags, int *lockid)
{
- int *_locks_1;
- char **_resources_1;
- int i, j;
+ struct lock *lck;
- DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n",
+ DEBUGLOG("Locking resource %s, flags=%d, mode=%d\n",
resource, flags, mode);
- retry:
+ mode &= LCK_TYPE_MASK;
pthread_mutex_lock(&_lock_mutex);
-
- /* look for an existing lock for this resource */
- for (i = 1; i < _lock_max; ++i) {
- if (!_resources[i])
- break;
- if (!strcmp(_resources[i], resource)) {
- if ((_locks[i] & LCK_TYPE_MASK) == LCK_WRITE ||
- (_locks[i] & LCK_TYPE_MASK) == LCK_EXCL) {
- DEBUGLOG("%s already write/exclusively locked...\n", resource);
- goto maybe_retry;
- }
- if ((mode & LCK_TYPE_MASK) == LCK_WRITE ||
- (mode & LCK_TYPE_MASK) == LCK_EXCL) {
- DEBUGLOG("%s already locked and WRITE/EXCL lock requested...\n",
- resource);
- goto maybe_retry;
- }
- }
+retry:
+ if (!(lck = dm_hash_lookup(_locks, resource))) {
+ /* Add new locked resource */
+ if (!(lck = dm_zalloc(sizeof(struct lock))) ||
+ !dm_hash_insert(_locks, resource, lck))
+ goto bad;
+
+ lck->lockid = ++_lockid;
+ goto out;
}
- if (i == _lock_max) { /* out of lock slots, extend */
- _locks_1 = dm_realloc(_locks, 2 * _lock_max * sizeof(int));
- if (!_locks_1)
- return 1; /* fail */
- _locks = _locks_1;
- _resources_1 = dm_realloc(_resources, 2 * _lock_max * sizeof(char *));
- if (!_resources_1) {
- /* _locks may get realloc'd twice, but that should be safe */
- return 1; /* fail */
- }
- _resources = _resources_1;
- /* clear the new resource entries */
- for (j = _lock_max; j < 2 * _lock_max; ++j)
- _resources[j] = NULL;
- _lock_max = 2 * _lock_max;
+ /* Update/convert lock */
+ if (flags == LCKF_CONVERT) {
+ if (lck->excl)
+ mode = LCK_EXCL;
+ } else if ((lck->mode == LCK_WRITE) || (lck->mode == LCK_EXCL)) {
+ DEBUGLOG("Resource %s already %s locked (%d)...\n", resource,
+ (lck->mode == LCK_WRITE) ? "write" : "exclusively", lck->lockid);
+ goto maybe_retry;
+ } else if (lck->mode > mode) {
+ DEBUGLOG("Resource %s already locked and %s lock requested...\n",
+ resource,
+ (mode == LCK_READ) ? "READ" :
+ (mode == LCK_WRITE) ? "WRITE" : "EXCLUSIVE");
+ goto maybe_retry;
}
- /* resource is not currently locked, grab it */
-
- *lockid = i;
- _locks[i] = mode;
- _resources[i] = dm_strdup(resource);
-
- DEBUGLOG("%s locked -> %d\n", resource, i);
-
+out:
+ *lockid = lck->lockid;
+ lck->mode = mode;
+ lck->excl |= (mode == LCK_EXCL);
+ DEBUGLOG("Locked resource %s, lockid=%d, mode=%d\n", resource, lck->lockid, mode);
+ pthread_cond_broadcast(&_lock_cond); /* wakeup waiters */
pthread_mutex_unlock(&_lock_mutex);
+
return 0;
- maybe_retry:
- pthread_mutex_unlock(&_lock_mutex);
+
+maybe_retry:
if (!(flags & LCK_NONBLOCK)) {
- usleep(10000);
+ pthread_cond_wait(&_lock_cond, &_lock_mutex);
+ DEBUGLOG("Resource %s RETRYING lock...\n", resource);
goto retry;
}
+bad:
+ DEBUGLOG("Failed to lock resource %s\n", resource);
+ pthread_mutex_unlock(&_lock_mutex);
return 1; /* fail */
}
static int _unlock_resource(const char *resource, int lockid)
{
- DEBUGLOG("unlock_resource: %s lockid: %x\n", resource, lockid);
- if(!_resources[lockid]) {
- DEBUGLOG("(%s) %d not locked\n", resource, lockid);
+ struct lock *lck;
+
+ if (lockid < 0) {
+ DEBUGLOG("Not tracking unlock of lockid -1: %s, lockid=%d\n",
+ resource, lockid);
+ return 0;
+ }
+
+ DEBUGLOG("Unlocking resource %s, lockid=%d\n", resource, lockid);
+ pthread_mutex_lock(&_lock_mutex);
+
+ if (!(lck = dm_hash_lookup(_locks, resource))) {
+ pthread_mutex_unlock(&_lock_mutex);
+ DEBUGLOG("Resource %s, lockid=%d is not locked.\n", resource, lockid);
return 1;
}
- if(strcmp(_resources[lockid], resource)) {
- DEBUGLOG("%d has wrong resource (requested %s, got %s)\n",
- lockid, resource, _resources[lockid]);
+
+ if (lck->lockid != lockid) {
+ pthread_mutex_unlock(&_lock_mutex);
+ DEBUGLOG("Resource %s has wrong lockid %d, expected %d.\n",
+ resource, lck->lockid, lockid);
return 1;
}
- dm_free(_resources[lockid]);
- _resources[lockid] = 0;
+ dm_hash_remove(_locks, resource);
+ dm_free(lck);
+ pthread_cond_broadcast(&_lock_cond); /* wakeup waiters */
+ pthread_mutex_unlock(&_lock_mutex);
+
return 0;
}
@@ -260,6 +290,7 @@ static int _get_cluster_name(char *buf, int buflen)
}
static struct cluster_ops _cluster_singlenode_ops = {
+ .name = "singlenode",
.cluster_init_completed = NULL,
.cluster_send_message = _cluster_send_message,
.name_from_csid = _name_from_csid,
diff --git a/daemons/clvmd/clvmd.c b/daemons/clvmd/clvmd.c
index 1a35c49..ac465d9 100644
--- a/daemons/clvmd/clvmd.c
+++ b/daemons/clvmd/clvmd.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -20,6 +20,7 @@
#include "clvmd-common.h"
#include <pthread.h>
+#include <getopt.h>
#include "clvmd-comms.h"
#include "clvm.h"
@@ -48,6 +49,7 @@
#endif
#define MAX_RETRIES 4
+#define MAX_MISSING_LEN 8000 /* Max supported clvmd message size ? */
#define ISLOCAL_CSID(c) (memcmp(c, our_csid, max_csid_len) == 0)
@@ -77,15 +79,18 @@ struct lvm_thread_cmd {
};
struct lvm_startup_params {
- int using_gulm;
- char **argv;
+ struct dm_hash_table *excl_uuid;
};
-debug_t debug;
+static debug_t debug = DEBUG_OFF;
+static int foreground_mode = 0;
static pthread_t lvm_thread;
+/* Stack size 128KiB for thread, must be bigger then DEFAULT_RESERVED_STACK */
+static const size_t STACK_SIZE = 128 * 1024;
+static pthread_attr_t stack_attr;
static pthread_mutex_t lvm_thread_mutex;
static pthread_cond_t lvm_thread_cond;
-static pthread_mutex_t lvm_start_mutex;
+static pthread_barrier_t lvm_start_barrier;
static struct dm_list lvm_cmd_head;
static volatile sig_atomic_t quit = 0;
static volatile sig_atomic_t reread_config = 0;
@@ -99,9 +104,7 @@ static int child_pipe[2];
#define DFAIL_TIMEOUT 5
#define SUCCESS 0
-typedef enum {IF_AUTO, IF_CMAN, IF_GULM, IF_OPENAIS, IF_COROSYNC, IF_SINGLENODE} if_type_t;
-
-typedef void *(lvm_pthread_fn_t)(void*);
+typedef enum {IF_AUTO, IF_CMAN, IF_OPENAIS, IF_COROSYNC, IF_SINGLENODE} if_type_t;
/* Prototypes for code further down */
static void sigusr2_handler(int sig);
@@ -132,7 +135,7 @@ static int check_all_clvmds_running(struct local_client *client);
static int local_rendezvous_callback(struct local_client *thisfd, char *buf,
int len, const char *csid,
struct local_client **new_client);
-static void lvm_thread_fn(void *) __attribute__ ((noreturn));
+static void *lvm_thread_fn(void *) __attribute__((noreturn));
static int add_to_lvmqueue(struct local_client *client, struct clvm_header *msg,
int msglen, const char *csid);
static int distribute_command(struct local_client *thisfd);
@@ -145,12 +148,12 @@ static if_type_t get_cluster_type(void);
static void usage(const char *prog, FILE *file)
{
- fprintf(file, "Usage:\n"
- "%s [Vhd]\n\n"
+ fprintf(file, "Usage: %s [options]\n"
" -V Show version of clvmd\n"
" -h Show this help information\n"
- " -d Set debug level\n"
- " If starting clvmd then don't fork, run in the foreground\n"
+ " -d[n] Set debug logging (0:none, 1:stderr (implies -f option), 2:syslog)\n"
+ " -f Don't fork, run in the foreground\n"
+ " -E<lockuuid> Take this lock uuid as exclusively locked resource (for restart)\n"
" -R Tell all running clvmds in the cluster to reload their device cache\n"
" -S Restart clvmd, preserving exclusive locks\n"
" -C Sets debug level (from -d) on all clvmd instances clusterwide\n"
@@ -167,9 +170,6 @@ static void usage(const char *prog, FILE *file)
#ifdef USE_OPENAIS
"openais "
#endif
-#ifdef USE_GULM
- "gulm "
-#endif
#ifdef USE_SINGLENODE
"singlenode "
#endif
@@ -199,7 +199,8 @@ static void safe_close(int *fd)
if (*fd >= 0) {
int to_close = *fd;
*fd = -1;
- close(to_close);
+ if (close(to_close))
+ log_sys_error("close", ""); /* path */
}
}
@@ -209,14 +210,15 @@ void debuglog(const char *fmt, ...)
va_list ap;
static int syslog_init = 0;
- if (debug == DEBUG_STDERR) {
+ switch (clvmd_get_debug()) {
+ case DEBUG_STDERR:
va_start(ap,fmt);
time(&P);
fprintf(stderr, "CLVMD[%x]: %.15s ", (int)pthread_self(), ctime(&P)+4 );
vfprintf(stderr, fmt, ap);
va_end(ap);
- }
- if (debug == DEBUG_SYSLOG) {
+ break;
+ case DEBUG_SYSLOG:
if (!syslog_init) {
openlog("clvmd", LOG_PID, LOG_DAEMON);
syslog_init = 1;
@@ -225,9 +227,28 @@ void debuglog(const char *fmt, ...)
va_start(ap,fmt);
vsyslog(LOG_DEBUG, fmt, ap);
va_end(ap);
+ break;
+ case DEBUG_OFF:
+ break;
}
}
+void clvmd_set_debug(debug_t new_debug)
+{
+ if (!foreground_mode && new_debug == DEBUG_STDERR)
+ new_debug = DEBUG_SYSLOG;
+
+ if (new_debug > DEBUG_SYSLOG)
+ new_debug = DEBUG_SYSLOG;
+
+ debug = new_debug;
+}
+
+debug_t clvmd_get_debug(void)
+{
+ return debug;
+}
+
static const char *decode_cmd(unsigned char cmdl)
{
static char buf[128];
@@ -276,12 +297,15 @@ static const char *decode_cmd(unsigned char cmdl)
case CLVMD_CMD_RESTART:
command = "RESTART";
break;
+ case CLVMD_CMD_SYNC_NAMES:
+ command = "SYNC_NAMES";
+ break;
default:
command = "unknown";
break;
}
- sprintf(buf, "%s (0x%x)", command, cmdl);
+ snprintf(buf, sizeof(buf), "%s (0x%x)", command, cmdl);
return buf;
}
@@ -312,52 +336,63 @@ static void check_permissions(void)
int main(int argc, char *argv[])
{
int local_sock;
- struct local_client *newfd;
- struct utsname nodeinfo;
+ struct local_client *newfd, *delfd;
struct lvm_startup_params lvm_params;
- signed char opt;
+ int opt;
int cmd_timeout = DEFAULT_CMD_TIMEOUT;
int start_timeout = 0;
if_type_t cluster_iface = IF_AUTO;
sigset_t ss;
- int using_gulm = 0;
- int debug_opt = 0;
+ debug_t debug_opt = DEBUG_OFF;
+ debug_t debug_arg = DEBUG_OFF;
int clusterwide_opt = 0;
mode_t old_mask;
+ int ret = 1;
+
+ struct option longopts[] = {
+ { "help", 0, 0, 'h' },
+ { NULL, 0, 0, 0 }
+ };
+
+ if (!(lvm_params.excl_uuid = dm_hash_create(128))) {
+ fprintf(stderr, "Failed to allocate hash table\n");
+ return 1;
+ }
/* Deal with command-line arguments */
opterr = 0;
optind = 0;
- while ((opt = getopt(argc, argv, "?vVhd::t:RST:CI:E:")) != EOF) {
+ while ((opt = getopt_long(argc, argv, "vVhfd::t:RST:CI:E:",
+ longopts, NULL)) != -1) {
switch (opt) {
case 'h':
usage(argv[0], stdout);
exit(0);
- case '?':
- usage(argv[0], stderr);
- exit(0);
-
case 'R':
check_permissions();
- return refresh_clvmd(1)==1?0:1;
+ ret = (refresh_clvmd(1) == 1) ? 0 : 1;
+ goto out;
case 'S':
check_permissions();
- return restart_clvmd(clusterwide_opt)==1?0:1;
+ ret = (restart_clvmd(clusterwide_opt) == 1) ? 0 : 1;
+ goto out;
case 'C':
clusterwide_opt = 1;
break;
case 'd':
- debug_opt = 1;
- if (optarg)
- debug = atoi(optarg);
- else
- debug = DEBUG_STDERR;
+ debug_opt = DEBUG_STDERR;
+ debug_arg = optarg ? (debug_t) atoi(optarg) : DEBUG_STDERR;
+ if (debug_arg == DEBUG_STDERR)
+ foreground_mode = 1;
break;
+ case 'f':
+ foreground_mode = 1;
+ break;
case 't':
cmd_timeout = atoi(optarg);
if (!cmd_timeout) {
@@ -369,6 +404,12 @@ int main(int argc, char *argv[])
case 'I':
cluster_iface = parse_cluster_interface(optarg);
break;
+ case 'E':
+ if (!dm_hash_insert(lvm_params.excl_uuid, optarg, optarg)) {
+ fprintf(stderr, "Failed to allocate hash entry\n");
+ goto out;
+ }
+ break;
case 'T':
start_timeout = atoi(optarg);
if (start_timeout <= 0) {
@@ -386,20 +427,14 @@ int main(int argc, char *argv[])
exit(0);
break;
+ default:
+ usage(argv[0], stderr);
+ exit(2);
}
}
check_permissions();
- /* Setting debug options on an existing clvmd */
- if (debug_opt && !check_local_clvmd()) {
-
- /* Sending to stderr makes no sense for a detached daemon */
- if (debug == DEBUG_STDERR)
- debug = DEBUG_SYSLOG;
- return debug_clvmd(debug, clusterwide_opt)==1?0:1;
- }
-
/*
* Switch to C locale to avoid reading large locale-archive file
* used by some glibc (on some distributions it takes over 100MB).
@@ -408,12 +443,19 @@ int main(int argc, char *argv[])
if (setenv("LANG", "C", 1))
perror("Cannot set LANG to C");
+ /* Setting debug options on an existing clvmd */
+ if (debug_opt && !check_local_clvmd()) {
+ dm_hash_destroy(lvm_params.excl_uuid);
+ return debug_clvmd(debug_arg, clusterwide_opt)==1?0:1;
+ }
+
+ clvmd_set_debug(debug_opt);
+
/* Fork into the background (unless requested not to) */
- if (debug != DEBUG_STDERR) {
+ if (!foreground_mode)
be_daemon(start_timeout);
- }
- dm_prepare_selinux_context(DEFAULT_RUN_DIR, S_IFDIR);
+ (void) dm_prepare_selinux_context(DEFAULT_RUN_DIR, S_IFDIR);
old_mask = umask(0077);
if (dm_create_dir(DEFAULT_RUN_DIR) == 0) {
DEBUGLOG("clvmd: unable to create %s directory\n",
@@ -447,7 +489,7 @@ int main(int argc, char *argv[])
/* Set up signal handlers, USR1 is for cluster change notifications (in cman)
USR2 causes child threads to exit.
- HUP causes gulm version to re-read nodes list from CCS.
+ (HUP used to cause gulm to re-read the nodes list from CCS.)
PIPE should be ignored */
signal(SIGUSR2, sigusr2_handler);
signal(SIGHUP, sighup_handler);
@@ -462,9 +504,14 @@ int main(int argc, char *argv[])
/* Initialise the LVM thread variables */
dm_list_init(&lvm_cmd_head);
+ if (pthread_attr_init(&stack_attr) ||
+ pthread_attr_setstacksize(&stack_attr, STACK_SIZE)) {
+ log_sys_error("pthread_attr_init", "");
+ exit(1);
+ }
pthread_mutex_init(&lvm_thread_mutex, NULL);
pthread_cond_init(&lvm_thread_cond, NULL);
- pthread_mutex_init(&lvm_start_mutex, NULL);
+ pthread_barrier_init(&lvm_start_barrier, NULL, 2);
init_lvhash();
/* Start the cluster interface */
@@ -479,16 +526,6 @@ int main(int argc, char *argv[])
syslog(LOG_NOTICE, "Cluster LVM daemon started - connected to CMAN");
}
#endif
-#ifdef USE_GULM
- if (!clops)
- if ((cluster_iface == IF_AUTO || cluster_iface == IF_GULM) && (clops = init_gulm_cluster())) {
- max_csid_len = GULM_MAX_CSID_LEN;
- max_cluster_message = GULM_MAX_CLUSTER_MESSAGE;
- max_cluster_member_name_len = GULM_MAX_CLUSTER_MEMBER_NAME_LEN;
- using_gulm = 1;
- syslog(LOG_NOTICE, "Cluster LVM daemon started - connected to GULM");
- }
-#endif
#ifdef USE_COROSYNC
if (!clops)
if (((cluster_iface == IF_AUTO || cluster_iface == IF_COROSYNC) && (clops = init_corosync_cluster()))) {
@@ -526,7 +563,6 @@ int main(int argc, char *argv[])
DEBUGLOG("Cluster ready, doing some more initialisation\n");
/* Save our CSID */
- uname(&nodeinfo);
clops->get_our_csid(our_csid);
/* Initialise the FD list head */
@@ -553,16 +589,12 @@ int main(int argc, char *argv[])
DEBUGLOG("starting LVM thread\n");
/* Don't let anyone else to do work until we are started */
- pthread_mutex_lock(&lvm_start_mutex);
- lvm_params.using_gulm = using_gulm;
- lvm_params.argv = argv;
- pthread_create(&lvm_thread, NULL, (lvm_pthread_fn_t*)lvm_thread_fn,
- (void *)&lvm_params);
+ pthread_create(&lvm_thread, &stack_attr, lvm_thread_fn, &lvm_params);
+
+ /* Don't start until the LVM thread is ready */
+ pthread_barrier_wait(&lvm_start_barrier);
/* Tell the rest of the cluster our version number */
- /* CMAN can do this immediately, gulm needs to wait until
- the core initialisation has finished and the node list
- has been gathered */
if (clops->cluster_init_completed)
clops->cluster_init_completed();
@@ -576,15 +608,39 @@ int main(int argc, char *argv[])
/* Do some work */
main_loop(local_sock, cmd_timeout);
+ pthread_mutex_lock(&lvm_thread_mutex);
+ pthread_cond_signal(&lvm_thread_cond);
+ pthread_mutex_unlock(&lvm_thread_mutex);
+ if ((errno = pthread_join(lvm_thread, NULL)))
+ log_sys_error("pthread_join", "");
+
close_local_sock(local_sock);
destroy_lvm();
- return 0;
+ for (newfd = local_client_head.next; newfd != NULL;) {
+ delfd = newfd;
+ newfd = newfd->next;
+ if (delfd->fd == local_sock)
+ delfd->fd = -1;
+ /*
+ * FIXME:
+ * needs cleanup code from read_from_local_sock() for now
+ * break of 'clvmd' may access already free memory here.
+ */
+ safe_close(&(delfd->fd));
+ free(delfd);
+ }
+
+ ret = 0;
+out:
+ dm_hash_destroy(lvm_params.excl_uuid);
+
+ return ret;
}
-/* Called when the GuLM cluster layer has completed initialisation.
+/* Called when the cluster layer has completed initialisation.
We send the version message */
-void clvmd_cluster_init_completed()
+void clvmd_cluster_init_completed(void)
{
send_version_message();
}
@@ -616,7 +672,8 @@ static int local_rendezvous_callback(struct local_client *thisfd, char *buf,
if (client_fd >= 0) {
newfd = malloc(sizeof(struct local_client));
if (!newfd) {
- close(client_fd);
+ if (close(client_fd))
+ log_sys_error("close", "socket");
return 1;
}
@@ -666,9 +723,9 @@ static int local_pipe_callback(struct local_client *thisfd, char *buf,
/* EOF on pipe or an error, close it */
if (len <= 0) {
- int jstat;
void *ret = &status;
- close(thisfd->fd);
+ if (close(thisfd->fd))
+ log_sys_error("close", "local_pipe");
/* Clear out the cross-link */
if (thisfd->bits.pipe.client != NULL)
@@ -677,7 +734,10 @@ static int local_pipe_callback(struct local_client *thisfd, char *buf,
/* Reap child thread */
if (thisfd->bits.pipe.threadid) {
- jstat = pthread_join(thisfd->bits.pipe.threadid, &ret);
+ if ((errno = pthread_join(thisfd->bits.pipe.threadid,
+ &ret)))
+ log_sys_error("pthread_join", "");
+
thisfd->bits.pipe.threadid = 0;
if (thisfd->bits.pipe.client != NULL)
thisfd->bits.pipe.client->bits.localsock.
@@ -689,27 +749,25 @@ static int local_pipe_callback(struct local_client *thisfd, char *buf,
status, sock_client);
/* But has the client gone away ?? */
if (sock_client == NULL) {
- DEBUGLOG
- ("Got PIPE response for dead client, ignoring it\n");
+ DEBUGLOG("Got PIPE response for dead client, ignoring it\n");
} else {
/* If error then just return that code */
if (status)
send_local_reply(sock_client, status,
sock_client->fd);
else {
- if (sock_client->bits.localsock.state ==
- POST_COMMAND) {
+ /* FIXME: closer inspect this code since state is write thread protected */
+ pthread_mutex_lock(&sock_client->bits.localsock.mutex);
+ if (sock_client->bits.localsock.state == POST_COMMAND) {
+ pthread_mutex_unlock(&sock_client->bits.localsock.mutex);
send_local_reply(sock_client, 0,
sock_client->fd);
- } else // PRE_COMMAND finished.
- {
- if (
- (status =
- distribute_command(sock_client)) !=
- 0) send_local_reply(sock_client,
- EFBIG,
- sock_client->
- fd);
+ } else {
+ /* PRE_COMMAND finished. */
+ pthread_mutex_unlock(&sock_client->bits.localsock.mutex);
+ if ((status = distribute_command(sock_client)))
+ send_local_reply(sock_client, EFBIG,
+ sock_client->fd);
}
}
}
@@ -774,9 +832,10 @@ static void request_timed_out(struct local_client *client)
/* This is where the real work happens */
static void main_loop(int local_sock, int cmd_timeout)
{
+ sigset_t ss;
+
DEBUGLOG("Using timeout of %d seconds\n", cmd_timeout);
- sigset_t ss;
sigemptyset(&ss);
sigaddset(&ss, SIGINT);
sigaddset(&ss, SIGTERM);
@@ -827,7 +886,6 @@ static void main_loop(int local_sock, int cmd_timeout)
struct local_client *free_fd;
lastfd->next = thisfd->next;
free_fd = thisfd;
- thisfd = lastfd;
DEBUGLOG("removeme set for fd %d\n", free_fd->fd);
@@ -863,7 +921,6 @@ static void main_loop(int local_sock, int cmd_timeout)
ret, errno);
lastfd->next = thisfd->next;
free_fd = thisfd;
- thisfd = lastfd;
safe_close(&(free_fd->fd));
/* Queue cleanup, this also frees the client struct */
@@ -987,7 +1044,10 @@ static void be_daemon(int timeout)
exit(3);
}
- pipe(child_pipe);
+ if (pipe(child_pipe)) {
+ perror("Error creating pipe");
+ exit(3);
+ }
switch (fork()) {
case -1:
@@ -995,11 +1055,11 @@ static void be_daemon(int timeout)
exit(2);
case 0: /* Child */
- close(child_pipe[0]);
+ (void) close(child_pipe[0]);
break;
default: /* Parent */
- close(child_pipe[1]);
+ (void) close(child_pipe[1]);
wait_for_child(child_pipe[0], timeout);
}
@@ -1030,9 +1090,9 @@ static int read_from_local_sock(struct local_client *thisfd)
int len;
int argslen;
int missing_len;
- char buffer[PIPE_BUF];
+ char buffer[PIPE_BUF + 1];
- len = read(thisfd->fd, buffer, sizeof(buffer));
+ len = read(thisfd->fd, buffer, sizeof(buffer) - 1);
if (len == -1 && errno == EINTR)
return 1;
@@ -1041,7 +1101,6 @@ static int read_from_local_sock(struct local_client *thisfd)
/* EOF or error on socket */
if (len <= 0) {
int *status;
- int jstat;
DEBUGLOG("EOF on local socket: inprogress=%d\n",
thisfd->bits.localsock.in_progress);
@@ -1068,9 +1127,10 @@ static int read_from_local_sock(struct local_client *thisfd)
pthread_cond_signal(&thisfd->bits.localsock.cond);
pthread_mutex_unlock(&thisfd->bits.localsock.mutex);
- jstat =
- pthread_join(thisfd->bits.localsock.threadid,
- (void **) &status);
+ if ((errno = pthread_join(thisfd->bits.localsock.threadid,
+ (void **) &status)))
+ log_sys_error("pthread_join", "");
+
DEBUGLOG("Joined child thread\n");
thisfd->bits.localsock.threadid = 0;
@@ -1083,8 +1143,8 @@ static int read_from_local_sock(struct local_client *thisfd)
struct local_client *lastfd = NULL;
struct local_client *free_fd = NULL;
- close(thisfd->bits.localsock.pipe_client->fd); /* Close pipe */
- close(thisfd->bits.localsock.pipe);
+ (void) close(thisfd->bits.localsock.pipe_client->fd); /* Close pipe */
+ (void) close(thisfd->bits.localsock.pipe);
/* Remove pipe client */
for (newfd = &local_client_head; newfd != NULL;
@@ -1122,6 +1182,7 @@ static int read_from_local_sock(struct local_client *thisfd)
struct clvm_header *inheader;
int status;
+ buffer[len] = 0; /* Ensure \0 terminated */
inheader = (struct clvm_header *) buffer;
/* Fill in the client ID */
@@ -1129,20 +1190,16 @@ static int read_from_local_sock(struct local_client *thisfd)
/* If we are already busy then return an error */
if (thisfd->bits.localsock.in_progress) {
- struct clvm_header reply;
- reply.cmd = CLVMD_CMD_REPLY;
- reply.status = EBUSY;
- reply.arglen = 0;
- reply.flags = 0;
+ struct clvm_header reply = {
+ .cmd = CLVMD_CMD_REPLY,
+ .status = EBUSY
+ };
send_message(&reply, sizeof(reply), our_csid,
thisfd->fd,
"Error sending EBUSY reply to local user");
return len;
}
- /* Free any old buffer space */
- free(thisfd->bits.localsock.cmd);
-
/* See if we have the whole message */
argslen =
len - strlen(inheader->node) - sizeof(struct clvm_header);
@@ -1151,15 +1208,30 @@ static int read_from_local_sock(struct local_client *thisfd)
if (missing_len < 0)
missing_len = 0;
+ /* We need at least sizeof(struct clvm_header) bytes in buffer */
+ if (len < sizeof(struct clvm_header) || argslen < 0 ||
+ missing_len > MAX_MISSING_LEN) {
+ struct clvm_header reply = {
+ .cmd = CLVMD_CMD_REPLY,
+ .status = EINVAL
+ };
+ send_message(&reply, sizeof(reply), our_csid,
+ thisfd->fd,
+ "Error sending EINVAL reply to local user");
+ return 0;
+ }
+
+ /* Free any old buffer space */
+ free(thisfd->bits.localsock.cmd);
+
/* Save the message */
thisfd->bits.localsock.cmd = malloc(len + missing_len);
if (!thisfd->bits.localsock.cmd) {
- struct clvm_header reply;
- reply.cmd = CLVMD_CMD_REPLY;
- reply.status = ENOMEM;
- reply.arglen = 0;
- reply.flags = 0;
+ struct clvm_header reply = {
+ .cmd = CLVMD_CMD_REPLY,
+ .status = ENOMEM
+ };
send_message(&reply, sizeof(reply), our_csid,
thisfd->fd,
"Error sending ENOMEM reply to local user");
@@ -1174,15 +1246,22 @@ static int read_from_local_sock(struct local_client *thisfd)
char *argptr =
inheader->node + strlen(inheader->node) + 1;
- while (missing_len > 0 && len >= 0) {
- DEBUGLOG
- ("got %d bytes, need another %d (total %d)\n",
- argslen, missing_len, inheader->arglen);
+ while (missing_len > 0) {
+ DEBUGLOG("got %d bytes, need another %d (total %d)\n",
+ argslen, missing_len, inheader->arglen);
len = read(thisfd->fd, argptr + argslen,
missing_len);
- if (len >= 0) {
+ if (len == -1 && errno == EINTR)
+ continue;
+ if (len > 0) {
missing_len -= len;
argslen += len;
+ } else {
+ /* EOF or error on socket */
+ DEBUGLOG("EOF on local socket\n");
+ free(thisfd->bits.localsock.cmd);
+ thisfd->bits.localsock.cmd = NULL;
+ return 0;
}
}
}
@@ -1207,13 +1286,12 @@ static int read_from_local_sock(struct local_client *thisfd)
/* Check the node name for validity */
if (inheader->node[0] && clops->csid_from_name(csid, inheader->node)) {
/* Error, node is not in the cluster */
- struct clvm_header reply;
- DEBUGLOG("Unknown node: '%s'\n", inheader->node);
+ struct clvm_header reply = {
+ .cmd = CLVMD_CMD_REPLY,
+ .status = ENOENT
+ };
- reply.cmd = CLVMD_CMD_REPLY;
- reply.status = ENOENT;
- reply.flags = 0;
- reply.arglen = 0;
+ DEBUGLOG("Unknown node: '%s'\n", inheader->node);
send_message(&reply, sizeof(reply), our_csid,
thisfd->fd,
"Error sending ENOENT reply to local user");
@@ -1234,17 +1312,29 @@ static int read_from_local_sock(struct local_client *thisfd)
}
/* Create a pipe and add the reading end to our FD list */
- pipe(comms_pipe);
+ if (pipe(comms_pipe)) {
+ struct clvm_header reply = {
+ .cmd = CLVMD_CMD_REPLY,
+ .status = EBUSY
+ };
+
+ DEBUGLOG("creating pipe failed: %s\n", strerror(errno));
+ send_message(&reply, sizeof(reply), our_csid,
+ thisfd->fd,
+ "Error sending EBUSY reply to local user");
+ return len;
+ }
+
newfd = malloc(sizeof(struct local_client));
if (!newfd) {
- struct clvm_header reply;
- close(comms_pipe[0]);
- close(comms_pipe[1]);
-
- reply.cmd = CLVMD_CMD_REPLY;
- reply.status = ENOMEM;
- reply.arglen = 0;
- reply.flags = 0;
+ struct clvm_header reply = {
+ .cmd = CLVMD_CMD_REPLY,
+ .status = ENOMEM
+ };
+
+ (void) close(comms_pipe[0]);
+ (void) close(comms_pipe[1]);
+
send_message(&reply, sizeof(reply), our_csid,
thisfd->fd,
"Error sending ENOMEM reply to local user");
@@ -1279,8 +1369,8 @@ static int read_from_local_sock(struct local_client *thisfd)
thisfd->bits.localsock.in_progress = TRUE;
thisfd->bits.localsock.state = PRE_COMMAND;
DEBUGLOG("Creating pre&post thread\n");
- status = pthread_create(&thisfd->bits.localsock.threadid, NULL,
- pre_and_post_thread, thisfd);
+ status = pthread_create(&thisfd->bits.localsock.threadid,
+ &stack_attr, pre_and_post_thread, thisfd);
DEBUGLOG("Created pre&post thread, state = %d\n", status);
}
return len;
@@ -1306,7 +1396,10 @@ static int distribute_command(struct local_client *thisfd)
int len = thisfd->bits.localsock.cmd_len;
thisfd->xid = global_xid++;
- DEBUGLOG("distribute command: XID = %d\n", thisfd->xid);
+ DEBUGLOG("distribute command: XID = %d, flags=0x%x (%s%s)\n",
+ thisfd->xid, inheader->flags,
+ (inheader->flags & CLVMD_FLAG_LOCAL) ? "LOCAL" : "",
+ (inheader->flags & CLVMD_FLAG_REMOTE) ? "REMOTE" : "");
/* Forward it to other nodes in the cluster if needed */
if (!(inheader->flags & CLVMD_FLAG_LOCAL)) {
@@ -1319,7 +1412,11 @@ static int distribute_command(struct local_client *thisfd)
thisfd->bits.localsock.in_progress = TRUE;
thisfd->bits.localsock.sent_out = TRUE;
- /* Do it here first */
+ /*
+ * Send to local node first, even if CLVMD_FLAG_REMOTE
+ * is set so we still get a reply if this is the
+ * only node.
+ */
add_to_lvmqueue(thisfd, inheader, len, NULL);
DEBUGLOG("Sending message to all cluster nodes\n");
@@ -1372,7 +1469,6 @@ static void process_remote_command(struct clvm_header *msg, int msglen, int fd,
int replylen = 0;
int buflen = max_cluster_message - sizeof(struct clvm_header) - 1;
int status;
- int msg_malloced = 0;
/* Get the node name as we /may/ need it later */
clops->name_from_csid(csid, nodename);
@@ -1431,9 +1527,9 @@ static void process_remote_command(struct clvm_header *msg, int msglen, int fd,
if (replyargs != NULL) {
/* Run the command */
- status =
- do_command(NULL, msg, msglen, &replyargs, buflen,
- &replylen);
+ /* FIXME: usage of init_test() is unprotected */
+ status = do_command(NULL, msg, msglen, &replyargs,
+ buflen, &replylen);
} else {
status = ENOMEM;
}
@@ -1481,10 +1577,6 @@ static void process_remote_command(struct clvm_header *msg, int msglen, int fd,
}
}
- /* Free buffer if it was malloced */
- if (msg_malloced) {
- free(msg);
- }
free(replyargs);
}
@@ -1554,11 +1646,6 @@ static __attribute__ ((noreturn)) void *pre_and_post_thread(void *arg)
DEBUGLOG("in sub thread: client = %p\n", client);
pthread_mutex_lock(&client->bits.localsock.mutex);
- /* Don't start until the LVM thread is ready */
- pthread_mutex_lock(&lvm_start_mutex);
- pthread_mutex_unlock(&lvm_start_mutex);
- DEBUGLOG("Sub thread ready for work.\n");
-
/* Ignore SIGUSR1 (handled by master process) but enable
SIGUSR2 (kills subthreads) */
sigemptyset(&ss);
@@ -1572,6 +1659,7 @@ static __attribute__ ((noreturn)) void *pre_and_post_thread(void *arg)
/* Loop around doing PRE and POST functions until the client goes away */
while (!client->bits.localsock.finished) {
/* Execute the code */
+ /* FIXME: usage of init_test() is unprotected as in do_command() */
status = do_pre_command(client);
if (status)
@@ -1654,7 +1742,12 @@ static int process_local_command(struct clvm_header *msg, int msglen,
if (replybuf == NULL)
return -1;
- status = do_command(client, msg, msglen, &replybuf, buflen, &replylen);
+ /* If remote flag is set, just set a successful status code. */
+ if (msg->flags & CLVMD_FLAG_REMOTE)
+ status = 0;
+ else
+ /* FIXME: usage of init_test() is unprotected */
+ status = do_command(client, msg, msglen, &replybuf, buflen, &replylen);
if (status)
client->bits.localsock.all_success = 0;
@@ -1723,13 +1816,18 @@ static void send_local_reply(struct local_client *client, int status, int fd)
}
/* Add in the size of our header */
- message_len = message_len + sizeof(struct clvm_header) + 1;
- replybuf = malloc(message_len);
+ message_len = message_len + sizeof(struct clvm_header);
+ if (!(replybuf = malloc(message_len))) {
+ DEBUGLOG("Memory allocation fails\n");
+ return;
+ }
clientreply = (struct clvm_header *) replybuf;
clientreply->status = status;
clientreply->cmd = CLVMD_CMD_REPLY;
clientreply->node[0] = '\0';
+ clientreply->xid = 0;
+ clientreply->clientid = 0;
clientreply->flags = 0;
ptr = clientreply->args;
@@ -1764,7 +1862,7 @@ static void send_local_reply(struct local_client *client, int status, int fd)
/* Terminate with an empty node name */
*ptr = '\0';
- clientreply->arglen = ptr - clientreply->args + 1;
+ clientreply->arglen = ptr - clientreply->args;
/* And send it */
send_message(replybuf, message_len, our_csid, fd,
@@ -1795,7 +1893,7 @@ static void free_reply(struct local_client *client)
}
/* Send our version number to the cluster */
-static void send_version_message()
+static void send_version_message(void)
{
char message[sizeof(struct clvm_header) + sizeof(int) * 3];
struct clvm_header *msg = (struct clvm_header *) message;
@@ -1847,7 +1945,7 @@ static int send_message(void *buf, int msglen, const char *csid, int fd,
break;
}
- len = write(fd, buf + ptr, msglen - ptr);
+ len = write(fd, (char*)buf + ptr, msglen - ptr);
if (len <= 0) {
if (errno == EINTR)
@@ -1900,11 +1998,11 @@ static int process_work_item(struct lvm_thread_cmd *cmd)
/*
* Routine that runs in the "LVM thread".
*/
-static void lvm_thread_fn(void *arg)
+static void *lvm_thread_fn(void *arg)
{
- struct dm_list *cmdl, *tmp;
sigset_t ss;
struct lvm_startup_params *lvm_params = arg;
+ struct lvm_thread_cmd *cmd;
DEBUGLOG("LVM thread function started\n");
@@ -1915,24 +2013,22 @@ static void lvm_thread_fn(void *arg)
pthread_sigmask(SIG_BLOCK, &ss, NULL);
/* Initialise the interface to liblvm */
- init_clvm(lvm_params->using_gulm, lvm_params->argv);
+ init_clvm(lvm_params->excl_uuid);
/* Allow others to get moving */
- pthread_mutex_unlock(&lvm_start_mutex);
+ pthread_barrier_wait(&lvm_start_barrier);
+ DEBUGLOG("Sub thread ready for work.\n");
/* Now wait for some actual work */
- for (;;) {
- DEBUGLOG("LVM thread waiting for work\n");
+ pthread_mutex_lock(&lvm_thread_mutex);
- pthread_mutex_lock(&lvm_thread_mutex);
- if (dm_list_empty(&lvm_cmd_head))
+ while (!quit) {
+ if (dm_list_empty(&lvm_cmd_head)) {
+ DEBUGLOG("LVM thread waiting for work\n");
pthread_cond_wait(&lvm_thread_cond, &lvm_thread_mutex);
-
- dm_list_iterate_safe(cmdl, tmp, &lvm_cmd_head) {
- struct lvm_thread_cmd *cmd;
-
- cmd =
- dm_list_struct_base(cmdl, struct lvm_thread_cmd, list);
+ } else {
+ cmd = dm_list_item(dm_list_first(&lvm_cmd_head),
+ struct lvm_thread_cmd);
dm_list_del(&cmd->list);
pthread_mutex_unlock(&lvm_thread_mutex);
@@ -1942,8 +2038,11 @@ static void lvm_thread_fn(void *arg)
pthread_mutex_lock(&lvm_thread_mutex);
}
- pthread_mutex_unlock(&lvm_thread_mutex);
}
+
+ pthread_mutex_unlock(&lvm_thread_mutex);
+
+ pthread_exit(NULL);
}
/* Pass down some work to the LVM thread */
@@ -1994,42 +2093,52 @@ static int add_to_lvmqueue(struct local_client *client, struct clvm_header *msg,
static int check_local_clvmd(void)
{
int local_socket;
- struct sockaddr_un sockaddr;
int ret = 0;
+ struct sockaddr_un sockaddr = { .sun_family = AF_UNIX };
+
+ if (!dm_strncpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(sockaddr.sun_path))) {
+ log_error("%s: clvmd socket name too long.", CLVMD_SOCKNAME);
+ return -1;
+ }
/* Open local socket */
if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
+ log_sys_error("socket", "local socket");
return -1;
}
- memset(&sockaddr, 0, sizeof(sockaddr));
- memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME));
- sockaddr.sun_family = AF_UNIX;
-
if (connect(local_socket,(struct sockaddr *) &sockaddr,
sizeof(sockaddr))) {
+ log_sys_error("connect", "local socket");
ret = -1;
}
- close(local_socket);
+ if (close(local_socket))
+ log_sys_error("close", "local socket");
+
return ret;
}
static void close_local_sock(int local_socket)
{
if (local_socket != -1 && close(local_socket))
- stack;
+ log_sys_error("close", CLVMD_SOCKNAME);
if (CLVMD_SOCKNAME[0] != '\0' && unlink(CLVMD_SOCKNAME))
stack;
}
/* Open the local socket, that's the one we talk to libclvm down */
-static int open_local_sock()
+static int open_local_sock(void)
{
- int local_socket = -1;
- struct sockaddr_un sockaddr;
mode_t old_mask;
+ int local_socket = -1;
+ struct sockaddr_un sockaddr = { .sun_family = AF_UNIX };
+
+ if (!dm_strncpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(sockaddr.sun_path))) {
+ log_error("%s: clvmd socket name too long.", CLVMD_SOCKNAME);
+ return -1;
+ }
close_local_sock(local_socket);
@@ -2046,11 +2155,9 @@ static int open_local_sock()
/* Set Close-on-exec & non-blocking */
if (fcntl(local_socket, F_SETFD, 1))
DEBUGLOG("setting CLOEXEC on local_socket failed: %s\n", strerror(errno));
- fcntl(local_socket, F_SETFL, fcntl(local_socket, F_GETFL, 0) | O_NONBLOCK);
+ if (fcntl(local_socket, F_SETFL, fcntl(local_socket, F_GETFL, 0) | O_NONBLOCK))
+ DEBUGLOG("setting O_NONBLOCK on local_socket failed: %s\n", strerror(errno));
- memset(&sockaddr, 0, sizeof(sockaddr));
- memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME));
- sockaddr.sun_family = AF_UNIX;
if (bind(local_socket, (struct sockaddr *) &sockaddr, sizeof(sockaddr))) {
log_error("can't bind local socket: %m");
@@ -2071,7 +2178,7 @@ error:
return -1;
}
-void process_message(struct local_client *client, const char *buf, int len,
+void process_message(struct local_client *client, char *buf, int len,
const char *csid)
{
struct clvm_header *inheader;
@@ -2108,7 +2215,7 @@ static struct local_client *find_client(int clientid)
{
struct local_client *thisfd;
for (thisfd = &local_client_head; thisfd != NULL; thisfd = thisfd->next) {
- if (thisfd->fd == ntohl(clientid))
+ if (thisfd->fd == (int)ntohl(clientid))
return thisfd;
}
return NULL;
@@ -2170,8 +2277,6 @@ static if_type_t parse_cluster_interface(char *ifname)
iface = IF_AUTO;
if (!strcmp(ifname, "cman"))
iface = IF_CMAN;
- if (!strcmp(ifname, "gulm"))
- iface = IF_GULM;
if (!strcmp(ifname, "openais"))
iface = IF_OPENAIS;
if (!strcmp(ifname, "corosync"))
@@ -2187,7 +2292,7 @@ static if_type_t parse_cluster_interface(char *ifname)
* only called if the command-line option is not present, and if it fails
* we still try the interfaces in order.
*/
-static if_type_t get_cluster_type()
+static if_type_t get_cluster_type(void)
{
#ifdef HAVE_COROSYNC_CONFDB_H
confdb_handle_t handle;
@@ -2227,6 +2332,9 @@ static if_type_t get_cluster_type()
if (result != CS_OK)
goto out;
+ if (namelen >= sizeof(buf))
+ namelen = sizeof(buf) - 1;
+
buf[namelen] = '\0';
type = parse_cluster_interface(buf);
DEBUGLOG("got interface type '%s' from confdb\n", buf);
diff --git a/daemons/clvmd/clvmd.h b/daemons/clvmd/clvmd.h
index ccc79cc..5bad43a 100644
--- a/daemons/clvmd/clvmd.h
+++ b/daemons/clvmd/clvmd.h
@@ -112,11 +112,14 @@ extern void cmd_client_cleanup(struct local_client *client);
extern int add_client(struct local_client *new_client);
extern void clvmd_cluster_init_completed(void);
-extern void process_message(struct local_client *client, const char *buf,
+extern void process_message(struct local_client *client, char *buf,
int len, const char *csid);
extern void debuglog(const char *fmt, ... )
__attribute__ ((format(printf, 1, 2)));
+void clvmd_set_debug(debug_t new_de);
+debug_t clvmd_get_debug(void);
+
int sync_lock(const char *resource, int mode, int flags, int *lockid);
int sync_unlock(const char *resource, int lockid);
diff --git a/daemons/clvmd/lvm-functions.c b/daemons/clvmd/lvm-functions.c
index 214f229..6793752 100644
--- a/daemons/clvmd/lvm-functions.c
+++ b/daemons/clvmd/lvm-functions.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -109,10 +109,9 @@ static const char *decode_full_locking_cmd(uint32_t cmdl)
break;
}
- sprintf(buf, "0x%x %s (%s|%s%s%s%s%s%s)", cmdl, command, type, scope,
+ sprintf(buf, "0x%x %s (%s|%s%s%s%s%s)", cmdl, command, type, scope,
cmdl & LCK_NONBLOCK ? "|NONBLOCK" : "",
cmdl & LCK_HOLD ? "|HOLD" : "",
- cmdl & LCK_LOCAL ? "|LOCAL" : "",
cmdl & LCK_CLUSTER_VG ? "|CLUSTER_VG" : "",
cmdl & LCK_CACHE ? "|CACHE" : "");
@@ -132,12 +131,14 @@ static const char *decode_flags(unsigned char flags)
static char buf[128];
int len;
- len = sprintf(buf, "0x%x ( %s%s%s%s%s)", flags,
+ len = sprintf(buf, "0x%x ( %s%s%s%s%s%s%s)", flags,
flags & LCK_PARTIAL_MODE ? "PARTIAL_MODE|" : "",
flags & LCK_MIRROR_NOSYNC_MODE ? "MIRROR_NOSYNC|" : "",
flags & LCK_DMEVENTD_MONITOR_MODE ? "DMEVENTD_MONITOR|" : "",
flags & LCK_ORIGIN_ONLY_MODE ? "ORIGIN_ONLY|" : "",
- flags & LCK_CONVERT ? "CONVERT|" : "");
+ flags & LCK_TEST_MODE ? "TEST|" : "",
+ flags & LCK_CONVERT ? "CONVERT|" : "",
+ flags & LCK_DMEVENTD_MONITOR_IGNORE ? "DMEVENTD_MONITOR_IGNORE|" : "");
if (len > 1)
buf[len - 2] = ' ';
@@ -147,7 +148,7 @@ static const char *decode_flags(unsigned char flags)
return buf;
}
-char *get_last_lvm_error()
+char *get_last_lvm_error(void)
{
return last_error;
}
@@ -166,11 +167,15 @@ static struct lv_info *lookup_info(const char *resource)
return lvi;
}
-static void insert_info(const char *resource, struct lv_info *lvi)
+static int insert_info(const char *resource, struct lv_info *lvi)
{
+ int ret;
+
pthread_mutex_lock(&lv_hash_lock);
- dm_hash_insert(lv_hash, resource, lvi);
+ ret = dm_hash_insert(lv_hash, resource, lvi);
pthread_mutex_unlock(&lv_hash_lock);
+
+ return ret;
}
static void remove_info(const char *resource)
@@ -194,16 +199,16 @@ static int get_current_lock(char *resource)
}
-void init_lvhash()
+void init_lvhash(void)
{
/* Create hash table for keeping LV locks & status */
- lv_hash = dm_hash_create(100);
+ lv_hash = dm_hash_create(1024);
pthread_mutex_init(&lv_hash_lock, NULL);
pthread_mutex_init(&lvm_lock, NULL);
}
/* Called at shutdown to tidy the lockspace */
-void destroy_lvhash()
+void destroy_lvhash(void)
{
struct dm_hash_node *v;
struct lv_info *lvi;
@@ -235,14 +240,25 @@ static int hold_lock(char *resource, int mode, int flags)
int saved_errno;
struct lv_info *lvi;
+ if (test_mode())
+ return 0;
+
/* Mask off invalid options */
flags &= LCKF_NOQUEUE | LCKF_CONVERT;
lvi = lookup_info(resource);
- if (lvi && lvi->lock_mode == mode) {
- DEBUGLOG("hold_lock, lock mode %d already held\n", mode);
- return 0;
+ if (lvi) {
+ if (lvi->lock_mode == mode) {
+ DEBUGLOG("hold_lock, lock mode %d already held\n",
+ mode);
+ return 0;
+ }
+ if ((lvi->lock_mode == LCK_EXCL) && (mode == LCK_WRITE)) {
+ DEBUGLOG("hold_lock, lock already held LCK_EXCL, "
+ "ignoring LCK_WRITE request");
+ return 0;
+ }
}
/* Only allow explicit conversions */
@@ -265,8 +281,10 @@ static int hold_lock(char *resource, int mode, int flags)
errno = saved_errno;
} else {
lvi = malloc(sizeof(struct lv_info));
- if (!lvi)
+ if (!lvi) {
+ errno = ENOMEM;
return -1;
+ }
lvi->lock_mode = mode;
status = sync_lock(resource, mode, flags & ~LCKF_CONVERT, &lvi->lock_id);
@@ -276,7 +294,10 @@ static int hold_lock(char *resource, int mode, int flags)
DEBUGLOG("hold_lock. lock at %d failed: %s\n", mode,
strerror(errno));
} else
- insert_info(resource, lvi);
+ if (!insert_info(resource, lvi)) {
+ errno = ENOMEM;
+ return -1;
+ }
errno = saved_errno;
}
@@ -290,6 +311,9 @@ static int hold_unlock(char *resource)
int status;
int saved_errno;
+ if (test_mode())
+ return 0;
+
if (!(lvi = lookup_info(resource))) {
DEBUGLOG("hold_unlock, lock not already held\n");
return 0;
@@ -316,7 +340,7 @@ static int hold_unlock(char *resource)
*/
/* Activate LV exclusive or non-exclusive */
-static int do_activate_lv(char *resource, unsigned char lock_flags, int mode)
+static int do_activate_lv(char *resource, unsigned char command, unsigned char lock_flags, int mode)
{
int oldmode;
int status;
@@ -326,7 +350,7 @@ static int do_activate_lv(char *resource, unsigned char lock_flags, int mode)
/* Is it already open ? */
oldmode = get_current_lock(resource);
- if (oldmode == mode && (lock_flags & LCK_CLUSTER_VG)) {
+ if (oldmode == mode && (command & LCK_CLUSTER_VG)) {
DEBUGLOG("do_activate_lv, lock already held at %d\n", oldmode);
return 0; /* Nothing to do */
}
@@ -349,7 +373,7 @@ static int do_activate_lv(char *resource, unsigned char lock_flags, int mode)
* Use lock conversion only if requested, to prevent implicit conversion
* of exclusive lock to shared one during activation.
*/
- if (lock_flags & LCK_CLUSTER_VG) {
+ if (command & LCK_CLUSTER_VG) {
status = hold_lock(resource, mode, LCKF_NOQUEUE | (lock_flags & LCK_CONVERT ? LCKF_CONVERT:0));
if (status) {
/* Return an LVM-sensible error for this.
@@ -369,9 +393,9 @@ static int do_activate_lv(char *resource, unsigned char lock_flags, int mode)
goto error;
if (lvi.suspended) {
- memlock_inc(cmd);
+ critical_section_inc(cmd, "resuming");
if (!lv_resume(cmd, resource, 0)) {
- memlock_dec(cmd);
+ critical_section_dec(cmd, "resumed");
goto error;
}
}
@@ -389,55 +413,62 @@ error:
}
/* Resume the LV if it was active */
-static int do_resume_lv(char *resource, unsigned char lock_flags)
+static int do_resume_lv(char *resource, unsigned char command, unsigned char lock_flags)
{
- int oldmode;
+ int oldmode, origin_only, exclusive, revert;
/* Is it open ? */
oldmode = get_current_lock(resource);
- if (oldmode == -1 && (lock_flags & LCK_CLUSTER_VG)) {
+ if (oldmode == -1 && (command & LCK_CLUSTER_VG)) {
DEBUGLOG("do_resume_lv, lock not already held\n");
return 0; /* We don't need to do anything */
}
+ origin_only = (lock_flags & LCK_ORIGIN_ONLY_MODE) ? 1 : 0;
+ exclusive = (oldmode == LCK_EXCL) ? 1 : 0;
+ revert = (lock_flags & LCK_REVERT_MODE) ? 1 : 0;
- if (!lv_resume_if_active(cmd, resource, (lock_flags & LCK_ORIGIN_ONLY_MODE) ? 1 : 0))
+ if (!lv_resume_if_active(cmd, resource, origin_only, exclusive, revert))
return EIO;
return 0;
}
/* Suspend the device if active */
-static int do_suspend_lv(char *resource, unsigned char lock_flags)
+static int do_suspend_lv(char *resource, unsigned char command, unsigned char lock_flags)
{
int oldmode;
struct lvinfo lvi;
unsigned origin_only = (lock_flags & LCK_ORIGIN_ONLY_MODE) ? 1 : 0;
+ unsigned exclusive;
/* Is it open ? */
oldmode = get_current_lock(resource);
- if (oldmode == -1 && (lock_flags & LCK_CLUSTER_VG)) {
+ if (oldmode == -1 && (command & LCK_CLUSTER_VG)) {
DEBUGLOG("do_suspend_lv, lock not already held\n");
return 0; /* Not active, so it's OK */
}
+ exclusive = (oldmode == LCK_EXCL) ? 1 : 0;
+
/* Only suspend it if it exists */
if (!lv_info_by_lvid(cmd, resource, origin_only, &lvi, 0, 0))
return EIO;
- if (lvi.exists && !lv_suspend_if_active(cmd, resource, origin_only))
+ if (lvi.exists &&
+ !lv_suspend_if_active(cmd, resource, origin_only, exclusive))
return EIO;
return 0;
}
-static int do_deactivate_lv(char *resource, unsigned char lock_flags)
+static int do_deactivate_lv(char *resource, unsigned char command, unsigned char lock_flags)
{
int oldmode;
int status;
/* Is it open ? */
oldmode = get_current_lock(resource);
- if (oldmode == -1 && (lock_flags & LCK_CLUSTER_VG)) {
+ if (oldmode == -1 && (command & LCK_CLUSTER_VG)) {
DEBUGLOG("do_deactivate_lock, lock not already held\n");
return 0; /* We don't need to do anything */
}
@@ -445,7 +476,7 @@ static int do_deactivate_lv(char *resource, unsigned char lock_flags)
if (!lv_deactivate(cmd, resource))
return EIO;
- if (lock_flags & LCK_CLUSTER_VG) {
+ if (command & LCK_CLUSTER_VG) {
status = hold_unlock(resource);
if (status)
return errno;
@@ -479,8 +510,8 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
{
int status = 0;
- DEBUGLOG("do_lock_lv: resource '%s', cmd = %s, flags = %s, memlock = %d\n",
- resource, decode_locking_cmd(command), decode_flags(lock_flags), memlock());
+ DEBUGLOG("do_lock_lv: resource '%s', cmd = %s, flags = %s, critical_section = %d\n",
+ resource, decode_locking_cmd(command), decode_flags(lock_flags), critical_section());
if (!cmd->config_valid || config_files_changed(cmd)) {
/* Reinitialise various settings inc. logging, filters */
@@ -494,10 +525,14 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
if (lock_flags & LCK_MIRROR_NOSYNC_MODE)
init_mirror_in_sync(1);
- if (lock_flags & LCK_DMEVENTD_MONITOR_MODE)
- init_dmeventd_monitor(1);
- else
- init_dmeventd_monitor(0);
+ if (lock_flags & LCK_DMEVENTD_MONITOR_IGNORE)
+ init_dmeventd_monitor(DMEVENTD_MONITOR_IGNORE);
+ else {
+ if (lock_flags & LCK_DMEVENTD_MONITOR_MODE)
+ init_dmeventd_monitor(1);
+ else
+ init_dmeventd_monitor(0);
+ }
cmd->partial_activation = (lock_flags & LCK_PARTIAL_MODE) ? 1 : 0;
@@ -506,24 +541,24 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
switch (command & LCK_MASK) {
case LCK_LV_EXCLUSIVE:
- status = do_activate_lv(resource, lock_flags, LCK_EXCL);
+ status = do_activate_lv(resource, command, lock_flags, LCK_EXCL);
break;
case LCK_LV_SUSPEND:
- status = do_suspend_lv(resource, lock_flags);
+ status = do_suspend_lv(resource, command, lock_flags);
break;
case LCK_UNLOCK:
case LCK_LV_RESUME: /* if active */
- status = do_resume_lv(resource, lock_flags);
+ status = do_resume_lv(resource, command, lock_flags);
break;
case LCK_LV_ACTIVATE:
- status = do_activate_lv(resource, lock_flags, LCK_READ);
+ status = do_activate_lv(resource, command, lock_flags, LCK_READ);
break;
case LCK_LV_DEACTIVATE:
- status = do_deactivate_lv(resource, lock_flags);
+ status = do_deactivate_lv(resource, command, lock_flags);
break;
default:
@@ -541,7 +576,7 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
dm_pool_empty(cmd->mem);
pthread_mutex_unlock(&lvm_lock);
- DEBUGLOG("Command return is %d, memlock is %d\n", status, memlock());
+ DEBUGLOG("Command return is %d, critical_section is %d\n", status, critical_section());
return status;
}
@@ -554,7 +589,7 @@ int pre_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
LCKF_CONVERT is used always, local node is going to modify metadata
*/
if ((command & (LCK_SCOPE_MASK | LCK_TYPE_MASK)) == LCK_LV_SUSPEND &&
- (lock_flags & LCK_CLUSTER_VG)) {
+ (command & LCK_CLUSTER_VG)) {
DEBUGLOG("pre_lock_lv: resource '%s', cmd = %s, flags = %s\n",
resource, decode_locking_cmd(command), decode_flags(lock_flags));
@@ -573,7 +608,7 @@ int post_lock_lv(unsigned char command, unsigned char lock_flags,
/* Opposite of above, done on resume after a metadata update */
if ((command & (LCK_SCOPE_MASK | LCK_TYPE_MASK)) == LCK_LV_RESUME &&
- (lock_flags & LCK_CLUSTER_VG)) {
+ (command & LCK_CLUSTER_VG)) {
int oldmode;
DEBUGLOG
@@ -611,7 +646,7 @@ int do_check_lvm1(const char *vgname)
return status == 1 ? 0 : EBUSY;
}
-int do_refresh_cache()
+int do_refresh_cache(void)
{
DEBUGLOG("Refreshing context\n");
log_notice("Refreshing context");
@@ -633,46 +668,6 @@ int do_refresh_cache()
return 0;
}
-
-/* Only called at gulm startup. Drop any leftover VG or P_orphan locks
- that might be hanging around if we died for any reason
-*/
-static void drop_vg_locks(void)
-{
- char vg[128];
- char line[255];
- FILE *vgs =
- popen
- (LVM_PATH " pvs --config 'log{command_names=0 prefix=\"\"}' --nolocking --noheadings -o vg_name", "r");
-
- sync_unlock("P_" VG_ORPHANS, LCK_EXCL);
- sync_unlock("P_" VG_GLOBAL, LCK_EXCL);
-
- if (!vgs)
- return;
-
- while (fgets(line, sizeof(line), vgs)) {
- char *vgend;
- char *vgstart;
-
- if (line[strlen(line)-1] == '\n')
- line[strlen(line)-1] = '\0';
-
- vgstart = line + strspn(line, " ");
- vgend = vgstart + strcspn(vgstart, " ");
- *vgend = '\0';
-
- if (strncmp(vgstart, "WARNING:", 8) == 0)
- continue;
-
- sprintf(vg, "V_%s", vgstart);
- sync_unlock(vg, LCK_EXCL);
-
- }
- if (fclose(vgs))
- DEBUGLOG("vgs fclose failed: %s\n", strerror(errno));
-}
-
/*
* Handle VG lock - drop metadata or update lvmcache state
*/
@@ -689,8 +684,8 @@ void do_lock_vg(unsigned char command, unsigned char lock_flags, char *resource)
if (strncmp(resource, "P_#", 3) && !strncmp(resource, "P_", 2))
lock_cmd |= LCK_CACHE;
- DEBUGLOG("do_lock_vg: resource '%s', cmd = %s, flags = %s, memlock = %d\n",
- resource, decode_full_locking_cmd(lock_cmd), decode_flags(lock_flags), memlock());
+ DEBUGLOG("do_lock_vg: resource '%s', cmd = %s, flags = %s, critical_section = %d\n",
+ resource, decode_full_locking_cmd(lock_cmd), decode_flags(lock_flags), critical_section());
/* P_#global causes a full cache refresh */
if (!strcmp(resource, "P_" VG_GLOBAL)) {
@@ -717,49 +712,33 @@ void do_lock_vg(unsigned char command, unsigned char lock_flags, char *resource)
}
/*
- * Compare the uuid with the list of exclusive locks that clvmd
- * held before it was restarted, so we can get the right kind
- * of lock now we are restarting.
- */
-static int was_ex_lock(char *uuid, char **argv)
-{
- int optnum = 0;
- char *opt = argv[optnum];
-
- while (opt) {
- if (strcmp(opt, "-E") == 0) {
- opt = argv[++optnum];
- if (opt && (strcmp(opt, uuid) == 0)) {
- DEBUGLOG("Lock %s is exclusive\n", uuid);
- return 1;
- }
- }
- opt = argv[++optnum];
- }
- return 0;
-}
-
-/*
* Ideally, clvmd should be started before any LVs are active
* but this may not be the case...
* I suppose this also comes in handy if clvmd crashes, not that it would!
*/
-static void *get_initial_state(char **argv)
+static int get_initial_state(struct dm_hash_table *excl_uuid)
{
int lock_mode;
char lv[64], vg[64], flags[25], vg_flags[25];
char uuid[65];
char line[255];
- FILE *lvs =
- popen
- (LVM_PATH " lvs --config 'log{command_names=0 prefix=\"\"}' --nolocking --noheadings -o vg_uuid,lv_uuid,lv_attr,vg_attr",
- "r");
-
- if (!lvs)
- return NULL;
+ char *lvs_cmd;
+ const char *lvm_binary = getenv("LVM_BINARY") ? : LVM_PATH;
+ FILE *lvs;
+
+ if (dm_asprintf(&lvs_cmd, "%s lvs --config 'log{command_names=0 prefix=\"\"}' "
+ "--nolocking --noheadings -o vg_uuid,lv_uuid,lv_attr,vg_attr",
+ lvm_binary) < 0)
+ return_0;
+
+ /* FIXME: Maybe link and use liblvm2cmd directly instead of fork */
+ if (!(lvs = popen(lvs_cmd, "r"))) {
+ dm_free(lvs_cmd);
+ return 0;
+ }
while (fgets(line, sizeof(line), lvs)) {
- if (sscanf(line, "%s %s %s %s\n", vg, lv, flags, vg_flags) == 4) {
+ if (sscanf(line, "%64s %64s %25s %25s\n", vg, lv, flags, vg_flags) == 4) {
/* States: s:suspended a:active S:dropped snapshot I:invalid snapshot */
if (strlen(vg) == 38 && /* is is a valid UUID ? */
@@ -782,21 +761,23 @@ static void *get_initial_state(char **argv)
memcpy(&uuid[58], &lv[32], 6);
uuid[64] = '\0';
- lock_mode = LCK_READ;
-
/* Look for this lock in the list of EX locks
we were passed on the command-line */
- if (was_ex_lock(uuid, argv))
- lock_mode = LCK_EXCL;
+ lock_mode = (dm_hash_lookup(excl_uuid, uuid)) ?
+ LCK_EXCL : LCK_READ;
DEBUGLOG("getting initial lock for %s\n", uuid);
- hold_lock(uuid, lock_mode, LCKF_NOQUEUE);
+ if (hold_lock(uuid, lock_mode, LCKF_NOQUEUE))
+ DEBUGLOG("Failed to hold lock %s\n", uuid);
}
}
}
if (fclose(lvs))
DEBUGLOG("lvs fclose failed: %s\n", strerror(errno));
- return NULL;
+
+ dm_free(lvs_cmd);
+
+ return 1;
}
static void lvm2_log_fn(int level, const char *file, int line, int dm_errno,
@@ -863,7 +844,7 @@ void lvm_do_backup(const char *vgname)
else
log_error("Error backing up metadata, can't find VG for group %s", vgname);
- free_vg(vg);
+ release_vg(vg);
dm_pool_empty(cmd->mem);
pthread_mutex_unlock(&lvm_lock);
@@ -894,10 +875,26 @@ struct dm_hash_node *get_next_excl_lock(struct dm_hash_node *v, char **name)
return v;
}
+void lvm_do_fs_unlock(void)
+{
+ pthread_mutex_lock(&lvm_lock);
+ DEBUGLOG("Syncing device names\n");
+ fs_unlock();
+ pthread_mutex_unlock(&lvm_lock);
+}
+
/* Called to initialise the LVM context of the daemon */
-int init_clvm(int using_gulm, char **argv)
+int init_clvm(struct dm_hash_table *excl_uuid)
{
- if (!(cmd = create_toolcontext(1, NULL))) {
+ /* Use LOG_DAEMON for syslog messages instead of LOG_USER */
+ init_syslog(LOG_DAEMON);
+ openlog("clvmd", LOG_PID, LOG_DAEMON);
+
+ /* Initialise already held locks */
+ if (!get_initial_state(excl_uuid))
+ log_error("Cannot load initial lock states.");
+
+ if (!(cmd = create_toolcontext(1, NULL, 0, 1))) {
log_error("Failed to allocate command context");
return 0;
}
@@ -907,21 +904,12 @@ int init_clvm(int using_gulm, char **argv)
return 0;
}
- /* Use LOG_DAEMON for syslog messages instead of LOG_USER */
- init_syslog(LOG_DAEMON);
- openlog("clvmd", LOG_PID, LOG_DAEMON);
cmd->cmd_line = "clvmd";
/* Check lvm.conf is setup for cluster-LVM */
check_config();
init_ignore_suspended_devices(1);
- /* Remove any non-LV locks that may have been left around */
- if (using_gulm)
- drop_vg_locks();
-
- get_initial_state(argv);
-
/* Trap log messages so we can pass them back to the user */
init_log_fn(lvm2_log_fn);
memlock_inc_daemon(cmd);
diff --git a/daemons/clvmd/lvm-functions.h b/daemons/clvmd/lvm-functions.h
index 97153d4..565f878 100644
--- a/daemons/clvmd/lvm-functions.h
+++ b/daemons/clvmd/lvm-functions.h
@@ -27,7 +27,7 @@ extern int post_lock_lv(unsigned char lock_cmd, unsigned char lock_flags,
char *resource);
extern int do_check_lvm1(const char *vgname);
extern int do_refresh_cache(void);
-extern int init_clvm(int using_gulm, char **argv);
+extern int init_clvm(struct dm_hash_table *excl_uuid);
extern void destroy_lvm(void);
extern void init_lvhash(void);
extern void destroy_lvhash(void);
@@ -36,5 +36,6 @@ extern char *get_last_lvm_error(void);
extern void do_lock_vg(unsigned char command, unsigned char lock_flags,
char *resource);
extern struct dm_hash_node *get_next_excl_lock(struct dm_hash_node *v, char **name);
+void lvm_do_fs_unlock(void);
#endif
diff --git a/daemons/clvmd/refresh_clvmd.c b/daemons/clvmd/refresh_clvmd.c
index 1e88b5c..28b5625 100644
--- a/daemons/clvmd/refresh_clvmd.c
+++ b/daemons/clvmd/refresh_clvmd.c
@@ -13,6 +13,8 @@
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+/* FIXME Remove duplicated functions from this file. */
+
/*
* Send a command to a running clvmd from the command-line
*/
@@ -45,7 +47,12 @@ static int _clvmd_sock = -1;
static int _open_local_sock(void)
{
int local_socket;
- struct sockaddr_un sockaddr;
+ struct sockaddr_un sockaddr = { .sun_family = AF_UNIX };
+
+ if (!dm_strncpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(sockaddr.sun_path))) {
+ fprintf(stderr, "%s: clvmd socket name too long.", CLVMD_SOCKNAME);
+ return -1;
+ }
/* Open local socket */
if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
@@ -53,11 +60,6 @@ static int _open_local_sock(void)
return -1;
}
- memset(&sockaddr, 0, sizeof(sockaddr));
- memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME));
-
- sockaddr.sun_family = AF_UNIX;
-
if (connect(local_socket,(struct sockaddr *) &sockaddr,
sizeof(sockaddr))) {
int saved_errno = errno;
@@ -80,7 +82,7 @@ static int _send_request(const char *inbuf, int inlen, char **retbuf, int no_res
char outbuf[PIPE_BUF];
struct clvm_header *outheader = (struct clvm_header *) outbuf;
int len;
- int off;
+ unsigned off;
int buflen;
int err;
@@ -149,29 +151,31 @@ static int _send_request(const char *inbuf, int inlen, char **retbuf, int no_res
/* Build the structure header and parse-out wildcard node names */
static void _build_header(struct clvm_header *head, int cmd, const char *node,
- int len)
+ unsigned int len)
{
head->cmd = cmd;
head->status = 0;
head->flags = 0;
+ head->xid = 0;
head->clientid = 0;
- head->arglen = len;
-
- if (node) {
- /*
- * Allow a couple of special node names:
- * "*" for all nodes,
- * "." for the local node only
- */
- if (strcmp(node, "*") == 0) {
- head->node[0] = '\0';
- } else if (strcmp(node, ".") == 0) {
- head->node[0] = '\0';
- head->flags = CLVMD_FLAG_LOCAL;
- } else
- strcpy(head->node, node);
- } else
+ if (len)
+ /* 1 byte is used from struct clvm_header.args[1], so -> len - 1 */
+ head->arglen = len - 1;
+ else {
+ head->arglen = 0;
+ *head->args = '\0';
+ }
+
+ /*
+ * Translate special node names.
+ */
+ if (!node || !strcmp(node, NODE_ALL))
+ head->node[0] = '\0';
+ else if (!strcmp(node, NODE_LOCAL)) {
head->node[0] = '\0';
+ head->flags = CLVMD_FLAG_LOCAL;
+ } else
+ strcpy(head->node, node);
}
/*
@@ -198,7 +202,8 @@ static int _cluster_request(char cmd, const char *node, void *data, int len,
return 0;
_build_header(head, cmd, node, len);
- memcpy(head->node + strlen(head->node) + 1, data, len);
+ if (len)
+ memcpy(head->node + strlen(head->node) + 1, data, len);
status = _send_request(outbuf, sizeof(struct clvm_header) +
strlen(head->node) + len, &retbuf, no_response);
@@ -290,7 +295,7 @@ int refresh_clvmd(int all_nodes)
int status;
int i;
- status = _cluster_request(CLVMD_CMD_REFRESH, all_nodes?"*":".", args, 0, &response, &num_responses, 0);
+ status = _cluster_request(CLVMD_CMD_REFRESH, all_nodes ? NODE_ALL : NODE_LOCAL, args, 0, &response, &num_responses, 0);
/* If any nodes were down then display them and return an error */
for (i = 0; i < num_responses; i++) {
@@ -321,7 +326,7 @@ int restart_clvmd(int all_nodes)
{
int dummy, status;
- status = _cluster_request(CLVMD_CMD_RESTART, all_nodes?"*":".", NULL, 0, NULL, &dummy, 1);
+ status = _cluster_request(CLVMD_CMD_RESTART, all_nodes ? NODE_ALL : NODE_LOCAL, NULL, 0, NULL, &dummy, 1);
/*
* FIXME: we cannot receive response, clvmd re-exec before it.
@@ -348,9 +353,9 @@ int debug_clvmd(int level, int clusterwide)
args[0] = level;
if (clusterwide)
- nodes = "*";
+ nodes = NODE_ALL;
else
- nodes = ".";
+ nodes = NODE_LOCAL;
status = _cluster_request(CLVMD_CMD_SET_DEBUG, nodes, args, 1, &response, &num_responses, 0);
diff --git a/daemons/clvmd/tcp-comms.c b/daemons/clvmd/tcp-comms.c
deleted file mode 100644
index 5f86556..0000000
--- a/daemons/clvmd/tcp-comms.c
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- * Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
- *
- * This file is part of LVM2.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU Lesser General Public License v.2.1.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/*
- * This provides the inter-clvmd communications for a system without CMAN.
- * There is a listening TCP socket which accepts new connections in the
- * normal way.
- * It can also make outgoing connnections to the other clvmd nodes.
- */
-
-#include "clvmd-common.h"
-
-#include <pthread.h>
-#include <sys/utsname.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <stdint.h>
-#include <fcntl.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <errno.h>
-#include <syslog.h>
-#include <netdb.h>
-#include <assert.h>
-
-#include "clvm.h"
-#include "clvmd-comms.h"
-#include "clvmd.h"
-#include "clvmd-gulm.h"
-
-#define DEFAULT_TCP_PORT 21064
-
-static int listen_fd = -1;
-static int tcp_port;
-struct dm_hash_table *sock_hash;
-
-static int get_our_ip_address(char *addr, int *family);
-static int read_from_tcpsock(struct local_client *fd, char *buf, int len, char *csid,
- struct local_client **new_client);
-
-/* Called by init_cluster() to open up the listening socket */
-int init_comms(unsigned short port)
-{
- struct sockaddr_in6 addr;
-
- sock_hash = dm_hash_create(100);
- tcp_port = port ? : DEFAULT_TCP_PORT;
-
- listen_fd = socket(AF_INET6, SOCK_STREAM, 0);
-
- if (listen_fd < 0)
- {
- return -1;
- }
- else
- {
- int one = 1;
- setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(int));
- setsockopt(listen_fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int));
- }
-
- memset(&addr, 0, sizeof(addr)); // Bind to INADDR_ANY
- addr.sin6_family = AF_INET6;
- addr.sin6_port = htons(tcp_port);
-
- if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
- {
- DEBUGLOG("Can't bind to port: %s\n", strerror(errno));
- syslog(LOG_ERR, "Can't bind to port %d, is clvmd already running ?", tcp_port);
- close(listen_fd);
- return -1;
- }
-
- listen(listen_fd, 5);
-
- /* Set Close-on-exec */
- fcntl(listen_fd, F_SETFD, 1);
-
- return 0;
-}
-
-void tcp_remove_client(const char *c_csid)
-{
- struct local_client *client;
- char csid[GULM_MAX_CSID_LEN];
- unsigned int i;
- memcpy(csid, c_csid, sizeof csid);
- DEBUGLOG("tcp_remove_client\n");
-
- /* Don't actually close the socket here - that's the
- job of clvmd.c whch will do the job when it notices the
- other end has gone. We just need to remove the client(s) from
- the hash table so we don't try to use it for sending any more */
- for (i = 0; i < 2; i++)
- {
- client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
- if (client)
- {
- dm_hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
- client->removeme = 1;
- close(client->fd);
- }
- /* Look for a mangled one too, on the 2nd iteration. */
- csid[0] ^= 0x80;
- }
-}
-
-int alloc_client(int fd, const char *c_csid, struct local_client **new_client)
-{
- struct local_client *client;
- char csid[GULM_MAX_CSID_LEN];
- memcpy(csid, c_csid, sizeof csid);
-
- DEBUGLOG("alloc_client %d csid = %s\n", fd, print_csid(csid));
-
- /* Create a local_client and return it */
- client = malloc(sizeof(struct local_client));
- if (!client)
- {
- DEBUGLOG("malloc failed\n");
- return -1;
- }
-
- memset(client, 0, sizeof(struct local_client));
- client->fd = fd;
- client->type = CLUSTER_DATA_SOCK;
- client->callback = read_from_tcpsock;
- if (new_client)
- *new_client = client;
-
- /* Add to our list of node sockets */
- if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN))
- {
- DEBUGLOG("alloc_client mangling CSID for second connection\n");
- /* This is a duplicate connection but we can't close it because
- the other end may already have started sending.
- So, we mangle the IP address and keep it, all sending will
- go out of the main FD
- */
- csid[0] ^= 0x80;
- client->bits.net.flags = 1; /* indicate mangled CSID */
-
- /* If it still exists then kill the connection as we should only
- ever have one incoming connection from each node */
- if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN))
- {
- DEBUGLOG("Multiple incoming connections from node\n");
- syslog(LOG_ERR, " Bogus incoming connection from %d.%d.%d.%d\n", csid[0],csid[1],csid[2],csid[3]);
-
- free(client);
- errno = ECONNREFUSED;
- return -1;
- }
- }
- dm_hash_insert_binary(sock_hash, csid, GULM_MAX_CSID_LEN, client);
-
- return 0;
-}
-
-int get_main_gulm_cluster_fd()
-{
- return listen_fd;
-}
-
-
-/* Read on main comms (listen) socket, accept it */
-int cluster_fd_gulm_callback(struct local_client *fd, char *buf, int len, const char *csid,
- struct local_client **new_client)
-{
- int newfd;
- struct sockaddr_in6 addr;
- socklen_t addrlen = sizeof(addr);
- int status;
- char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN];
-
- DEBUGLOG("cluster_fd_callback\n");
- *new_client = NULL;
- newfd = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
-
- DEBUGLOG("cluster_fd_callback, newfd=%d (errno=%d)\n", newfd, errno);
- if (!newfd)
- {
- syslog(LOG_ERR, "error in accept: %m");
- errno = EAGAIN;
- return -1; /* Don't return an error or clvmd will close the listening FD */
- }
-
- /* Check that the client is a member of the cluster
- and reject if not.
- */
- if (gulm_name_from_csid((char *)&addr.sin6_addr, name) < 0)
- {
- syslog(LOG_ERR, "Got connect from non-cluster node %s\n",
- print_csid((char *)&addr.sin6_addr));
- DEBUGLOG("Got connect from non-cluster node %s\n",
- print_csid((char *)&addr.sin6_addr));
- close(newfd);
-
- errno = EAGAIN;
- return -1;
- }
-
- status = alloc_client(newfd, (char *)&addr.sin6_addr, new_client);
- if (status)
- {
- DEBUGLOG("cluster_fd_callback, alloc_client failed, status = %d\n", status);
- close(newfd);
- /* See above... */
- errno = EAGAIN;
- return -1;
- }
- DEBUGLOG("cluster_fd_callback, returning %d, %p\n", newfd, *new_client);
- return newfd;
-}
-
-/* Try to get at least 'len' bytes from the socket */
-static int really_read(int fd, char *buf, int len)
-{
- int got, offset;
-
- got = offset = 0;
-
- do {
- got = read(fd, buf+offset, len-offset);
- DEBUGLOG("really_read. got %d bytes\n", got);
- offset += got;
- } while (got > 0 && offset < len);
-
- if (got < 0)
- return got;
- else
- return offset;
-}
-
-
-static int read_from_tcpsock(struct local_client *client, char *buf, int len, char *csid,
- struct local_client **new_client)
-{
- struct sockaddr_in6 addr;
- socklen_t slen = sizeof(addr);
- struct clvm_header *header = (struct clvm_header *)buf;
- int status;
- uint32_t arglen;
-
- DEBUGLOG("read_from_tcpsock fd %d\n", client->fd);
- *new_client = NULL;
-
- /* Get "csid" */
- getpeername(client->fd, (struct sockaddr *)&addr, &slen);
- memcpy(csid, &addr.sin6_addr, GULM_MAX_CSID_LEN);
-
- /* Read just the header first, then get the rest if there is any.
- * Stream sockets, sigh.
- */
- status = really_read(client->fd, buf, sizeof(struct clvm_header));
- if (status > 0)
- {
- int status2;
-
- arglen = ntohl(header->arglen);
-
- /* Get the rest */
- if (arglen && arglen < GULM_MAX_CLUSTER_MESSAGE)
- {
- status2 = really_read(client->fd, buf+status, arglen);
- if (status2 > 0)
- status += status2;
- else
- status = status2;
- }
- }
-
- DEBUGLOG("read_from_tcpsock, status = %d(errno = %d)\n", status, errno);
-
- /* Remove it from the hash table if there's an error, clvmd will
- remove the socket from its lists and free the client struct */
- if (status == 0 ||
- (status < 0 && errno != EAGAIN && errno != EINTR))
- {
- char remcsid[GULM_MAX_CSID_LEN];
-
- memcpy(remcsid, csid, GULM_MAX_CSID_LEN);
- close(client->fd);
-
- /* If the csid was mangled, then make sure we remove the right entry */
- if (client->bits.net.flags)
- remcsid[0] ^= 0x80;
- dm_hash_remove_binary(sock_hash, remcsid, GULM_MAX_CSID_LEN);
-
- /* Tell cluster manager layer */
- add_down_node(remcsid);
- }
- else {
- gulm_add_up_node(csid);
- /* Send it back to clvmd */
- process_message(client, buf, status, csid);
- }
- return status;
-}
-
-int gulm_connect_csid(const char *csid, struct local_client **newclient)
-{
- int fd;
- struct sockaddr_in6 addr;
- int status;
- int one = 1;
-
- DEBUGLOG("Connecting socket\n");
- fd = socket(PF_INET6, SOCK_STREAM, 0);
-
- if (fd < 0)
- {
- syslog(LOG_ERR, "Unable to create new socket: %m");
- return -1;
- }
-
- addr.sin6_family = AF_INET6;
- memcpy(&addr.sin6_addr, csid, GULM_MAX_CSID_LEN);
- addr.sin6_port = htons(tcp_port);
-
- DEBUGLOG("Connecting socket %d\n", fd);
- if (connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_in6)) < 0)
- {
- /* "Connection refused" is "normal" because clvmd may not yet be running
- * on that node.
- */
- if (errno != ECONNREFUSED)
- {
- syslog(LOG_ERR, "Unable to connect to remote node: %m");
- }
- DEBUGLOG("Unable to connect to remote node: %s\n", strerror(errno));
- close(fd);
- return -1;
- }
-
- /* Set Close-on-exec */
- fcntl(fd, F_SETFD, 1);
- setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int));
-
- status = alloc_client(fd, csid, newclient);
- if (status)
- close(fd);
- else
- add_client(*newclient);
-
- /* If we can connect to it, it must be running a clvmd */
- gulm_add_up_node(csid);
- return status;
-}
-
-/* Send a message to a known CSID */
-static int tcp_send_message(void *buf, int msglen, const char *csid, const char *errtext)
-{
- int status;
- struct local_client *client;
- char ourcsid[GULM_MAX_CSID_LEN];
-
- assert(csid);
-
- DEBUGLOG("tcp_send_message, csid = %s, msglen = %d\n", print_csid(csid), msglen);
-
- /* Don't connect to ourself */
- get_our_gulm_csid(ourcsid);
- if (memcmp(csid, ourcsid, GULM_MAX_CSID_LEN) == 0)
- return msglen;
-
- client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
- if (!client)
- {
- status = gulm_connect_csid(csid, &client);
- if (status)
- return -1;
- }
- DEBUGLOG("tcp_send_message, fd = %d\n", client->fd);
-
- return write(client->fd, buf, msglen);
-}
-
-
-int gulm_cluster_send_message(void *buf, int msglen, const char *csid, const char *errtext)
-{
- int status=0;
-
- DEBUGLOG("cluster send message, csid = %p, msglen = %d\n", csid, msglen);
-
- /* If csid is NULL then send to all known (not just connected) nodes */
- if (!csid)
- {
- void *context = NULL;
- char loop_csid[GULM_MAX_CSID_LEN];
-
- /* Loop round all gulm-known nodes */
- while (get_next_node_csid(&context, loop_csid))
- {
- status = tcp_send_message(buf, msglen, loop_csid, errtext);
- if (status == 0 ||
- (status < 0 && (errno == EAGAIN || errno == EINTR)))
- break;
- }
- }
- else
- {
-
- status = tcp_send_message(buf, msglen, csid, errtext);
- }
- return status;
-}
-
-/* To get our own IP address we get the locally bound address of the
- socket that's talking to GULM in the assumption(eek) that it will
- be on the "right" network in a multi-homed system */
-static int get_our_ip_address(char *addr, int *family)
-{
- struct utsname info;
-
- uname(&info);
- get_ip_address(info.nodename, addr);
-
- return 0;
-}
-
-/* Public version of above for those that don't care what protocol
- we're using */
-void get_our_gulm_csid(char *csid)
-{
- static char our_csid[GULM_MAX_CSID_LEN];
- static int got_csid = 0;
-
- if (!got_csid)
- {
- int family;
-
- memset(our_csid, 0, sizeof(our_csid));
- if (get_our_ip_address(our_csid, &family))
- {
- got_csid = 1;
- }
- }
- memcpy(csid, our_csid, GULM_MAX_CSID_LEN);
-}
-
-static void map_v4_to_v6(struct in_addr *ip4, struct in6_addr *ip6)
-{
- ip6->s6_addr32[0] = 0;
- ip6->s6_addr32[1] = 0;
- ip6->s6_addr32[2] = htonl(0xffff);
- ip6->s6_addr32[3] = ip4->s_addr;
-}
-
-/* Get someone else's IP address from DNS */
-int get_ip_address(const char *node, char *addr)
-{
- struct hostent *he;
-
- memset(addr, 0, GULM_MAX_CSID_LEN);
-
- // TODO: what do we do about multi-homed hosts ???
- // CCSs ip_interfaces solved this but some bugger removed it.
-
- /* Try IPv6 first. The man page for gethostbyname implies that
- it will lookup ip6 & ip4 names, but it seems not to */
- he = gethostbyname2(node, AF_INET6);
- if (he)
- {
- memcpy(addr, he->h_addr_list[0],
- he->h_length);
- }
- else
- {
- he = gethostbyname2(node, AF_INET);
- if (!he)
- return -1;
- map_v4_to_v6((struct in_addr *)he->h_addr_list[0], (struct in6_addr *)addr);
- }
-
- return 0;
-}
-
-char *print_csid(const char *csid)
-{
- static char buf[128];
- int *icsid = (int *)csid;
-
- sprintf(buf, "[%x.%x.%x.%x]",
- icsid[0],icsid[1],icsid[2],icsid[3]);
-
- return buf;
-}
diff --git a/daemons/clvmd/tcp-comms.h b/daemons/clvmd/tcp-comms.h
deleted file mode 100644
index 9260e12..0000000
--- a/daemons/clvmd/tcp-comms.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <netinet/in.h>
-
-#define GULM_MAX_CLUSTER_MESSAGE 1600
-#define GULM_MAX_CSID_LEN sizeof(struct in6_addr)
-#define GULM_MAX_CLUSTER_MEMBER_NAME_LEN 128
-
-extern int init_comms(unsigned short);
-extern char *print_csid(const char *);
-int get_main_gulm_cluster_fd(void);
-int cluster_fd_gulm_callback(struct local_client *fd, char *buf, int len, const char *csid, struct local_client **new_client);
-int gulm_cluster_send_message(void *buf, int msglen, const char *csid, const char *errtext);
-void get_our_gulm_csid(char *csid);
-int gulm_connect_csid(const char *csid, struct local_client **newclient);
diff --git a/daemons/cmirrord/clogd.c b/daemons/cmirrord/clogd.c
index 8d9a7b9..adf7a92 100644
--- a/daemons/cmirrord/clogd.c
+++ b/daemons/cmirrord/clogd.c
@@ -121,7 +121,8 @@ static void process_signals(void)
static void remove_lockfile(void)
{
- unlink(CMIRRORD_PIDFILE);
+ if (unlink(CMIRRORD_PIDFILE))
+ LOG_ERROR("Unable to remove \"" CMIRRORD_PIDFILE "\" %s", strerror(errno));
}
/*
@@ -133,6 +134,12 @@ static void daemonize(void)
{
int pid;
int status;
+ int devnull;
+
+ if ((devnull = open("/dev/null", O_RDWR)) == -1) {
+ LOG_ERROR("Can't open /dev/null: %s", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
signal(SIGTERM, &parent_exit_handler);
@@ -178,13 +185,22 @@ static void daemonize(void)
}
setsid();
- chdir("/");
+ if (chdir("/")) {
+ LOG_ERROR("Failed to chdir /: %s", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
umask(0);
- close(0); close(1); close(2);
- open("/dev/null", O_RDONLY); /* reopen stdin */
- open("/dev/null", O_WRONLY); /* reopen stdout */
- open("/dev/null", O_WRONLY); /* reopen stderr */
+ if (close(0) || close(1) || close(2)) {
+ LOG_ERROR("Failed to close terminal FDs");
+ exit(EXIT_FAILURE);
+ }
+
+ if ((dup2(devnull, 0) < 0) || /* reopen stdin */
+ (dup2(devnull, 1) < 0) || /* reopen stdout */
+ (dup2(devnull, 2) < 0)) /* reopen stderr */
+ exit(EXIT_FAILURE);
LOG_OPEN("cmirrord", LOG_PID, LOG_DAEMON);
diff --git a/daemons/cmirrord/cluster.c b/daemons/cmirrord/cluster.c
index b6c925a..70c76c3 100644
--- a/daemons/cmirrord/cluster.c
+++ b/daemons/cmirrord/cluster.c
@@ -20,10 +20,12 @@
#include <corosync/cpg.h>
#include <errno.h>
-#include <openais/saAis.h>
-#include <openais/saCkpt.h>
#include <signal.h>
#include <unistd.h>
+#if CMIRROR_HAS_CHECKPOINT
+#include <openais/saAis.h>
+#include <openais/saCkpt.h>
+#endif
/* Open AIS error codes */
#define str_ais_error(x) \
@@ -62,13 +64,13 @@
RQ_TYPE((x) & ~DM_ULOG_RESPONSE)
static uint32_t my_cluster_id = 0xDEAD;
+#if CMIRROR_HAS_CHECKPOINT
static SaCkptHandleT ckpt_handle = 0;
static SaCkptCallbacksT callbacks = { 0, 0 };
static SaVersionT version = { 'B', 1, 1 };
+#endif
#define DEBUGGING_HISTORY 100
-//static char debugging[DEBUGGING_HISTORY][128];
-//static int idx = 0;
#define LOG_SPRINT(cc, f, arg...) do { \
cc->idx++; \
cc->idx = cc->idx % DEBUGGING_HISTORY; \
@@ -77,6 +79,7 @@ static SaVersionT version = { 'B', 1, 1 };
static int log_resp_rec = 0;
+#define RECOVERING_REGION_SECTION_SIZE 64
struct checkpoint_data {
uint32_t requester;
char uuid[CPG_MAX_NAME_LENGTH];
@@ -86,7 +89,7 @@ struct checkpoint_data {
char *clean_bits;
char *recovering_region;
struct checkpoint_data *next;
-};
+};
#define INVALID 0
#define VALID 1
@@ -128,7 +131,6 @@ static struct dm_list clog_cpg_list;
int cluster_send(struct clog_request *rq)
{
int r;
- int count=0;
int found = 0;
struct iovec iov;
struct clog_cpg *entry;
@@ -165,7 +167,10 @@ int cluster_send(struct clog_request *rq)
if (entry->cpg_state != VALID)
return -EINVAL;
+#if CMIRROR_HAS_CHECKPOINT
do {
+ int count = 0;
+
r = cpg_mcast_joined(entry->handle, CPG_TYPE_AGREED, &iov, 1);
if (r != SA_AIS_ERR_TRY_AGAIN)
break;
@@ -189,12 +194,14 @@ int cluster_send(struct clog_request *rq)
str_ais_error(r));
usleep(1000);
} while (1);
-
- if (r == CPG_OK)
+#else
+ r = cpg_mcast_joined(entry->handle, CPG_TYPE_AGREED, &iov, 1);
+#endif
+ if (r == CS_OK)
return 0;
/* error codes found in openais/cpg.h */
- LOG_ERROR("cpg_mcast_joined error: %s", str_ais_error(r));
+ LOG_ERROR("cpg_mcast_joined error: %d", r);
rq->u_rq.error = -EBADE;
return -EBADE;
@@ -419,6 +426,7 @@ static void free_checkpoint(struct checkpoint_data *cp)
free(cp);
}
+#if CMIRROR_HAS_CHECKPOINT
static int export_checkpoint(struct checkpoint_data *cp)
{
SaCkptCheckpointCreationAttributesT attr;
@@ -587,7 +595,54 @@ rr_create_retry:
return 0;
}
-static int import_checkpoint(struct clog_cpg *entry, int no_read)
+#else
+static int export_checkpoint(struct checkpoint_data *cp)
+{
+ int r, rq_size;
+ struct clog_request *rq;
+
+ rq_size = sizeof(*rq);
+ rq_size += RECOVERING_REGION_SECTION_SIZE;
+ rq_size += cp->bitmap_size * 2; /* clean|sync_bits */
+
+ rq = malloc(rq_size);
+ if (!rq) {
+ LOG_ERROR("export_checkpoint: "
+ "Unable to allocate transfer structs");
+ return -ENOMEM;
+ }
+ memset(rq, 0, rq_size);
+
+ dm_list_init(&rq->u.list);
+ rq->u_rq.request_type = DM_ULOG_CHECKPOINT_READY;
+ rq->originator = cp->requester;
+ strncpy(rq->u_rq.uuid, cp->uuid, CPG_MAX_NAME_LENGTH);
+ rq->u_rq.seq = my_cluster_id;
+ rq->u_rq.data_size = rq_size - sizeof(*rq);
+
+ /* Sync bits */
+ memcpy(rq->u_rq.data, cp->sync_bits, cp->bitmap_size);
+
+ /* Clean bits */
+ memcpy(rq->u_rq.data + cp->bitmap_size, cp->clean_bits, cp->bitmap_size);
+
+ /* Recovering region */
+ memcpy(rq->u_rq.data + (cp->bitmap_size * 2), cp->recovering_region,
+ strlen(cp->recovering_region));
+
+ r = cluster_send(rq);
+ if (r)
+ LOG_ERROR("Failed to send checkpoint ready notice: %s",
+ strerror(-r));
+
+ free(rq);
+ return 0;
+}
+#endif /* CMIRROR_HAS_CHECKPOINT */
+
+#if CMIRROR_HAS_CHECKPOINT
+static int import_checkpoint(struct clog_cpg *entry, int no_read,
+ struct clog_request *rq __attribute__((unused)))
{
int rtn = 0;
SaCkptCheckpointHandleT h;
@@ -619,6 +674,7 @@ open_retry:
if (rv != SA_AIS_OK) {
LOG_ERROR("[%s] Failed to open checkpoint: %s",
SHORT_UUID(entry->name.value), str_ais_error(rv));
+ free(bitmap);
return -EIO; /* FIXME: better error */
}
@@ -647,6 +703,7 @@ init_retry:
if (rv != SA_AIS_OK) {
LOG_ERROR("[%s] Sync checkpoint section creation failed: %s",
SHORT_UUID(entry->name.value), str_ais_error(rv));
+ free(bitmap);
return -EIO; /* FIXME: better error */
}
@@ -740,6 +797,32 @@ no_read:
return rtn;
}
+#else
+static int import_checkpoint(struct clog_cpg *entry, int no_read,
+ struct clog_request *rq)
+{
+ int bitmap_size;
+
+ bitmap_size = (rq->u_rq.data_size - RECOVERING_REGION_SECTION_SIZE) / 2;
+ if (bitmap_size < 0) {
+ LOG_ERROR("Checkpoint has invalid payload size.");
+ return -EINVAL;
+ }
+
+ if (pull_state(entry->name.value, entry->luid, "sync_bits",
+ rq->u_rq.data, bitmap_size) ||
+ pull_state(entry->name.value, entry->luid, "clean_bits",
+ rq->u_rq.data + bitmap_size, bitmap_size) ||
+ pull_state(entry->name.value, entry->luid, "recovering_region",
+ rq->u_rq.data + (bitmap_size * 2),
+ RECOVERING_REGION_SECTION_SIZE)) {
+ LOG_ERROR("Error loading bitmap state from checkpoint.");
+ return -EIO;
+ }
+ return 0;
+}
+#endif /* CMIRROR_HAS_CHECKPOINT */
+
static void do_checkpoints(struct clog_cpg *entry, int leaving)
{
struct checkpoint_data *cp;
@@ -827,10 +910,11 @@ static int resend_requests(struct clog_cpg *entry)
rq->u_rq.seq);
rq->u_rq.data_size = 0;
- kernel_send(&rq->u_rq);
-
+ if (kernel_send(&rq->u_rq))
+ LOG_ERROR("Failed to respond to kernel [%s]",
+ RQ_TYPE(rq->u_rq.request_type));
break;
-
+
default:
/*
* If an action or a response is required, then
@@ -857,13 +941,13 @@ static int resend_requests(struct clog_cpg *entry)
static int do_cluster_work(void *data __attribute__((unused)))
{
- int r = SA_AIS_OK;
+ int r = CS_OK;
struct clog_cpg *entry, *tmp;
dm_list_iterate_items_safe(entry, tmp, &clog_cpg_list) {
- r = cpg_dispatch(entry->handle, CPG_DISPATCH_ALL);
- if (r != SA_AIS_OK)
- LOG_ERROR("cpg_dispatch failed: %s", str_ais_error(r));
+ r = cpg_dispatch(entry->handle, CS_DISPATCH_ALL);
+ if (r != CS_OK)
+ LOG_ERROR("cpg_dispatch failed: %d", r);
if (entry->free_me) {
free(entry);
@@ -874,7 +958,7 @@ static int do_cluster_work(void *data __attribute__((unused)))
resend_requests(entry);
}
- return (r == SA_AIS_OK) ? 0 : -1; /* FIXME: good error number? */
+ return (r == CS_OK) ? 0 : -1; /* FIXME: good error number? */
}
static int flush_startup_list(struct clog_cpg *entry)
@@ -939,16 +1023,19 @@ static void cpg_message_callback(cpg_handle_t handle, const struct cpg_name *gna
struct clog_request *tmp_rq;
struct clog_cpg *match;
- if (clog_request_from_network(rq, msg_len) < 0)
- /* Error message comes from 'clog_request_from_network' */
- return;
-
match = find_clog_cpg(handle);
if (!match) {
LOG_ERROR("Unable to find clog_cpg for cluster message");
return;
}
+ /*
+ * Perform necessary endian and version compatibility conversions
+ */
+ if (clog_request_from_network(rq, msg_len) < 0)
+ /* Any error messages come from 'clog_request_from_network' */
+ return;
+
if ((nodeid == my_cluster_id) &&
!(rq->u_rq.request_type & DM_ULOG_RESPONSE) &&
(rq->u_rq.request_type != DM_ULOG_RESUME) &&
@@ -967,7 +1054,7 @@ static void cpg_message_callback(cpg_handle_t handle, const struct cpg_name *gna
}
memcpy(tmp_rq, rq, sizeof(*rq) + rq->u_rq.data_size);
dm_list_init(&tmp_rq->u.list);
- dm_list_add( &match->working_list, &tmp_rq->u.list);
+ dm_list_add(&match->working_list, &tmp_rq->u.list);
}
if (rq->u_rq.request_type == DM_ULOG_POSTSUSPEND) {
@@ -1020,7 +1107,8 @@ static void cpg_message_callback(cpg_handle_t handle, const struct cpg_name *gna
/* Redundant checkpoints ignored if match->valid */
LOG_SPRINT(match, "[%s] CHECKPOINT_READY notification from %u",
SHORT_UUID(rq->u_rq.uuid), nodeid);
- if (import_checkpoint(match, (match->state != INVALID))) {
+ if (import_checkpoint(match,
+ (match->state != INVALID), rq)) {
LOG_SPRINT(match,
"[%s] Failed to import checkpoint from %u",
SHORT_UUID(rq->u_rq.uuid), nodeid);
@@ -1144,11 +1232,11 @@ out:
_RQ_TYPE(rq->u_rq.request_type),
rq->originator, (response) ? "YES" : "NO");
else
- LOG_SPRINT(match, "SEQ#=%u, UUID=%s, TYPE=%s, ORIG=%u, RESP=%s, RSPR=%u",
+ LOG_SPRINT(match, "SEQ#=%u, UUID=%s, TYPE=%s, ORIG=%u, RESP=%s, RSPR=%u, error=%d",
rq->u_rq.seq, SHORT_UUID(rq->u_rq.uuid),
_RQ_TYPE(rq->u_rq.request_type),
rq->originator, (response) ? "YES" : "NO",
- nodeid);
+ nodeid, rq->u_rq.error);
}
}
@@ -1161,7 +1249,7 @@ static void cpg_join_callback(struct clog_cpg *match,
uint32_t my_pid = (uint32_t)getpid();
uint32_t lowest = match->lowest_id;
struct clog_request *rq;
- char dbuf[32];
+ char dbuf[32] = { 0 };
/* Assign my_cluster_id */
if ((my_cluster_id == 0xDEAD) && (joined->pid == my_pid))
@@ -1177,7 +1265,6 @@ static void cpg_join_callback(struct clog_cpg *match,
if (joined->nodeid == my_cluster_id)
goto out;
- memset(dbuf, 0, sizeof(dbuf));
for (i = 0; i < member_list_entries - 1; i++)
sprintf(dbuf+strlen(dbuf), "%u-", member_list[i].nodeid);
sprintf(dbuf+strlen(dbuf), "(%u)", joined->nodeid);
@@ -1259,7 +1346,9 @@ static void cpg_leave_callback(struct clog_cpg *match,
dm_list_del(&rq->u.list);
if (rq->u_rq.request_type == DM_ULOG_POSTSUSPEND)
- kernel_send(&rq->u_rq);
+ if (kernel_send(&rq->u_rq))
+ LOG_ERROR("Failed to respond to kernel [%s]",
+ RQ_TYPE(rq->u_rq.request_type));
free(rq);
}
@@ -1268,7 +1357,7 @@ static void cpg_leave_callback(struct clog_cpg *match,
match->free_me = 1;
match->lowest_id = 0xDEAD;
match->state = INVALID;
- }
+ }
/* Remove any pending checkpoints for the leaving node. */
for (p_cp = NULL, c_cp = match->checkpoint_list;
@@ -1324,7 +1413,7 @@ static void cpg_leave_callback(struct clog_cpg *match,
left->nodeid);
return;
}
-
+
match->lowest_id = member_list[0].nodeid;
for (i = 0; i < member_list_entries; i++)
if (match->lowest_id > member_list[i].nodeid)
@@ -1413,6 +1502,7 @@ cpg_callbacks_t cpg_callbacks = {
*/
static int remove_checkpoint(struct clog_cpg *entry)
{
+#if CMIRROR_HAS_CHECKPOINT
int len;
SaNameT name;
SaAisErrorT rv;
@@ -1442,7 +1532,7 @@ unlink_retry:
usleep(1000);
goto unlink_retry;
}
-
+
if (rv != SA_AIS_OK) {
LOG_ERROR("[%s] Failed to unlink checkpoint: %s",
SHORT_UUID(entry->name.value), str_ais_error(rv));
@@ -1452,6 +1542,10 @@ unlink_retry:
saCkptCheckpointClose(h);
return 1;
+#else
+ /* No checkpoint to remove, so 'success' */
+ return 1;
+#endif
}
int create_cluster_cpg(char *uuid, uint64_t luid)
@@ -1493,14 +1587,14 @@ int create_cluster_cpg(char *uuid, uint64_t luid)
SHORT_UUID(new->name.value));
r = cpg_initialize(&new->handle, &cpg_callbacks);
- if (r != SA_AIS_OK) {
+ if (r != CS_OK) {
LOG_ERROR("cpg_initialize failed: Cannot join cluster");
free(new);
return -EPERM;
}
r = cpg_join(new->handle, &new->name);
- if (r != SA_AIS_OK) {
+ if (r != CS_OK) {
LOG_ERROR("cpg_join failed: Cannot join cluster");
free(new);
return -EPERM;
@@ -1541,7 +1635,7 @@ static int _destroy_cluster_cpg(struct clog_cpg *del)
{
int r;
int state;
-
+
LOG_COND(log_resend_requests, "[%s] I am leaving.2.....",
SHORT_UUID(del->name.value));
@@ -1573,7 +1667,7 @@ static int _destroy_cluster_cpg(struct clog_cpg *del)
abort_startup(del);
r = cpg_leave(del->handle, &del->name);
- if (r != CPG_OK)
+ if (r != CS_OK)
LOG_ERROR("Error leaving CPG!");
return 0;
}
@@ -1591,24 +1685,27 @@ int destroy_cluster_cpg(char *uuid)
int init_cluster(void)
{
+#if CMIRROR_HAS_CHECKPOINT
SaAisErrorT rv;
- dm_list_init(&clog_cpg_list);
rv = saCkptInitialize(&ckpt_handle, &callbacks, &version);
if (rv != SA_AIS_OK)
return EXIT_CLUSTER_CKPT_INIT;
-
+#endif
+ dm_list_init(&clog_cpg_list);
return 0;
}
void cleanup_cluster(void)
{
+#if CMIRROR_HAS_CHECKPOINT
SaAisErrorT err;
err = saCkptFinalize(ckpt_handle);
if (err != SA_AIS_OK)
LOG_ERROR("Failed to finalize checkpoint handle");
+#endif
}
void cluster_debug(void)
diff --git a/daemons/cmirrord/functions.c b/daemons/cmirrord/functions.c
index de80793..f6e0918 100644
--- a/daemons/cmirrord/functions.c
+++ b/daemons/cmirrord/functions.c
@@ -235,11 +235,9 @@ static int rw_log(struct log_c *lc, int do_write)
*/
static int read_log(struct log_c *lc)
{
- struct log_header lh;
+ struct log_header lh = { 0 };
size_t bitset_size;
- memset(&lh, 0, sizeof(struct log_header));
-
if (rw_log(lc, 0))
return -EIO; /* Failed disk read */
@@ -329,19 +327,26 @@ static int find_disk_path(char *major_minor_str, char *path_rtn, int *unlink_pat
*/
sprintf(path_rtn, "/dev/mapper/%s", dep->d_name);
- stat(path_rtn, &statbuf);
+ if (stat(path_rtn, &statbuf) < 0) {
+ LOG_DBG("Unable to stat %s", path_rtn);
+ continue;
+ }
if (S_ISBLK(statbuf.st_mode) &&
(major(statbuf.st_rdev) == major) &&
(minor(statbuf.st_rdev) == minor)) {
LOG_DBG(" %s: YES", dep->d_name);
- closedir(dp);
+ if (closedir(dp))
+ LOG_DBG("Unable to closedir /dev/mapper %s",
+ strerror(errno));
return 0;
} else {
LOG_DBG(" %s: NO", dep->d_name);
}
}
- closedir(dp);
+ if (closedir(dp))
+ LOG_DBG("Unable to closedir /dev/mapper %s",
+ strerror(errno));
/* FIXME Find out why this was here and deal with underlying problem. */
LOG_DBG("Path not found for %d/%d", major, minor);
@@ -369,10 +374,10 @@ static int _clog_ctr(char *uuid, uint64_t luid,
enum sync log_sync = DEFAULTSYNC;
uint32_t block_on_error = 0;
- int disk_log = 0;
+ int disk_log;
char disk_path[128];
int unlink_path = 0;
- size_t page_size;
+ long page_size;
int pages;
/* If core log request, then argv[0] will be region_size */
@@ -476,7 +481,12 @@ static int _clog_ctr(char *uuid, uint64_t luid,
lc->sync_count = (log_sync == NOSYNC) ? region_count : 0;
if (disk_log) {
- page_size = sysconf(_SC_PAGESIZE);
+ if ((page_size = sysconf(_SC_PAGESIZE)) < 0) {
+ LOG_ERROR("Unable to read pagesize: %s",
+ strerror(errno));
+ r = errno;
+ goto fail;
+ }
pages = *(lc->clean_bits) / page_size;
pages += *(lc->clean_bits) % page_size ? 1 : 0;
pages += 1; /* for header */
@@ -489,7 +499,10 @@ static int _clog_ctr(char *uuid, uint64_t luid,
goto fail;
}
if (unlink_path)
- unlink(disk_path);
+ if (unlink(disk_path) < 0) {
+ LOG_DBG("Warning: Unable to unlink log device, %s: %s",
+ disk_path, strerror(errno));
+ }
lc->disk_fd = r;
lc->disk_size = pages * page_size;
@@ -586,7 +599,10 @@ static int clog_ctr(struct dm_ulog_request *rq)
/* We join the CPG when we resume */
/* No returning data */
- rq->data_size = 0;
+ if ((rq->version > 1) && !strcmp(argv[0], "clustered-disk"))
+ rq->data_size = sprintf(rq->data, "%s", argv[1]) + 1;
+ else
+ rq->data_size = 0;
if (r) {
LOG_ERROR("Failed to create cluster log (%s)", rq->uuid);
@@ -626,8 +642,9 @@ static int clog_dtr(struct dm_ulog_request *rq)
LOG_DBG("[%s] Cluster log removed", SHORT_UUID(lc->uuid));
dm_list_del(&lc->list);
- if (lc->disk_fd != -1)
- close(lc->disk_fd);
+ if (lc->disk_fd != -1 && close(lc->disk_fd))
+ LOG_ERROR("Failed to close disk log: %s",
+ strerror(errno));
if (lc->disk_buffer)
free(lc->disk_buffer);
dm_free(lc->clean_bits);
@@ -770,7 +787,7 @@ static int clog_resume(struct dm_ulog_request *rq)
else if (lc->disk_nr_regions > lc->region_count)
LOG_DBG("[%s] Mirror has shrunk, updating log bits",
SHORT_UUID(lc->uuid));
- break;
+ break;
case -EINVAL:
LOG_DBG("[%s] (Re)initializing mirror log - resync issued.",
SHORT_UUID(lc->uuid));
@@ -823,7 +840,7 @@ out:
lc->sync_search = 0;
lc->state = LOG_RESUMED;
lc->recovery_halted = 0;
-
+
return rq->error;
}
@@ -1018,7 +1035,7 @@ static int clog_flush(struct dm_ulog_request *rq, int server)
{
int r = 0;
struct log_c *lc = get_log(rq->uuid, rq->luid);
-
+
if (!lc)
return -EINVAL;
@@ -1600,7 +1617,7 @@ out:
rq->data_size = sizeof(*pkg);
- return 0;
+ return 0;
}
@@ -1705,14 +1722,12 @@ int do_request(struct clog_request *rq, int server)
static void print_bits(dm_bitset_t bs, int print)
{
int i, size;
- char outbuf[128];
+ char outbuf[128] = { 0 };
unsigned char *buf = (unsigned char *)(bs + 1);
size = (*bs % 8) ? 1 : 0;
size += (*bs / 8);
- memset(outbuf, 0, sizeof(outbuf));
-
for (i = 0; i < size; i++) {
if (!(i % 16)) {
if (outbuf[0] != '\0') {
@@ -1817,8 +1832,11 @@ int pull_state(const char *uuid, uint64_t luid,
}
if (!strncmp(which, "recovering_region", 17)) {
- sscanf(buf, "%llu %u", (unsigned long long *)&lc->recovering_region,
- &lc->recoverer);
+ if (sscanf(buf, "%llu %u", (unsigned long long *)&lc->recovering_region,
+ &lc->recoverer) != 2) {
+ LOG_ERROR("cannot parse recovering region from: %s", buf);
+ return -EINVAL;
+ }
LOG_SPRINT(lc, "CKPT INIT - SEQ#=X, UUID=%s, nodeid = X:: "
"recovering_region=%llu, recoverer=%u",
SHORT_UUID(lc->uuid),
@@ -1853,7 +1871,7 @@ int pull_state(const char *uuid, uint64_t luid,
LOG_DBG("[%s] loading clean_bits:", SHORT_UUID(lc->uuid));
- print_bits(lc->sync_bits, 0);
+ print_bits(lc->clean_bits, 0);
}
return 0;
diff --git a/daemons/cmirrord/link_mon.c b/daemons/cmirrord/link_mon.c
index 80a98d0..74058f9 100644
--- a/daemons/cmirrord/link_mon.c
+++ b/daemons/cmirrord/link_mon.c
@@ -58,7 +58,7 @@ int links_register(int fd, const char *name, int (*callback)(void *data), void *
free(lc);
return -ENOMEM;
}
-
+
pfds = tmp;
free_pfds = used_pfds + 1;
}
@@ -125,7 +125,7 @@ int links_monitor(void)
for (i = 0; i < used_pfds; i++)
if (pfds[i].revents & POLLIN) {
LOG_DBG("Data ready on %d", pfds[i].fd);
-
+
/* FIXME: Add this back return 1;*/
r++;
}
diff --git a/daemons/cmirrord/local.c b/daemons/cmirrord/local.c
index 9e076c4..500f6dc 100644
--- a/daemons/cmirrord/local.c
+++ b/daemons/cmirrord/local.c
@@ -27,7 +27,7 @@
#define CN_VAL_DM_USERSPACE_LOG 0x1
#endif
-static int cn_fd; /* Connector (netlink) socket fd */
+static int cn_fd = -1; /* Connector (netlink) socket fd */
static char recv_buf[2048];
static char send_buf[2048];
@@ -237,7 +237,6 @@ static int do_local_work(void *data __attribute__((unused)))
case DM_ULOG_GET_REGION_SIZE:
case DM_ULOG_IN_SYNC:
case DM_ULOG_GET_SYNC_COUNT:
- case DM_ULOG_STATUS_INFO:
case DM_ULOG_STATUS_TABLE:
case DM_ULOG_PRESUSPEND:
/* We do not specify ourselves as server here */
@@ -249,7 +248,7 @@ static int do_local_work(void *data __attribute__((unused)))
if (r)
LOG_ERROR("Failed to respond to kernel [%s]",
RQ_TYPE(u_rq->request_type));
-
+
break;
case DM_ULOG_RESUME:
/*
@@ -273,13 +272,16 @@ static int do_local_work(void *data __attribute__((unused)))
case DM_ULOG_MARK_REGION:
case DM_ULOG_GET_RESYNC_WORK:
case DM_ULOG_SET_REGION_SYNC:
+ case DM_ULOG_STATUS_INFO:
case DM_ULOG_IS_REMOTE_RECOVERING:
case DM_ULOG_POSTSUSPEND:
r = cluster_send(rq);
if (r) {
u_rq->data_size = 0;
u_rq->error = r;
- kernel_send(u_rq);
+ if (kernel_send(u_rq))
+ LOG_ERROR("Failed to respond to kernel [%s]",
+ RQ_TYPE(u_rq->request_type));
}
break;
@@ -384,14 +386,18 @@ int init_local(void)
r = bind(cn_fd, (struct sockaddr *) &addr, sizeof(addr));
if (r < 0) {
- close(cn_fd);
+ if (close(cn_fd))
+ LOG_ERROR("Failed to close socket: %s",
+ strerror(errno));
return EXIT_KERNEL_BIND;
}
opt = addr.nl_groups;
r = setsockopt(cn_fd, 270, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt));
if (r) {
- close(cn_fd);
+ if (close(cn_fd))
+ LOG_ERROR("Failed to close socket: %s",
+ strerror(errno));
return EXIT_KERNEL_SETSOCKOPT;
}
@@ -412,5 +418,7 @@ int init_local(void)
void cleanup_local(void)
{
links_unregister(cn_fd);
- close(cn_fd);
+ if (cn_fd >= 0 && close(cn_fd))
+ LOG_ERROR("Failed to close socket: %s",
+ strerror(errno));
}
diff --git a/daemons/dmeventd/.exported_symbols b/daemons/dmeventd/.exported_symbols
index 25690c8..fab74dc 100644
--- a/daemons/dmeventd/.exported_symbols
+++ b/daemons/dmeventd/.exported_symbols
@@ -1,3 +1,4 @@
init_fifos
fini_fifos
daemon_talk
+dm_event_get_version
diff --git a/daemons/dmeventd/Makefile.in b/daemons/dmeventd/Makefile.in
index e99e089..1302a44 100644
--- a/daemons/dmeventd/Makefile.in
+++ b/daemons/dmeventd/Makefile.in
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
#
# This file is part of the device-mapper userspace tools.
#
@@ -60,11 +60,11 @@ LIBS += -ldevmapper
LVMLIBS += -ldevmapper-event $(PTHREAD_LIBS)
dmeventd: $(LIB_SHARED) dmeventd.o
- $(CC) $(CFLAGS) $(LDFLAGS) -L. -o $@ dmeventd.o \
+ $(CC) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) -L. -o $@ dmeventd.o \
$(DL_LIBS) $(LVMLIBS) $(LIBS) -rdynamic
dmeventd.static: $(LIB_STATIC) dmeventd.o $(interfacebuilddir)/libdevmapper.a
- $(CC) $(CFLAGS) $(LDFLAGS) -static -L. -L$(interfacebuilddir) -o $@ \
+ $(CC) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) -static -L. -L$(interfacebuilddir) -o $@ \
dmeventd.o $(DL_LIBS) $(LVMLIBS) $(LIBS) $(STATIC_LIBS)
ifeq ("@PKGCONFIG@", "yes")
@@ -105,4 +105,4 @@ install: install_include install_lib install_dmeventd
install_device-mapper: install_include install_lib install_dmeventd
-DISTCLEAN_TARGETS += libdevmapper-event.pc .exported_symbols_generated
+DISTCLEAN_TARGETS += libdevmapper-event.pc
diff --git a/daemons/dmeventd/dmeventd.c b/daemons/dmeventd/dmeventd.c
index 2b454f9..13148c3 100644
--- a/daemons/dmeventd/dmeventd.c
+++ b/daemons/dmeventd/dmeventd.c
@@ -39,13 +39,27 @@
#include <arpa/inet.h> /* for htonl, ntohl */
#ifdef linux
-# include <malloc.h>
-
-# define OOM_ADJ_FILE "/proc/self/oom_adj"
+/*
+ * Kernel version 2.6.36 and higher has
+ * new OOM killer adjustment interface.
+ */
+# define OOM_ADJ_FILE_OLD "/proc/self/oom_adj"
+# define OOM_ADJ_FILE "/proc/self/oom_score_adj"
/* From linux/oom.h */
+/* Old interface */
# define OOM_DISABLE (-17)
# define OOM_ADJUST_MIN (-16)
+/* New interface */
+# define OOM_SCORE_ADJ_MIN (-1000)
+
+/* Systemd on-demand activation support */
+# define SD_ACTIVATION_ENV_VAR_NAME "SD_ACTIVATION"
+# define SD_LISTEN_PID_ENV_VAR_NAME "LISTEN_PID"
+# define SD_LISTEN_FDS_ENV_VAR_NAME "LISTEN_FDS"
+# define SD_LISTEN_FDS_START 3
+# define SD_FD_FIFO_SERVER SD_LISTEN_FDS_START
+# define SD_FD_FIFO_CLIENT (SD_LISTEN_FDS_START + 1)
#endif
@@ -95,9 +109,8 @@ static pthread_mutex_t _global_mutex;
#define THREAD_STACK_SIZE (300*1024)
-#define DEBUGLOG(fmt, args...) _debuglog(fmt, ## args)
-
int dmeventd_debug = 0;
+static int _systemd_activation = 0;
static int _foreground = 0;
static int _restart = 0;
static char **_initial_registrations = 0;
@@ -203,24 +216,6 @@ static DM_LIST_INIT(_timeout_registry);
static pthread_mutex_t _timeout_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t _timeout_cond = PTHREAD_COND_INITIALIZER;
-static void _debuglog(const char *fmt, ...)
-{
- time_t P;
- va_list ap;
-
- if (!_foreground)
- return;
-
- va_start(ap,fmt);
-
- time(&P);
- fprintf(stderr, "dmeventd[%p]: %.15s ", (void *) pthread_self(), ctime(&P)+4 );
- vfprintf(stderr, fmt, ap);
- fprintf(stderr, "\n");
-
- va_end(ap);
-}
-
/* Allocate/free the status structure for a monitoring thread. */
static struct thread_status *_alloc_thread_status(struct message_data *data,
struct dso_data *dso_data)
@@ -407,7 +402,9 @@ static int _fill_device_data(struct thread_status *ts)
if (!dmt)
return 0;
- dm_task_set_uuid(dmt, ts->device.uuid);
+ if (!dm_task_set_uuid(dmt, ts->device.uuid))
+ goto fail;
+
if (!dm_task_run(dmt))
goto fail;
@@ -450,7 +447,7 @@ static int _get_status(struct message_data *message_data)
{
struct dm_event_daemon_message *msg = message_data->msg;
struct thread_status *thread;
- int i = 0, j = 0;
+ int i, j;
int ret = -1;
int count = dm_list_size(&_thread_registry);
int size = 0, current = 0;
@@ -585,6 +582,7 @@ static void _unregister_for_timeout(struct thread_status *thread)
pthread_mutex_unlock(&_timeout_mutex);
}
+__attribute__((format(printf, 4, 5)))
static void _no_intr_log(int level, const char *file, int line,
const char *f, ...)
{
@@ -740,8 +738,10 @@ static void _monitor_unregister(void *arg)
return;
}
thread->status = DM_THREAD_DONE;
+ pthread_mutex_lock(&_timeout_mutex);
UNLINK_THREAD(thread);
LINK(thread, &_thread_registry_unused);
+ pthread_mutex_unlock(&_timeout_mutex);
_unlock_mutex();
}
@@ -752,7 +752,10 @@ static struct dm_task *_get_device_status(struct thread_status *ts)
if (!dmt)
return NULL;
- dm_task_set_uuid(dmt, ts->device.uuid);
+ if (!dm_task_set_uuid(dmt, ts->device.uuid)) {
+ dm_task_destroy(dmt);
+ return NULL;
+ }
if (!dm_task_run(dmt)) {
dm_task_destroy(dmt);
@@ -996,10 +999,8 @@ static int _register_for_event(struct message_data *message_data)
almost as good as dead already... */
if (thread_new->events & DM_EVENT_TIMEOUT) {
ret = -_register_for_timeout(thread_new);
- if (ret) {
- _unlock_mutex();
- goto out;
- }
+ if (ret)
+ goto outth;
}
if (!(thread = _lookup_thread_status(message_data))) {
@@ -1025,6 +1026,7 @@ static int _register_for_event(struct message_data *message_data)
/* Or event # into events bitfield. */
thread->events |= message_data->events.field;
+ outth:
_unlock_mutex();
out:
@@ -1076,8 +1078,10 @@ static int _unregister_for_event(struct message_data *message_data)
* unlink and terminate its monitoring thread.
*/
if (!thread->events) {
+ pthread_mutex_lock(&_timeout_mutex);
UNLINK_THREAD(thread);
LINK(thread, &_thread_registry_unused);
+ pthread_mutex_unlock(&_timeout_mutex);
}
_unlock_mutex();
@@ -1099,15 +1103,19 @@ static int _registered_device(struct message_data *message_data,
const char *id = message_data->id;
const char *dso = thread->dso_data->dso_name;
const char *dev = thread->device.uuid;
+ int r;
unsigned events = ((thread->status == DM_THREAD_RUNNING)
&& (thread->events)) ? thread->events : thread->
events | DM_EVENT_REGISTRATION_PENDING;
dm_free(msg->data);
- msg->size = dm_asprintf(&(msg->data), fmt, id, dso, dev, events);
+ if ((r = dm_asprintf(&(msg->data), fmt, id, dso, dev, events)) < 0) {
+ msg->size = 0;
+ return -ENOMEM;
+ }
- _unlock_mutex();
+ msg->size = (uint32_t) r;
return 0;
}
@@ -1140,6 +1148,7 @@ static int _want_registered_device(char *dso_name, char *device_uuid,
static int _get_registered_dev(struct message_data *message_data, int next)
{
struct thread_status *thread, *hit = NULL;
+ int ret = -ENOENT;
_lock_mutex();
@@ -1156,16 +1165,12 @@ static int _get_registered_dev(struct message_data *message_data, int next)
* If we got a registered device and want the next one ->
* fetch next conforming element off the list.
*/
- if (hit && !next) {
- _unlock_mutex();
- return _registered_device(message_data, hit);
- }
+ if (hit && !next)
+ goto reg;
if (!hit)
goto out;
- thread = hit;
-
while (1) {
if (dm_list_end(&_thread_registry, &thread->list))
goto out;
@@ -1177,13 +1182,13 @@ static int _get_registered_dev(struct message_data *message_data, int next)
}
}
- _unlock_mutex();
- return _registered_device(message_data, hit);
+ reg:
+ ret = _registered_device(message_data, hit);
out:
_unlock_mutex();
-
- return -ENOENT;
+
+ return ret;
}
static int _get_registered_device(struct message_data *message_data)
@@ -1241,68 +1246,66 @@ static void _init_fifos(struct dm_event_fifos *fifos)
/* Open fifos used for client communication. */
static int _open_fifos(struct dm_event_fifos *fifos)
{
- int orig_errno;
+ struct stat st;
/* Create client fifo. */
(void) dm_prepare_selinux_context(fifos->client_path, S_IFIFO);
if ((mkfifo(fifos->client_path, 0600) == -1) && errno != EEXIST) {
- syslog(LOG_ERR, "%s: Failed to create client fifo.\n", __func__);
- orig_errno = errno;
+ syslog(LOG_ERR, "%s: Failed to create client fifo %s: %m.\n",
+ __func__, fifos->client_path);
(void) dm_prepare_selinux_context(NULL, 0);
- stack;
- return -orig_errno;
+ return 0;
}
/* Create server fifo. */
(void) dm_prepare_selinux_context(fifos->server_path, S_IFIFO);
if ((mkfifo(fifos->server_path, 0600) == -1) && errno != EEXIST) {
- syslog(LOG_ERR, "%s: Failed to create server fifo.\n", __func__);
- orig_errno = errno;
+ syslog(LOG_ERR, "%s: Failed to create server fifo %s: %m.\n",
+ __func__, fifos->server_path);
(void) dm_prepare_selinux_context(NULL, 0);
- stack;
- return -orig_errno;
+ return 0;
}
(void) dm_prepare_selinux_context(NULL, 0);
- struct stat st;
-
/* Warn about wrong permissions if applicable */
if ((!stat(fifos->client_path, &st)) && (st.st_mode & 0777) != 0600)
- syslog(LOG_WARNING, "Fixing wrong permissions on %s",
+ syslog(LOG_WARNING, "Fixing wrong permissions on %s: %m.\n",
fifos->client_path);
if ((!stat(fifos->server_path, &st)) && (st.st_mode & 0777) != 0600)
- syslog(LOG_WARNING, "Fixing wrong permissions on %s",
+ syslog(LOG_WARNING, "Fixing wrong permissions on %s: %m.\n",
fifos->server_path);
/* If they were already there, make sure permissions are ok. */
if (chmod(fifos->client_path, 0600)) {
- syslog(LOG_ERR, "Unable to set correct file permissions on %s",
+ syslog(LOG_ERR, "Unable to set correct file permissions on %s: %m.\n",
fifos->client_path);
- return -errno;
+ return 0;
}
if (chmod(fifos->server_path, 0600)) {
- syslog(LOG_ERR, "Unable to set correct file permissions on %s",
+ syslog(LOG_ERR, "Unable to set correct file permissions on %s: %m.\n",
fifos->server_path);
- return -errno;
+ return 0;
}
/* Need to open read+write or we will block or fail */
if ((fifos->server = open(fifos->server_path, O_RDWR)) < 0) {
- stack;
- return -errno;
+ syslog(LOG_ERR, "Failed to open fifo server %s: %m.\n",
+ fifos->server_path);
+ return 0;
}
/* Need to open read+write for select() to work. */
if ((fifos->client = open(fifos->client_path, O_RDWR)) < 0) {
- stack;
- close(fifos->server);
- return -errno;
+ syslog(LOG_ERR, "Failed to open fifo client %s: %m", fifos->client_path);
+ if (close(fifos->server))
+ syslog(LOG_ERR, "Failed to close fifo server %s: %m", fifos->server_path);
+ return 0;
}
- return 0;
+ return 1;
}
/*
@@ -1405,7 +1408,7 @@ static int _client_write(struct dm_event_fifos *fifos,
static int _handle_request(struct dm_event_daemon_message *msg,
struct message_data *message_data)
{
- static struct {
+ static struct request {
unsigned int cmd;
int (*f)(struct message_data *);
} requests[] = {
@@ -1420,7 +1423,7 @@ static int _handle_request(struct dm_event_daemon_message *msg,
{ DM_EVENT_CMD_GET_STATUS, _get_status},
}, *req;
- for (req = requests; req < requests + sizeof(requests); req++)
+ for (req = requests; req < requests + sizeof(requests) / sizeof(struct request); req++)
if (req->cmd == msg->cmd)
return req->f(message_data);
@@ -1432,17 +1435,16 @@ static int _do_process_request(struct dm_event_daemon_message *msg)
{
int ret;
char *answer;
- static struct message_data message_data;
+ struct message_data message_data = { .msg = msg };
/* Parse the message. */
- memset(&message_data, 0, sizeof(message_data));
- message_data.msg = msg;
if (msg->cmd == DM_EVENT_CMD_HELLO || msg->cmd == DM_EVENT_CMD_DIE) {
ret = 0;
answer = msg->data;
if (answer) {
- msg->size = dm_asprintf(&(msg->data), "%s %s", answer,
- msg->cmd == DM_EVENT_CMD_DIE ? "DYING" : "HELLO");
+ msg->size = dm_asprintf(&(msg->data), "%s %s %d", answer,
+ msg->cmd == DM_EVENT_CMD_DIE ? "DYING" : "HELLO",
+ DM_EVENT_PROTOCOL_VERSION);
dm_free(answer);
} else {
msg->size = 0;
@@ -1467,9 +1469,7 @@ static int _do_process_request(struct dm_event_daemon_message *msg)
static void _process_request(struct dm_event_fifos *fifos)
{
int die = 0;
- struct dm_event_daemon_message msg;
-
- memset(&msg, 0, sizeof(msg));
+ struct dm_event_daemon_message msg = { 0 };
/*
* Read the request from the client (client_read, client_write
@@ -1488,9 +1488,9 @@ static void _process_request(struct dm_event_fifos *fifos)
if (!_client_write(fifos, &msg))
stack;
- if (die) raise(9);
-
dm_free(msg.data);
+
+ if (die) raise(9);
}
static void _process_initial_registrations(void)
@@ -1501,9 +1501,10 @@ static void _process_initial_registrations(void)
while ((reg = _initial_registrations[i])) {
msg.cmd = DM_EVENT_CMD_REGISTER_FOR_EVENT;
- msg.size = strlen(reg);
- msg.data = reg;
- _do_process_request(&msg);
+ if ((msg.size = strlen(reg))) {
+ msg.data = reg;
+ _do_process_request(&msg);
+ }
++ i;
}
}
@@ -1513,6 +1514,7 @@ static void _cleanup_unused_threads(void)
int ret;
struct dm_list *l;
struct thread_status *thread;
+ int join_ret = 0;
_lock_mutex();
while ((l = dm_list_first(&_thread_registry_unused))) {
@@ -1552,12 +1554,15 @@ static void _cleanup_unused_threads(void)
if (thread->status == DM_THREAD_DONE) {
dm_list_del(l);
- pthread_join(thread->thread, NULL);
+ join_ret = pthread_join(thread->thread, NULL);
_free_thread_status(thread);
}
}
_unlock_mutex();
+
+ if (join_ret)
+ syslog(LOG_ERR, "Failed pthread_join: %s\n", strerror(join_ret));
}
static void _sig_alarm(int signum __attribute__((unused)))
@@ -1569,10 +1574,8 @@ static void _sig_alarm(int signum __attribute__((unused)))
static void _init_thread_signals(void)
{
sigset_t my_sigset;
- struct sigaction act;
+ struct sigaction act = { .sa_handler = _sig_alarm };
- memset(&act, 0, sizeof(act));
- act.sa_handler = _sig_alarm;
sigaction(SIGALRM, &act, NULL);
sigfillset(&my_sigset);
@@ -1610,40 +1613,138 @@ static void _exit_handler(int sig __attribute__((unused)))
}
#ifdef linux
+static int _set_oom_adj(const char *oom_adj_path, int val)
+{
+ FILE *fp;
+
+ if (!(fp = fopen(oom_adj_path, "w"))) {
+ perror("oom_adj: fopen failed");
+ return 0;
+ }
+
+ fprintf(fp, "%i", val);
+
+ if (dm_fclose(fp))
+ perror("oom_adj: fclose failed");
+
+ return 1;
+}
+
/*
* Protection against OOM killer if kernel supports it
*/
-static int _set_oom_adj(int val)
+static int _protect_against_oom_killer(void)
{
- FILE *fp;
-
struct stat st;
if (stat(OOM_ADJ_FILE, &st) == -1) {
- if (errno == ENOENT)
- DEBUGLOG(OOM_ADJ_FILE " not found");
- else
+ if (errno != ENOENT)
perror(OOM_ADJ_FILE ": stat failed");
- return 1;
+
+ /* Try old oom_adj interface as a fallback */
+ if (stat(OOM_ADJ_FILE_OLD, &st) == -1) {
+ if (errno == ENOENT)
+ perror(OOM_ADJ_FILE_OLD " not found");
+ else
+ perror(OOM_ADJ_FILE_OLD ": stat failed");
+ return 1;
+ }
+
+ return _set_oom_adj(OOM_ADJ_FILE_OLD, OOM_DISABLE) ||
+ _set_oom_adj(OOM_ADJ_FILE_OLD, OOM_ADJUST_MIN);
}
- if (!(fp = fopen(OOM_ADJ_FILE, "w"))) {
- perror(OOM_ADJ_FILE ": fopen failed");
+ return _set_oom_adj(OOM_ADJ_FILE, OOM_SCORE_ADJ_MIN);
+}
+
+static int _handle_preloaded_fifo(int fd, const char *path)
+{
+ struct stat st_fd, st_path;
+ int flags;
+
+ if ((flags = fcntl(fd, F_GETFD)) < 0)
return 0;
- }
- fprintf(fp, "%i", val);
- if (dm_fclose(fp))
- perror(OOM_ADJ_FILE ": fclose failed");
+ if (flags & FD_CLOEXEC)
+ return 0;
+
+ if (fstat(fd, &st_fd) < 0 || !S_ISFIFO(st_fd.st_mode))
+ return 0;
+
+ if (stat(path, &st_path) < 0 ||
+ st_path.st_dev != st_fd.st_dev ||
+ st_path.st_ino != st_fd.st_ino)
+ return 0;
+
+ if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) < 0)
+ return 0;
return 1;
}
+
+static int _systemd_handover(struct dm_event_fifos *fifos)
+{
+ const char *e;
+ char *p;
+ unsigned long env_pid, env_listen_fds;
+ int r = 0;
+
+ memset(fifos, 0, sizeof(*fifos));
+
+ /* SD_ACTIVATION must be set! */
+ if (!(e = getenv(SD_ACTIVATION_ENV_VAR_NAME)) || strcmp(e, "1"))
+ goto out;
+
+ /* LISTEN_PID must be equal to our PID! */
+ if (!(e = getenv(SD_LISTEN_PID_ENV_VAR_NAME)))
+ goto out;
+
+ errno = 0;
+ env_pid = strtoul(e, &p, 10);
+ if (errno || !p || *p || env_pid <= 0 ||
+ getpid() != (pid_t) env_pid)
+ goto out;
+
+ /* LISTEN_FDS must be 2 and the fds must be FIFOSs! */
+ if (!(e = getenv(SD_LISTEN_FDS_ENV_VAR_NAME)))
+ goto out;
+
+ errno = 0;
+ env_listen_fds = strtoul(e, &p, 10);
+ if (errno || !p || *p || env_listen_fds != 2)
+ goto out;
+
+ /* Check and handle the FIFOs passed in */
+ r = (_handle_preloaded_fifo(SD_FD_FIFO_SERVER, DM_EVENT_FIFO_SERVER) &&
+ _handle_preloaded_fifo(SD_FD_FIFO_CLIENT, DM_EVENT_FIFO_CLIENT));
+
+ if (r) {
+ fifos->server = SD_FD_FIFO_SERVER;
+ fifos->server_path = DM_EVENT_FIFO_SERVER;
+ fifos->client = SD_FD_FIFO_CLIENT;
+ fifos->client_path = DM_EVENT_FIFO_CLIENT;
+ }
+
+out:
+ unsetenv(SD_ACTIVATION_ENV_VAR_NAME);
+ unsetenv(SD_LISTEN_PID_ENV_VAR_NAME);
+ unsetenv(SD_LISTEN_FDS_ENV_VAR_NAME);
+ return r;
+}
#endif
-static void remove_lockfile(void)
+static void _remove_files_on_exit(void)
{
if (unlink(DMEVENTD_PIDFILE))
perror(DMEVENTD_PIDFILE ": unlink failed");
+
+ if (!_systemd_activation) {
+ if (unlink(DM_EVENT_FIFO_CLIENT))
+ perror(DM_EVENT_FIFO_CLIENT " : unlink failed");
+
+ if (unlink(DM_EVENT_FIFO_SERVER))
+ perror(DM_EVENT_FIFO_SERVER " : unlink failed");
+ }
}
static void _daemonize(void)
@@ -1703,8 +1804,15 @@ static void _daemonize(void)
else
fd = rlim.rlim_cur;
- for (--fd; fd >= 0; fd--)
- close(fd);
+ for (--fd; fd >= 0; fd--) {
+#ifdef linux
+ /* Do not close fds preloaded by systemd! */
+ if (_systemd_activation &&
+ (fd == SD_FD_FIFO_SERVER || fd == SD_FD_FIFO_CLIENT))
+ continue;
+#endif
+ (void) close(fd);
+ }
if ((open("/dev/null", O_RDONLY) < 0) ||
(open("/dev/null", O_WRONLY) < 0) ||
@@ -1721,16 +1829,25 @@ static void restart(void)
int i, count = 0;
char *message;
int length;
+ int version;
/* Get the list of registrations from the running daemon. */
if (!init_fifos(&fifos)) {
- fprintf(stderr, "Could not initiate communication with existing dmeventd.\n");
+ fprintf(stderr, "WARNING: Could not initiate communication with existing dmeventd.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (!dm_event_get_version(&fifos, &version)) {
+ fprintf(stderr, "WARNING: Could not communicate with existing dmeventd.\n");
+ fini_fifos(&fifos);
exit(EXIT_FAILURE);
}
- if (daemon_talk(&fifos, &msg, DM_EVENT_CMD_HELLO, NULL, NULL, 0, 0)) {
- fprintf(stderr, "Could not communicate with existing dmeventd.\n");
+ if (version < 1) {
+ fprintf(stderr, "WARNING: The running dmeventd instance is too old.\n"
+ "Protocol version %d (required: 1). Action cancelled.\n",
+ version);
exit(EXIT_FAILURE);
}
@@ -1749,9 +1866,16 @@ static void restart(void)
}
}
- _initial_registrations = dm_malloc(sizeof(char*) * (count + 1));
+ if (!(_initial_registrations = dm_malloc(sizeof(char*) * (count + 1)))) {
+ fprintf(stderr, "Memory allocation registration failed.\n");
+ exit(EXIT_FAILURE);
+ }
+
for (i = 0; i < count; ++i) {
- _initial_registrations[i] = dm_strdup(message);
+ if (!(_initial_registrations[i] = dm_strdup(message))) {
+ fprintf(stderr, "Memory allocation for message failed.\n");
+ exit(EXIT_FAILURE);
+ }
message += strlen(message) + 1;
}
_initial_registrations[count] = 0;
@@ -1761,17 +1885,28 @@ static void restart(void)
exit(EXIT_FAILURE);
}
+ /*
+ * Wait for daemon to die, detected by sending further DIE messages
+ * until one fails.
+ */
+ for (i = 0; i < 10; ++i) {
+ if (daemon_talk(&fifos, &msg, DM_EVENT_CMD_DIE, "-", "-", 0, 0))
+ break; /* yep, it's dead probably */
+ usleep(10);
+ }
+
fini_fifos(&fifos);
}
static void usage(char *prog, FILE *file)
{
fprintf(file, "Usage:\n"
- "%s [-V] [-h] [-d] [-d] [-d] [-f]\n\n"
- " -V Show version of dmeventd\n"
- " -h Show this help information\n"
+ "%s [-d [-d [-d]]] [-f] [-h] [-R] [-V] [-?]\n\n"
" -d Log debug messages to syslog (-d, -dd, -ddd)\n"
- " -f Don't fork, run in the foreground\n\n", prog);
+ " -f Don't fork, run in the foreground\n"
+ " -h -? Show this help information\n"
+ " -R Restart dmeventd\n"
+ " -V Show version of dmeventd\n\n", prog);
}
int main(int argc, char *argv[])
@@ -1803,7 +1938,6 @@ int main(int argc, char *argv[])
case 'V':
printf("dmeventd version: %s\n", DM_LIB_VERSION);
exit(1);
- break;
}
}
@@ -1818,6 +1952,10 @@ int main(int argc, char *argv[])
if (_restart)
restart();
+#ifdef linux
+ _systemd_activation = _systemd_handover(&fifos);
+#endif
+
if (!_foreground)
_daemonize();
@@ -1827,17 +1965,19 @@ int main(int argc, char *argv[])
if (dm_create_lockfile(DMEVENTD_PIDFILE) == 0)
exit(EXIT_FAILURE);
- atexit(remove_lockfile);
+ atexit(_remove_files_on_exit);
(void) dm_prepare_selinux_context(NULL, 0);
/* Set the rest of the signals to cause '_exit_now' to be set */
+ signal(SIGTERM, &_exit_handler);
signal(SIGINT, &_exit_handler);
signal(SIGHUP, &_exit_handler);
signal(SIGQUIT, &_exit_handler);
#ifdef linux
- if (!_set_oom_adj(OOM_DISABLE) && !_set_oom_adj(OOM_ADJUST_MIN))
- syslog(LOG_ERR, "Failed to set oom_adj to protect against OOM killer");
+ /* Systemd has adjusted oom killer for us already */
+ if (!_systemd_activation && !_protect_against_oom_killer())
+ syslog(LOG_ERR, "Failed to protect against OOM killer");
#endif
_init_thread_signals();
@@ -1847,11 +1987,12 @@ int main(int argc, char *argv[])
//multilog_init_verbose(std_syslog, _LOG_DEBUG);
//multilog_async(1);
- _init_fifos(&fifos);
+ if (!_systemd_activation)
+ _init_fifos(&fifos);
pthread_mutex_init(&_global_mutex, NULL);
- if (_open_fifos(&fifos))
+ if (!_systemd_activation && !_open_fifos(&fifos))
exit(EXIT_FIFO_FAILURE);
/* Signal parent, letting them know we are ready to go. */
@@ -1865,11 +2006,13 @@ int main(int argc, char *argv[])
while (!_exit_now) {
_process_request(&fifos);
_cleanup_unused_threads();
+ _lock_mutex();
if (!dm_list_empty(&_thread_registry)
|| !dm_list_empty(&_thread_registry_unused))
_thread_registries_empty = 0;
else
_thread_registries_empty = 1;
+ _unlock_mutex();
}
_exit_dm_lib();
diff --git a/daemons/dmeventd/dmeventd.h b/daemons/dmeventd/dmeventd.h
index 254758e..e21cf45 100644
--- a/daemons/dmeventd/dmeventd.h
+++ b/daemons/dmeventd/dmeventd.h
@@ -17,11 +17,8 @@
/* FIXME This stuff must be configurable. */
-#define DM_EVENT_DAEMON "/sbin/dmeventd"
-#define DM_EVENT_LOCKFILE "/var/lock/dmeventd"
-#define DM_EVENT_FIFO_CLIENT "/var/run/dmeventd-client"
-#define DM_EVENT_FIFO_SERVER "/var/run/dmeventd-server"
-#define DM_EVENT_PIDFILE "/var/run/dmeventd.pid"
+#define DM_EVENT_FIFO_CLIENT DEFAULT_DM_RUN_DIR "/dmeventd-client"
+#define DM_EVENT_FIFO_SERVER DEFAULT_DM_RUN_DIR "/dmeventd-server"
#define DM_EVENT_DEFAULT_TIMEOUT 10
@@ -66,11 +63,13 @@ struct dm_event_fifos {
#define EXIT_CHDIR_FAILURE 7
/* Implemented in libdevmapper-event.c, but not part of public API. */
+// FIXME misuse of bitmask as enum
int daemon_talk(struct dm_event_fifos *fifos,
struct dm_event_daemon_message *msg, int cmd,
const char *dso_name, const char *dev_name,
enum dm_event_mask evmask, uint32_t timeout);
int init_fifos(struct dm_event_fifos *fifos);
void fini_fifos(struct dm_event_fifos *fifos);
+int dm_event_get_version(struct dm_event_fifos *fifos, int *version);
#endif /* __DMEVENTD_DOT_H__ */
diff --git a/daemons/dmeventd/libdevmapper-event.c b/daemons/dmeventd/libdevmapper-event.c
index bc8ad99..1f8fbef 100644
--- a/daemons/dmeventd/libdevmapper-event.c
+++ b/daemons/dmeventd/libdevmapper-event.c
@@ -59,14 +59,10 @@ struct dm_event_handler *dm_event_handler_create(void)
{
struct dm_event_handler *dmevh = NULL;
- if (!(dmevh = dm_malloc(sizeof(*dmevh))))
+ if (!(dmevh = dm_zalloc(sizeof(*dmevh)))) {
+ log_error("Failed to allocate event handler.");
return NULL;
-
- dmevh->dmeventd_path = NULL;
- dmevh->dso = dmevh->dev_name = dmevh->uuid = NULL;
- dmevh->major = dmevh->minor = 0;
- dmevh->mask = 0;
- dmevh->timeout = 0;
+ }
return dmevh;
}
@@ -245,6 +241,10 @@ static int _daemon_read(struct dm_event_fifos *fifos,
log_error("Unable to read from event server");
return 0;
}
+ if ((ret == 0) && (i > 4) && !bytes) {
+ log_error("No input from event server.");
+ return 0;
+ }
}
if (ret < 1) {
log_error("Unable to read from event server.");
@@ -309,7 +309,7 @@ static int _daemon_write(struct dm_event_fifos *fifos,
}
if (ret == 0)
break;
- read(fifos->server, drainbuf, 127);
+ ret = read(fifos->server, drainbuf, 127);
}
while (bytes < size) {
@@ -424,30 +424,27 @@ static int _start_daemon(char *dmeventd_path, struct dm_event_fifos *fifos)
fifos->client = open(fifos->client_path, O_WRONLY | O_NONBLOCK);
if (fifos->client >= 0) {
/* server is running and listening */
-
- close(fifos->client);
+ if (close(fifos->client))
+ log_sys_error("close", fifos->client_path);
return 1;
} else if (errno != ENXIO) {
/* problem */
-
- log_error("%s: Can't open client fifo %s: %s",
- __func__, fifos->client_path, strerror(errno));
- stack;
+ log_sys_error("open", fifos->client_path);
return 0;
}
start_server:
/* server is not running */
- if (!strncmp(DMEVENTD_PATH, "/", 1) && stat(DMEVENTD_PATH, &statbuf)) {
- log_error("Unable to find dmeventd.");
- return_0;
+ if ((args[0][0] == '/') && stat(args[0], &statbuf)) {
+ log_sys_error("stat", args[0]);
+ return 0;
}
pid = fork();
if (pid < 0)
- log_error("Unable to fork.");
+ log_sys_error("fork", "");
else if (!pid) {
execvp(args[0], args);
@@ -477,25 +474,23 @@ int init_fifos(struct dm_event_fifos *fifos)
/* Open the fifo used to read from the daemon. */
if ((fifos->server = open(fifos->server_path, O_RDWR)) < 0) {
- log_error("%s: open server fifo %s",
- __func__, fifos->server_path);
- stack;
+ log_sys_error("open", fifos->server_path);
return 0;
}
/* Lock out anyone else trying to do communication with the daemon. */
if (flock(fifos->server, LOCK_EX) < 0) {
- log_error("%s: flock %s", __func__, fifos->server_path);
- close(fifos->server);
+ log_sys_error("flock", fifos->server_path);
+ if (close(fifos->server))
+ log_sys_error("close", fifos->server_path);
return 0;
}
/* if ((fifos->client = open(fifos->client_path, O_WRONLY | O_NONBLOCK)) < 0) {*/
if ((fifos->client = open(fifos->client_path, O_RDWR | O_NONBLOCK)) < 0) {
- log_error("%s: Can't open client fifo %s: %s",
- __func__, fifos->client_path, strerror(errno));
- close(fifos->server);
- stack;
+ log_sys_error("open", fifos->client_path);
+ if (close(fifos->server))
+ log_sys_error("close", fifos->server_path);
return 0;
}
@@ -523,8 +518,10 @@ void fini_fifos(struct dm_event_fifos *fifos)
if (flock(fifos->server, LOCK_UN))
log_error("flock unlock %s", fifos->server_path);
- close(fifos->client);
- close(fifos->server);
+ if (close(fifos->client))
+ log_sys_error("close", fifos->client_path);
+ if (close(fifos->server))
+ log_sys_error("close", fifos->server_path);
}
/* Get uuid of a device */
@@ -538,34 +535,46 @@ static struct dm_task *_get_device_info(const struct dm_event_handler *dmevh)
return NULL;
}
- if (dmevh->uuid)
- dm_task_set_uuid(dmt, dmevh->uuid);
- else if (dmevh->dev_name)
- dm_task_set_name(dmt, dmevh->dev_name);
- else if (dmevh->major && dmevh->minor) {
- dm_task_set_major(dmt, dmevh->major);
- dm_task_set_minor(dmt, dmevh->minor);
- }
+ if (dmevh->uuid) {
+ if (!dm_task_set_uuid(dmt, dmevh->uuid))
+ goto_bad;
+ } else if (dmevh->dev_name) {
+ if (!dm_task_set_name(dmt, dmevh->dev_name))
+ goto_bad;
+ } else if (dmevh->major && dmevh->minor) {
+ if (!dm_task_set_major(dmt, dmevh->major) ||
+ !dm_task_set_minor(dmt, dmevh->minor))
+ goto_bad;
+ }
/* FIXME Add name or uuid or devno to messages */
if (!dm_task_run(dmt)) {
log_error("_get_device_info: dm_task_run() failed");
- goto failed;
+ goto bad;
}
if (!dm_task_get_info(dmt, &info)) {
log_error("_get_device_info: failed to get info for device");
- goto failed;
+ goto bad;
}
if (!info.exists) {
- log_error("_get_device_info: device not found");
- goto failed;
+ log_error("_get_device_info: %s%s%s%.0d%s%.0d%s%s: device not found",
+ dmevh->uuid ? : "",
+ (!dmevh->uuid && dmevh->dev_name) ? dmevh->dev_name : "",
+ (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? "(" : "",
+ (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? dmevh->major : 0,
+ (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? ":" : "",
+ (!dmevh->uuid && !dmevh->dev_name && dmevh->minor > 0) ? dmevh->minor : 0,
+ (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) && dmevh->minor == 0 ? "0" : "",
+ (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? ") " : "");
+ goto bad;
}
+
return dmt;
-failed:
+ bad:
dm_task_destroy(dmt);
return NULL;
}
@@ -715,17 +724,18 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next)
uuid = dm_task_get_uuid(dmt);
- if (!(ret = _do_event(next ? DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE :
- DM_EVENT_CMD_GET_REGISTERED_DEVICE, dmevh->dmeventd_path,
- &msg, dmevh->dso, uuid, dmevh->mask, 0))) {
- /* FIXME this will probably horribly break if we get
- ill-formatted reply */
- ret = _parse_message(&msg, &reply_dso, &reply_uuid, &reply_mask);
- } else {
+ if (_do_event(next ? DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE :
+ DM_EVENT_CMD_GET_REGISTERED_DEVICE, dmevh->dmeventd_path,
+ &msg, dmevh->dso, uuid, dmevh->mask, 0)) {
+ log_debug("%s: device not registered.", dm_task_get_name(dmt));
ret = -ENOENT;
goto fail;
}
+ /* FIXME this will probably horribly break if we get
+ ill-formatted reply */
+ ret = _parse_message(&msg, &reply_dso, &reply_uuid, &reply_mask);
+
dm_task_destroy(dmt);
dmt = NULL;
@@ -733,6 +743,10 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next)
msg.data = NULL;
_dm_event_handler_clear_dev_info(dmevh);
+ if (!reply_uuid) {
+ ret = -ENXIO; /* dmeventd probably gave us bogus uuid back */
+ goto fail;
+ }
dmevh->uuid = dm_strdup(reply_uuid);
if (!dmevh->uuid) {
ret = -ENOMEM;
@@ -781,6 +795,36 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next)
return ret;
}
+/*
+ * You can (and have to) call this at the stage of the protocol where
+ * daemon_talk(fifos, &msg, DM_EVENT_CMD_HELLO, NULL, NULL, 0, 0)
+ *
+ * would be normally sent. This call will parse the version reply from
+ * dmeventd, in addition to above call. It is not safe to call this at any
+ * other place in the protocol.
+ *
+ * This is an internal function, not exposed in the public API.
+ */
+
+int dm_event_get_version(struct dm_event_fifos *fifos, int *version) {
+ char *p;
+ struct dm_event_daemon_message msg = { 0, 0, NULL };
+
+ if (daemon_talk(fifos, &msg, DM_EVENT_CMD_HELLO, NULL, NULL, 0, 0))
+ return 0;
+ p = msg.data;
+ *version = 0;
+
+ p = strchr(p, ' '); /* Message ID */
+ if (!p) return 0;
+ p = strchr(p + 1, ' '); /* HELLO */
+ if (!p) return 0;
+ p = strchr(p + 1, ' '); /* HELLO, once more */
+ if (p)
+ *version = atoi(p);
+ return 1;
+}
+
#if 0 /* left out for now */
static char *_skip_string(char *src, const int delimiter)
diff --git a/daemons/dmeventd/libdevmapper-event.h b/daemons/dmeventd/libdevmapper-event.h
index 0de20c1..7ce3f39 100644
--- a/daemons/dmeventd/libdevmapper-event.h
+++ b/daemons/dmeventd/libdevmapper-event.h
@@ -46,6 +46,7 @@ enum dm_event_mask {
};
#define DM_EVENT_ALL_ERRORS DM_EVENT_ERROR_MASK
+#define DM_EVENT_PROTOCOL_VERSION 1
struct dm_event_handler;
@@ -81,6 +82,7 @@ void dm_event_handler_set_timeout(struct dm_event_handler *dmevh, int timeout);
/*
* Specify mask for events to monitor.
*/
+// FIXME misuse of bitmask as enum
void dm_event_handler_set_event_mask(struct dm_event_handler *dmevh,
enum dm_event_mask evmask);
@@ -90,6 +92,7 @@ const char *dm_event_handler_get_uuid(const struct dm_event_handler *dmevh);
int dm_event_handler_get_major(const struct dm_event_handler *dmevh);
int dm_event_handler_get_minor(const struct dm_event_handler *dmevh);
int dm_event_handler_get_timeout(const struct dm_event_handler *dmevh);
+// FIXME misuse of bitmask as enum
enum dm_event_mask dm_event_handler_get_event_mask(const struct dm_event_handler *dmevh);
/* FIXME Review interface (what about this next thing?) */
@@ -103,6 +106,7 @@ int dm_event_unregister_handler(const struct dm_event_handler *dmevh);
/* Prototypes for DSO interface, see dmeventd.c, struct dso_data for
detailed descriptions. */
+// FIXME misuse of bitmask as enum
void process_event(struct dm_task *dmt, enum dm_event_mask evmask, void **user);
int register_device(const char *device_name, const char *uuid, int major, int minor, void **user);
int unregister_device(const char *device_name, const char *uuid, int major,
diff --git a/daemons/dmeventd/plugins/Makefile.in b/daemons/dmeventd/plugins/Makefile.in
index 45176ad..b26e6d8 100644
--- a/daemons/dmeventd/plugins/Makefile.in
+++ b/daemons/dmeventd/plugins/Makefile.in
@@ -1,6 +1,6 @@
#
# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
-# Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2005, 2011 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -16,10 +16,31 @@ srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = @top_builddir@
-SUBDIRS += lvm2 mirror snapshot
+SUBDIRS += lvm2
+
+ifneq ("@MIRRORS@", "none")
+ SUBDIRS += mirror
+endif
+
+ifneq ("@SNAPSHOTS@", "none")
+ SUBDIRS += snapshot
+endif
+
+ifneq ("@RAID@", "none")
+ SUBDIRS += raid
+endif
+
+ifneq ("@THIN@", "none")
+ SUBDIRS += thin
+endif
+
+ifeq ($(MAKECMDGOALS),distclean)
+ SUBDIRS = lvm2 mirror snapshot raid thin
+endif
include $(top_builddir)/make.tmpl
-mirror: lvm2
snapshot: lvm2
-
+mirror: lvm2
+raid: lvm2
+thin: lvm2
diff --git a/daemons/dmeventd/plugins/lvm2/.exported_symbols b/daemons/dmeventd/plugins/lvm2/.exported_symbols
index ebe3d05..646e4cf 100644
--- a/daemons/dmeventd/plugins/lvm2/.exported_symbols
+++ b/daemons/dmeventd/plugins/lvm2/.exported_symbols
@@ -4,3 +4,4 @@ dmeventd_lvm2_lock
dmeventd_lvm2_unlock
dmeventd_lvm2_pool
dmeventd_lvm2_run
+dmeventd_lvm2_command
diff --git a/daemons/dmeventd/plugins/lvm2/Makefile.in b/daemons/dmeventd/plugins/lvm2/Makefile.in
index 46247aa..fcb2a0a 100644
--- a/daemons/dmeventd/plugins/lvm2/Makefile.in
+++ b/daemons/dmeventd/plugins/lvm2/Makefile.in
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2010 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -24,10 +24,8 @@ LIB_VERSION = $(LIB_VERSION_LVM)
include $(top_builddir)/make.tmpl
-LIBS += @LVM2CMD_LIB@ -ldevmapper $(PTHREAD_LIBS)
+LIBS += @LVM2CMD_LIB@ -ldevmapper $(PTHREAD_LIBS) $(DAEMON_LIBS)
install_lvm2: install_lib_shared
install: install_lvm2
-
-DISTCLEAN_TARGETS += .exported_symbols_generated
diff --git a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c
index 937d81d..5d5a46b 100644
--- a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c
+++ b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c
@@ -16,8 +16,6 @@
#include "log.h"
#include "lvm2cmd.h"
-#include "errors.h"
-#include "libdevmapper-event.h"
#include "dmeventd_lvm.h"
#include <pthread.h>
@@ -82,10 +80,7 @@ static void _temporary_log_fn(int level,
void dmeventd_lvm2_lock(void)
{
- if (pthread_mutex_trylock(&_event_mutex)) {
- syslog(LOG_NOTICE, "Another thread is handling an event. Waiting...");
- pthread_mutex_lock(&_event_mutex);
- }
+ pthread_mutex_lock(&_event_mutex);
}
void dmeventd_lvm2_unlock(void)
@@ -113,6 +108,7 @@ int dmeventd_lvm2_init(void)
_mem_pool = NULL;
goto out;
}
+ lvm2_disable_dmeventd_monitoring(_lvm_handle);
/* FIXME Temporary: move to dmeventd core */
lvm2_run(_lvm_handle, "_memlock_inc");
}
@@ -150,3 +146,31 @@ int dmeventd_lvm2_run(const char *cmdline)
return lvm2_run(_lvm_handle, cmdline);
}
+int dmeventd_lvm2_command(struct dm_pool *mem, char *buffer, size_t size,
+ const char *cmd, const char *device)
+{
+ char *vg = NULL, *lv = NULL, *layer;
+ int r;
+
+ if (!dm_split_lvm_name(mem, device, &vg, &lv, &layer)) {
+ syslog(LOG_ERR, "Unable to determine VG name from %s.\n",
+ device);
+ return 0;
+ }
+
+ /* strip off the mirror component designations */
+ layer = strstr(lv, "_mlog");
+ if (layer)
+ *layer = '\0';
+
+ r = dm_snprintf(buffer, size, "%s %s/%s", cmd, vg, lv);
+
+ dm_pool_free(mem, vg);
+
+ if (r < 0) {
+ syslog(LOG_ERR, "Unable to form LVM command. (too long).\n");
+ return 0;
+ }
+
+ return 1;
+}
diff --git a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h
index 8efcb9b..1960c71 100644
--- a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h
+++ b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h
@@ -36,4 +36,7 @@ void dmeventd_lvm2_unlock(void);
struct dm_pool *dmeventd_lvm2_pool(void);
+int dmeventd_lvm2_command(struct dm_pool *mem, char *buffer, size_t size,
+ const char *cmd, const char *device);
+
#endif /* _DMEVENTD_LVMWRAP_H */
diff --git a/daemons/dmeventd/plugins/mirror/Makefile.in b/daemons/dmeventd/plugins/mirror/Makefile.in
index 7f80629..85b33c9 100644
--- a/daemons/dmeventd/plugins/mirror/Makefile.in
+++ b/daemons/dmeventd/plugins/mirror/Makefile.in
@@ -1,6 +1,6 @@
#
# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
-# Copyright (C) 2004-2005, 2008-2010 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2005, 2008-2011 Red Hat, Inc. All rights reserved.
#
# This file is part of LVM2.
#
@@ -30,10 +30,8 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow
include $(top_builddir)/make.tmpl
-LIBS += -ldevmapper-event-lvm2 -ldevmapper
+LIBS += -ldevmapper-event-lvm2 -ldevmapper $(DAEMON_LIBS)
install_lvm2: install_dm_plugin
install: install_lvm2
-
-DISTCLEAN_TARGETS += .exported_symbols_generated
diff --git a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
index 3e97d87..e59feb4 100644
--- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
+++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2005-2012 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -18,6 +18,7 @@
#include "errors.h"
#include "libdevmapper-event.h"
#include "dmeventd_lvm.h"
+#include "defaults.h"
#include <syslog.h> /* FIXME Replace syslog with multilog */
/* FIXME Missing openlog? */
@@ -81,7 +82,8 @@ static int _get_mirror_event(char *params)
if (!dm_split_words(params, 1, 0, &p))
goto out_parse;
- if (!(num_devs = atoi(p)))
+ if (!(num_devs = atoi(p)) ||
+ (num_devs > DEFAULT_MIRROR_MAX_IMAGES) || (num_devs < 0))
goto out_parse;
p += strlen(p) + 1;
@@ -90,6 +92,7 @@ static int _get_mirror_event(char *params)
if (!args || dm_split_words(p, num_devs + 7, 0, args) < num_devs + 5)
goto out_parse;
+ /* FIXME: Code differs from lib/mirror/mirrored.c */
dev_status_str = args[2 + num_devs];
log_argc = atoi(args[3 + num_devs]);
log_status_str = args[3 + num_devs + log_argc];
@@ -121,7 +124,7 @@ static int _get_mirror_event(char *params)
out:
dm_free(args);
return r;
-
+
out_parse:
dm_free(args);
syslog(LOG_ERR, "Unable to parse mirror status string.");
@@ -133,32 +136,15 @@ static int _remove_failed_devices(const char *device)
int r;
#define CMD_SIZE 256 /* FIXME Use system restriction */
char cmd_str[CMD_SIZE];
- char *vg = NULL, *lv = NULL, *layer = NULL;
-
- if (strlen(device) > 200) /* FIXME Use real restriction */
- return -ENAMETOOLONG; /* FIXME These return code distinctions are not used so remove them! */
-
- if (!dm_split_lvm_name(dmeventd_lvm2_pool(), device, &vg, &lv, &layer)) {
- syslog(LOG_ERR, "Unable to determine VG name from %s.",
- device);
- return -ENOMEM; /* FIXME Replace with generic error return - reason for failure has already got logged */
- }
-
- /* strip off the mirror component designations */
- layer = strstr(lv, "_mlog");
- if (layer)
- *layer = '\0';
- /* FIXME Is any sanity-checking required on %s? */
- if (CMD_SIZE <= snprintf(cmd_str, CMD_SIZE, "lvconvert --config devices{ignore_suspended_devices=1} --repair --use-policies %s/%s", vg, lv)) {
- /* this error should be caught above, but doesn't hurt to check again */
- syslog(LOG_ERR, "Unable to form LVM command: Device name too long.");
+ if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), cmd_str, sizeof(cmd_str),
+ "lvconvert --config devices{ignore_suspended_devices=1} "
+ "--repair --use-policies", device))
return -ENAMETOOLONG; /* FIXME Replace with generic error return - reason for failure has already got logged */
- }
r = dmeventd_lvm2_run(cmd_str);
- syslog(LOG_INFO, "Repair of mirrored LV %s/%s %s.", vg, lv,
+ syslog(LOG_INFO, "Repair of mirrored device %s %s.", device,
(r == ECMD_PROCESSED) ? "finished successfully" : "failed");
return (r == ECMD_PROCESSED) ? 0 : -1;
@@ -227,9 +213,12 @@ int register_device(const char *device,
int minor __attribute__((unused)),
void **unused __attribute__((unused)))
{
- int r = dmeventd_lvm2_init();
+ if (!dmeventd_lvm2_init())
+ return 0;
+
syslog(LOG_INFO, "Monitoring mirror device %s for events.", device);
- return r;
+
+ return 1;
}
int unregister_device(const char *device,
@@ -241,5 +230,6 @@ int unregister_device(const char *device,
syslog(LOG_INFO, "No longer monitoring mirror device %s for events.",
device);
dmeventd_lvm2_exit();
+
return 1;
}
diff --git a/daemons/dmeventd/plugins/raid/.exported_symbols b/daemons/dmeventd/plugins/raid/.exported_symbols
new file mode 100644
index 0000000..b88c705
--- /dev/null
+++ b/daemons/dmeventd/plugins/raid/.exported_symbols
@@ -0,0 +1,3 @@
+process_event
+register_device
+unregister_device
diff --git a/daemons/dmeventd/plugins/raid/Makefile.in b/daemons/dmeventd/plugins/raid/Makefile.in
new file mode 100644
index 0000000..a6b7788
--- /dev/null
+++ b/daemons/dmeventd/plugins/raid/Makefile.in
@@ -0,0 +1,36 @@
+#
+# Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+top_builddir = @top_builddir@
+
+INCLUDES += -I$(top_srcdir)/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2
+CLDFLAGS += -L$(top_builddir)/tools -L$(top_builddir)/daemons/dmeventd/plugins/lvm2
+
+SOURCES = dmeventd_raid.c
+
+LIB_NAME = libdevmapper-event-lvm2raid
+LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX)
+LIB_VERSION = $(LIB_VERSION_LVM)
+
+CFLOW_LIST = $(SOURCES)
+CFLOW_LIST_TARGET = $(LIB_NAME).cflow
+
+include $(top_builddir)/make.tmpl
+
+LIBS += -ldevmapper-event-lvm2 -ldevmapper
+
+install_lvm2: install_dm_plugin
+
+install: install_lvm2
diff --git a/daemons/dmeventd/plugins/raid/dmeventd_raid.c b/daemons/dmeventd/plugins/raid/dmeventd_raid.c
new file mode 100644
index 0000000..a3ecdc1
--- /dev/null
+++ b/daemons/dmeventd/plugins/raid/dmeventd_raid.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "lib.h"
+
+#include "lvm2cmd.h"
+#include "errors.h"
+#include "libdevmapper-event.h"
+#include "dmeventd_lvm.h"
+
+#include <syslog.h> /* FIXME Replace syslog with multilog */
+/* FIXME Missing openlog? */
+/* FIXME Replace most syslogs with log_error() style messages and add complete context. */
+/* FIXME Reformat to 80 char lines. */
+
+/*
+ * run_repair is a close copy to
+ * plugins/mirror/dmeventd_mirror.c:_remove_failed_devices()
+ */
+static int run_repair(const char *device)
+{
+ int r;
+#define CMD_SIZE 256 /* FIXME Use system restriction */
+ char cmd_str[CMD_SIZE];
+
+ if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), cmd_str, sizeof(cmd_str),
+ "lvconvert --config devices{ignore_suspended_devices=1} "
+ "--repair --use-policies", device))
+ return -1;
+
+ r = dmeventd_lvm2_run(cmd_str);
+
+ if (r != ECMD_PROCESSED)
+ syslog(LOG_INFO, "Repair of RAID device %s failed.", device);
+
+ return (r == ECMD_PROCESSED) ? 0 : -1;
+}
+
+static int _process_raid_event(char *params, const char *device)
+{
+ int i, n, failure = 0;
+ char *p, *a[4];
+ char *raid_type;
+ char *num_devices;
+ char *health_chars;
+ char *resync_ratio;
+
+ /*
+ * RAID parms: <raid_type> <#raid_disks> \
+ * <health chars> <resync ratio>
+ */
+ if (!dm_split_words(params, 4, 0, a)) {
+ syslog(LOG_ERR, "Failed to process status line for %s\n",
+ device);
+ return -EINVAL;
+ }
+ raid_type = a[0];
+ num_devices = a[1];
+ health_chars = a[2];
+ resync_ratio = a[3];
+
+ if (!(n = atoi(num_devices))) {
+ syslog(LOG_ERR, "Failed to parse number of devices for %s: %s",
+ device, num_devices);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < n; i++) {
+ switch (health_chars[i]) {
+ case 'A':
+ /* Device is 'A'live and well */
+ case 'a':
+ /* Device is 'a'live, but not yet in-sync */
+ break;
+ case 'D':
+ syslog(LOG_ERR,
+ "Device #%d of %s array, %s, has failed.",
+ i, raid_type, device);
+ failure++;
+ break;
+ default:
+ /* Unhandled character returned from kernel */
+ break;
+ }
+ if (failure)
+ return run_repair(device);
+ }
+
+ p = strstr(resync_ratio, "/");
+ if (!p) {
+ syslog(LOG_ERR, "Failed to parse resync_ratio for %s: %s",
+ device, resync_ratio);
+ return -EINVAL;
+ }
+ p[0] = '\0';
+ syslog(LOG_INFO, "%s array, %s, is %s in-sync.",
+ raid_type, device, strcmp(resync_ratio, p+1) ? "not" : "now");
+
+ return 0;
+}
+
+void process_event(struct dm_task *dmt,
+ enum dm_event_mask event __attribute__((unused)),
+ void **unused __attribute__((unused)))
+{
+ void *next = NULL;
+ uint64_t start, length;
+ char *target_type = NULL;
+ char *params;
+ const char *device = dm_task_get_name(dmt);
+
+ dmeventd_lvm2_lock();
+
+ do {
+ next = dm_get_next_target(dmt, next, &start, &length,
+ &target_type, &params);
+
+ if (!target_type) {
+ syslog(LOG_INFO, "%s mapping lost.", device);
+ continue;
+ }
+
+ if (strcmp(target_type, "raid")) {
+ syslog(LOG_INFO, "%s has non-raid portion.", device);
+ continue;
+ }
+
+ if (_process_raid_event(params, device))
+ syslog(LOG_ERR, "Failed to process event for %s",
+ device);
+ } while (next);
+
+ dmeventd_lvm2_unlock();
+}
+
+int register_device(const char *device,
+ const char *uuid __attribute__((unused)),
+ int major __attribute__((unused)),
+ int minor __attribute__((unused)),
+ void **unused __attribute__((unused)))
+{
+ if (!dmeventd_lvm2_init())
+ return 0;
+
+ syslog(LOG_INFO, "Monitoring RAID device %s for events.", device);
+
+ return 1;
+}
+
+int unregister_device(const char *device,
+ const char *uuid __attribute__((unused)),
+ int major __attribute__((unused)),
+ int minor __attribute__((unused)),
+ void **unused __attribute__((unused)))
+{
+ syslog(LOG_INFO, "No longer monitoring RAID device %s for events.",
+ device);
+ dmeventd_lvm2_exit();
+
+ return 1;
+}
diff --git a/daemons/dmeventd/plugins/snapshot/Makefile.in b/daemons/dmeventd/plugins/snapshot/Makefile.in
index b8414b2..a4cff15 100644
--- a/daemons/dmeventd/plugins/snapshot/Makefile.in
+++ b/daemons/dmeventd/plugins/snapshot/Makefile.in
@@ -1,6 +1,6 @@
#
# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
-# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
#
# This file is part of the LVM2.
#
@@ -26,10 +26,8 @@ LIB_VERSION = $(LIB_VERSION_LVM)
include $(top_builddir)/make.tmpl
-LIBS += -ldevmapper-event-lvm2 -ldevmapper
+LIBS += -ldevmapper-event-lvm2 -ldevmapper $(DAEMON_LIBS)
install_lvm2: install_dm_plugin
install: install_lvm2
-
-DISTCLEAN_TARGETS += .exported_symbols_generated
diff --git a/daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c b/daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c
index 6fc9f56..205218a 100644
--- a/daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c
+++ b/daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007-2010 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2007-2011 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -19,8 +19,6 @@
#include "libdevmapper-event.h"
#include "dmeventd_lvm.h"
-#include "lvm-string.h"
-
#include <sys/wait.h>
#include <syslog.h> /* FIXME Replace syslog with multilog */
/* FIXME Missing openlog? */
@@ -40,6 +38,12 @@ struct snap_status {
int max;
};
+struct dso_state {
+ int percent_check;
+ int known_size;
+ char cmd_str[1024];
+};
+
/* FIXME possibly reconcile this with target_percent when we gain
access to regular LVM library here. */
static void _parse_snapshot_params(char *params, struct snap_status *status)
@@ -115,26 +119,9 @@ static int _run(const char *cmd, ...)
return 1; /* all good */
}
-static int _extend(const char *device)
+static int _extend(const char *cmd)
{
- char *vg = NULL, *lv = NULL, *layer = NULL;
- char cmd_str[1024];
- int r = 0;
-
- if (!dm_split_lvm_name(dmeventd_lvm2_pool(), device, &vg, &lv, &layer)) {
- syslog(LOG_ERR, "Unable to determine VG name from %s.", device);
- return 0;
- }
- if (sizeof(cmd_str) <= snprintf(cmd_str, sizeof(cmd_str),
- "lvextend --use-policies %s/%s", vg, lv)) {
- syslog(LOG_ERR, "Unable to form LVM command: Device name too long.");
- return 0;
- }
-
- r = dmeventd_lvm2_run(cmd_str);
- syslog(LOG_INFO, "Extension of snapshot %s/%s %s.", vg, lv,
- (r == ECMD_PROCESSED) ? "finished successfully" : "failed");
- return r == ECMD_PROCESSED;
+ return dmeventd_lvm2_run(cmd) == ECMD_PROCESSED;
}
static void _umount(const char *device, int major, int minor)
@@ -155,7 +142,8 @@ static void _umount(const char *device, int major, int minor)
break; /* eof, likely */
/* words[0] is the mount point and words[1] is the device path */
- dm_split_words(buffer, 3, 0, words);
+ if (dm_split_words(buffer, 3, 0, words) < 2)
+ continue;
/* find the major/minor of the device */
if (stat(words[0], &st))
@@ -164,9 +152,9 @@ static void _umount(const char *device, int major, int minor)
if (S_ISBLK(st.st_mode) &&
major(st.st_rdev) == major &&
minor(st.st_rdev) == minor) {
- syslog(LOG_ERR, "Unmounting invalid snapshot %s from %s.", device, words[1]);
+ syslog(LOG_ERR, "Unmounting invalid snapshot %s from %s.\n", device, words[1]);
if (!_run(UMOUNT_COMMAND, "-fl", words[1], NULL))
- syslog(LOG_ERR, "Failed to umount snapshot %s from %s: %s.",
+ syslog(LOG_ERR, "Failed to umount snapshot %s from %s: %s.\n",
device, words[1], strerror(errno));
}
}
@@ -185,10 +173,11 @@ void process_event(struct dm_task *dmt,
char *params;
struct snap_status status = { 0 };
const char *device = dm_task_get_name(dmt);
- int percent, *percent_check = (int*)private;
+ int percent;
+ struct dso_state *state = *private;
/* No longer monitoring, waiting for remove */
- if (!*percent_check)
+ if (!state->percent_check)
return;
dmeventd_lvm2_lock();
@@ -200,7 +189,6 @@ void process_event(struct dm_task *dmt,
_parse_snapshot_params(params, &status);
if (status.invalid) {
- syslog(LOG_ERR, "Trying to umount invalid snapshot %s...\n", device);
struct dm_info info;
if (dm_task_get_info(dmt, &info)) {
dmeventd_lvm2_unlock();
@@ -209,27 +197,35 @@ void process_event(struct dm_task *dmt,
} /* else; too bad, but this is best-effort thing... */
}
+ /* Snapshot size had changed. Clear the threshold. */
+ if (state->known_size != status.max) {
+ state->percent_check = CHECK_MINIMUM;
+ state->known_size = status.max;
+ }
+
/*
* If the snapshot has been invalidated or we failed to parse
* the status string. Report the full status string to syslog.
*/
if (status.invalid || !status.max) {
syslog(LOG_ERR, "Snapshot %s changed state to: %s\n", device, params);
- *percent_check = 0;
+ state->percent_check = 0;
goto out;
}
percent = 100 * status.used / status.max;
- if (percent >= *percent_check) {
+ if (percent >= state->percent_check) {
/* Usage has raised more than CHECK_STEP since the last
time. Run actions. */
- *percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP;
+ state->percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP;
+
if (percent >= WARNING_THRESH) /* Print a warning to syslog. */
syslog(LOG_WARNING, "Snapshot %s is now %i%% full.\n", device, percent);
/* Try to extend the snapshot, in accord with user-set policies */
- if (!_extend(device))
- syslog(LOG_ERR, "Failed to extend snapshot %s.", device);
+ if (!_extend(state->cmd_str))
+ syslog(LOG_ERR, "Failed to extend snapshot %s.\n", device);
}
+
out:
dmeventd_lvm2_unlock();
}
@@ -240,23 +236,46 @@ int register_device(const char *device,
int minor __attribute__((unused)),
void **private)
{
- int *percent_check = (int*)private;
- int r = dmeventd_lvm2_init();
+ struct dso_state *state;
+
+ if (!dmeventd_lvm2_init())
+ goto out;
- *percent_check = CHECK_MINIMUM;
+ if (!(state = dm_zalloc(sizeof(*state))))
+ goto bad;
+
+ if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(),
+ state->cmd_str, sizeof(state->cmd_str),
+ "lvextend --use-policies", device))
+ goto bad;
+
+ state->percent_check = CHECK_MINIMUM;
+ state->known_size = 0;
+ *private = state;
syslog(LOG_INFO, "Monitoring snapshot %s\n", device);
- return r;
+
+ return 1;
+bad:
+ dm_free(state);
+ dmeventd_lvm2_exit();
+out:
+ syslog(LOG_ERR, "Failed to monitor snapshot %s.\n", device);
+
+ return 0;
}
int unregister_device(const char *device,
const char *uuid __attribute__((unused)),
int major __attribute__((unused)),
int minor __attribute__((unused)),
- void **unused __attribute__((unused)))
+ void **private)
{
- syslog(LOG_INFO, "No longer monitoring snapshot %s\n",
- device);
+ struct dso_state *state = *private;
+
+ syslog(LOG_INFO, "No longer monitoring snapshot %s\n", device);
+ dm_free(state);
dmeventd_lvm2_exit();
+
return 1;
}
diff --git a/daemons/dmeventd/plugins/thin/.exported_symbols b/daemons/dmeventd/plugins/thin/.exported_symbols
new file mode 100644
index 0000000..b88c705
--- /dev/null
+++ b/daemons/dmeventd/plugins/thin/.exported_symbols
@@ -0,0 +1,3 @@
+process_event
+register_device
+unregister_device
diff --git a/daemons/dmeventd/plugins/thin/Makefile.in b/daemons/dmeventd/plugins/thin/Makefile.in
new file mode 100644
index 0000000..e964ab5
--- /dev/null
+++ b/daemons/dmeventd/plugins/thin/Makefile.in
@@ -0,0 +1,36 @@
+#
+# Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+top_builddir = @top_builddir@
+
+INCLUDES += -I$(top_srcdir)/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2
+CLDFLAGS += -L$(top_builddir)/tools -L$(top_builddir)/daemons/dmeventd/plugins/lvm2
+
+SOURCES = dmeventd_thin.c
+
+LIB_NAME = libdevmapper-event-lvm2thin
+LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX)
+LIB_VERSION = $(LIB_VERSION_LVM)
+
+CFLOW_LIST = $(SOURCES)
+CFLOW_LIST_TARGET = $(LIB_NAME).cflow
+
+include $(top_builddir)/make.tmpl
+
+LIBS += -ldevmapper-event-lvm2 -ldevmapper
+
+install_lvm2: install_dm_plugin
+
+install: install_lvm2
diff --git a/daemons/dmeventd/plugins/thin/dmeventd_thin.c b/daemons/dmeventd/plugins/thin/dmeventd_thin.c
new file mode 100644
index 0000000..a1af4c0
--- /dev/null
+++ b/daemons/dmeventd/plugins/thin/dmeventd_thin.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright (C) 2011-2012 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "lib.h"
+
+#include "lvm2cmd.h"
+#include "errors.h"
+#include "libdevmapper-event.h"
+#include "dmeventd_lvm.h"
+
+#include <sys/wait.h>
+#include <syslog.h> /* FIXME Replace syslog with multilog */
+/* FIXME Missing openlog? */
+
+/* First warning when thin is 80% full. */
+#define WARNING_THRESH 80
+/* Run a check every 5%. */
+#define CHECK_STEP 5
+/* Do not bother checking thins less than 50% full. */
+#define CHECK_MINIMUM 50
+
+#define UMOUNT_COMMAND "/bin/umount"
+
+#define THIN_DEBUG 0
+
+struct dso_state {
+ struct dm_pool *mem;
+ int metadata_percent_check;
+ int data_percent_check;
+ uint64_t known_metadata_size;
+ uint64_t known_data_size;
+ char cmd_str[1024];
+};
+
+
+/* TODO - move this mountinfo code into library to be reusable */
+#ifdef linux
+# include "kdev_t.h"
+#else
+# define MAJOR(x) major((x))
+# define MINOR(x) minor((x))
+# define MKDEV(x,y) makedev((x),(y))
+#endif
+
+/* Macros to make string defines */
+#define TO_STRING_EXP(A) #A
+#define TO_STRING(A) TO_STRING_EXP(A)
+
+static int _is_octal(int a)
+{
+ return (((a) & ~7) == '0');
+}
+
+/* Convert mangled mountinfo into normal ASCII string */
+static void _unmangle_mountinfo_string(const char *src, char *buf)
+{
+ if (!src)
+ return;
+
+ while (*src) {
+ if ((*src == '\\') &&
+ _is_octal(src[1]) && _is_octal(src[2]) && _is_octal(src[3])) {
+ *buf++ = 64 * (src[1] & 7) + 8 * (src[2] & 7) + (src[3] & 7);
+ src += 4;
+ } else
+ *buf++ = *src++;
+ }
+ *buf = '\0';
+}
+
+/* Parse one line of mountinfo */
+static int _parse_mountinfo_line(const char *line, unsigned *maj, unsigned *min, char *buf)
+{
+ char root[PATH_MAX + 1];
+ char target[PATH_MAX + 1];
+
+ /* TODO: maybe detect availability of %ms glib support ? */
+ if (sscanf(line, "%*u %*u %u:%u %" TO_STRING(PATH_MAX)
+ "s %" TO_STRING(PATH_MAX) "s",
+ maj, min, root, target) < 4)
+ return 0;
+
+ _unmangle_mountinfo_string(target, buf);
+
+#if THIN_DEBUG
+ syslog(LOG_DEBUG, "Mounted %u:%u %s", *maj, *min, buf);
+#endif
+
+ return 1;
+}
+
+/* Get dependencies for device, and try to find matching device */
+static int _has_deps(const char *name, int tp_major, int tp_minor, int *dev_minor)
+{
+ struct dm_task *dmt;
+ const struct dm_deps *deps;
+ struct dm_info info;
+ int major, minor;
+ int r = 0;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_DEPS)))
+ return 0;
+
+ if (!dm_task_set_name(dmt, name))
+ goto out;
+
+ if (!dm_task_no_open_count(dmt))
+ goto out;
+
+ if (!dm_task_run(dmt))
+ goto out;
+
+ if (!dm_task_get_info(dmt, &info))
+ goto out;
+
+ if (!(deps = dm_task_get_deps(dmt)))
+ goto out;
+
+ if (!info.exists || deps->count != 1)
+ goto out;
+
+ major = (int) MAJOR(deps->device[0]);
+ minor = (int) MINOR(deps->device[0]);
+ if ((major != tp_major) || (minor != tp_minor))
+ goto out;
+
+ *dev_minor = info.minor;
+
+#if THIN_DEBUG
+ {
+ char dev_name[PATH_MAX];
+ if (dm_device_get_name(major, minor, 0, dev_name, sizeof(dev_name)))
+ syslog(LOG_DEBUG, "Found %s (%u:%u) depends on %s",
+ name, major, *dev_minor, dev_name);
+ }
+#endif
+ r = 1;
+out:
+ dm_task_destroy(dmt);
+
+ return r;
+}
+
+/* Get all active devices */
+static int _find_all_devs(dm_bitset_t bs, int tp_major, int tp_minor)
+{
+ struct dm_task *dmt;
+ struct dm_names *names;
+ unsigned next = 0;
+ int minor, r = 1;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_LIST)))
+ return 0;
+
+ if (!dm_task_run(dmt)) {
+ r = 0;
+ goto out;
+ }
+
+ if (!(names = dm_task_get_names(dmt))) {
+ r = 0;
+ goto out;
+ }
+
+ if (!names->dev)
+ goto out;
+
+ do {
+ names = (struct dm_names *)((char *) names + next);
+ if (_has_deps(names->name, tp_major, tp_minor, &minor))
+ dm_bit_set(bs, minor);
+ next = names->next;
+ } while (next);
+
+out:
+ dm_task_destroy(dmt);
+
+ return r;
+}
+
+static int _extend(struct dso_state *state)
+{
+#if THIN_DEBUG
+ syslog(LOG_INFO, "dmeventd executes: %s.\n", state->cmd_str);
+#endif
+ return (dmeventd_lvm2_run(state->cmd_str) == ECMD_PROCESSED);
+}
+
+static int _run(const char *cmd, ...)
+{
+ va_list ap;
+ int argc = 1; /* for argv[0], i.e. cmd */
+ int i = 0;
+ const char **argv;
+ pid_t pid = fork();
+ int status;
+
+ if (pid == 0) { /* child */
+ va_start(ap, cmd);
+ while (va_arg(ap, const char *))
+ ++argc;
+ va_end(ap);
+
+ /* + 1 for the terminating NULL */
+ argv = alloca(sizeof(const char *) * (argc + 1));
+
+ argv[0] = cmd;
+ va_start(ap, cmd);
+ while ((argv[++i] = va_arg(ap, const char *)));
+ va_end(ap);
+
+ execvp(cmd, (char **)argv);
+ syslog(LOG_ERR, "Failed to execute %s: %s.\n", cmd, strerror(errno));
+ exit(127);
+ }
+
+ if (pid > 0) { /* parent */
+ if (waitpid(pid, &status, 0) != pid)
+ return 0; /* waitpid failed */
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ return 0; /* the child failed */
+ }
+
+ if (pid < 0)
+ return 0; /* fork failed */
+
+ return 1; /* all good */
+}
+
+/*
+ * Find all thin pool users and try to umount them.
+ * TODO: work with read-only thin pool support
+ */
+static void _umount(struct dm_task *dmt, const char *device)
+{
+ static const char mountinfo[] = "/proc/self/mountinfo";
+ static const size_t MINORS = 4096;
+ FILE *minfo;
+ char buffer[4096];
+ char target[PATH_MAX];
+ struct dm_info info;
+ unsigned maj, min;
+ dm_bitset_t minors; /* Bitset for active thin pool minors */
+
+ if (!dm_task_get_info(dmt, &info))
+ return;
+
+ dmeventd_lvm2_unlock();
+
+ if (!(minors = dm_bitset_create(NULL, MINORS))) {
+ syslog(LOG_ERR, "Failed to allocate bitset. Not unmounting %s.\n", device);
+ goto out;
+ }
+
+ if (!(minfo = fopen(mountinfo, "r"))) {
+ syslog(LOG_ERR, "Could not read %s. Not umounting %s.\n", mountinfo, device);
+ goto out;
+ }
+
+ if (!_find_all_devs(minors, info.major, info.minor)) {
+ syslog(LOG_ERR, "Failed to detect mounted volumes for %s.\n", device);
+ goto out;
+ }
+
+ while (!feof(minfo)) {
+ /* read mountinfo line */
+ if (!fgets(buffer, sizeof(buffer), minfo))
+ break; /* eof, likely */
+
+ if (_parse_mountinfo_line(buffer, &maj, &min, target) &&
+ (maj == info.major) && dm_bit(minors, min)) {
+ syslog(LOG_INFO, "Unmounting thin volume %s from %s.\n",
+ device, target);
+ if (!_run(UMOUNT_COMMAND, "-fl", target, NULL))
+ syslog(LOG_ERR, "Failed to umount thin %s from %s: %s.\n",
+ device, target, strerror(errno));
+ }
+ }
+
+ if (fclose(minfo))
+ syslog(LOG_ERR, "Failed to close %s\n", mountinfo);
+
+ dm_bitset_destroy(minors);
+out:
+ dmeventd_lvm2_lock();
+}
+
+void process_event(struct dm_task *dmt,
+ enum dm_event_mask event __attribute__((unused)),
+ void **private)
+{
+ const char *device = dm_task_get_name(dmt);
+ int percent;
+ struct dso_state *state = *private;
+ struct dm_status_thin_pool *tps = NULL;
+ void *next = NULL;
+ uint64_t start, length;
+ char *target_type = NULL;
+ char *params;
+
+#if 0
+ /* No longer monitoring, waiting for remove */
+ if (!state->meta_percent_check && !state->data_percent_check)
+ return;
+#endif
+ dmeventd_lvm2_lock();
+
+ dm_get_next_target(dmt, next, &start, &length, &target_type, &params);
+
+ if (!target_type || (strcmp(target_type, "thin-pool") != 0)) {
+ syslog(LOG_ERR, "Invalid target type.\n");
+ goto out;
+ }
+
+ if (!dm_get_status_thin_pool(state->mem, params, &tps)) {
+ syslog(LOG_ERR, "Failed to parse status.\n");
+ _umount(dmt, device);
+ goto out;
+ }
+
+#if THIN_DEBUG
+ syslog(LOG_INFO, "%p: Got status %" PRIu64 " / %" PRIu64
+ " %" PRIu64 " / %" PRIu64 ".\n", state,
+ tps->used_metadata_blocks, tps->total_metadata_blocks,
+ tps->used_data_blocks, tps->total_data_blocks);
+#endif
+
+ /* Thin pool size had changed. Clear the threshold. */
+ if (state->known_metadata_size != tps->total_metadata_blocks) {
+ state->metadata_percent_check = CHECK_MINIMUM;
+ state->known_metadata_size = tps->total_metadata_blocks;
+ }
+
+ if (state->known_data_size != tps->total_data_blocks) {
+ state->data_percent_check = CHECK_MINIMUM;
+ state->known_data_size = tps->total_data_blocks;
+ }
+
+ percent = 100 * tps->used_metadata_blocks / tps->total_metadata_blocks;
+ if (percent >= state->metadata_percent_check) {
+ /*
+ * Usage has raised more than CHECK_STEP since the last
+ * time. Run actions.
+ */
+ state->metadata_percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP;
+
+ /* FIXME: extension of metadata needs to be written! */
+ if (percent >= WARNING_THRESH) /* Print a warning to syslog. */
+ syslog(LOG_WARNING, "Thin metadata %s is now %i%% full.\n",
+ device, percent);
+ /* Try to extend the metadata, in accord with user-set policies */
+ if (!_extend(state)) {
+ syslog(LOG_ERR, "Failed to extend thin metadata %s.\n",
+ device);
+ _umount(dmt, device);
+ }
+ /* FIXME: hmm READ-ONLY switch should happen in error path */
+ }
+
+ percent = 100 * tps->used_data_blocks / tps->total_data_blocks;
+ if (percent >= state->data_percent_check) {
+ /*
+ * Usage has raised more than CHECK_STEP since
+ * the last time. Run actions.
+ */
+ state->data_percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP;
+
+ if (percent >= WARNING_THRESH) /* Print a warning to syslog. */
+ syslog(LOG_WARNING, "Thin %s is now %i%% full.\n", device, percent);
+ /* Try to extend the thin data, in accord with user-set policies */
+ if (!_extend(state)) {
+ syslog(LOG_ERR, "Failed to extend thin %s.\n", device);
+ state->data_percent_check = 0;
+ _umount(dmt, device);
+ }
+ /* FIXME: hmm READ-ONLY switch should happen in error path */
+ }
+out:
+ if (tps)
+ dm_pool_free(state->mem, tps);
+
+ dmeventd_lvm2_unlock();
+}
+
+int register_device(const char *device,
+ const char *uuid __attribute__((unused)),
+ int major __attribute__((unused)),
+ int minor __attribute__((unused)),
+ void **private)
+{
+ struct dm_pool *statemem = NULL;
+ struct dso_state *state;
+
+ if (!dmeventd_lvm2_init())
+ goto bad;
+
+ if (!(statemem = dm_pool_create("thin_pool_state", 2048)) ||
+ !(state = dm_pool_zalloc(statemem, sizeof(*state))) ||
+ !dmeventd_lvm2_command(statemem, state->cmd_str,
+ sizeof(state->cmd_str),
+ "lvextend --use-policies",
+ device)) {
+ if (statemem)
+ dm_pool_destroy(statemem);
+ dmeventd_lvm2_exit();
+ goto bad;
+ }
+
+ state->mem = statemem;
+ state->metadata_percent_check = CHECK_MINIMUM;
+ state->data_percent_check = CHECK_MINIMUM;
+ *private = state;
+
+ syslog(LOG_INFO, "Monitoring thin %s.\n", device);
+
+ return 1;
+bad:
+ syslog(LOG_ERR, "Failed to monitor thin %s.\n", device);
+
+ return 0;
+}
+
+int unregister_device(const char *device,
+ const char *uuid __attribute__((unused)),
+ int major __attribute__((unused)),
+ int minor __attribute__((unused)),
+ void **private)
+{
+ struct dso_state *state = *private;
+
+ syslog(LOG_INFO, "No longer monitoring thin %s.\n", device);
+ dm_pool_destroy(state->mem);
+ dmeventd_lvm2_exit();
+
+ return 1;
+}
diff --git a/daemons/lvmetad/Makefile.in b/daemons/lvmetad/Makefile.in
new file mode 100644
index 0000000..35aa4ab
--- /dev/null
+++ b/daemons/lvmetad/Makefile.in
@@ -0,0 +1,59 @@
+#
+# Copyright (C) 2011-2012 Red Hat, Inc.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU Lesser General Public License v.2.1.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+top_builddir = @top_builddir@
+
+SOURCES = lvmetad-core.c
+SOURCES2 = testclient.c
+
+TARGETS = lvmetad lvmetad-testclient
+
+.PHONY: install_lvmetad
+
+CFLOW_LIST = $(SOURCES)
+CFLOW_LIST_TARGET = $(LIB_NAME).cflow
+CFLOW_TARGET = lvmetad
+
+include $(top_builddir)/make.tmpl
+
+INCLUDES += -I$(top_srcdir)/libdaemon/server
+LVMLIBS = -ldaemonserver $(LVMINTERNAL_LIBS) -ldevmapper
+
+LIBS += $(PTHREAD_LIBS)
+
+LDFLAGS += -L$(top_builddir)/libdaemon/server
+CLDFLAGS += -L$(top_builddir)/libdaemon/server
+
+lvmetad: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
+ $(top_builddir)/libdaemon/server/libdaemonserver.a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) \
+ $(DL_LIBS) $(LVMLIBS) $(LIBS) -rdynamic
+
+# TODO: No idea. No idea how to test either.
+#ifneq ("$(CFLOW_CMD)", "")
+#CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES))
+#-include $(top_builddir)/libdm/libdevmapper.cflow
+#-include $(top_builddir)/lib/liblvm-internal.cflow
+#-include $(top_builddir)/lib/liblvm2cmd.cflow
+#-include $(top_builddir)/daemons/dmeventd/$(LIB_NAME).cflow
+#-include $(top_builddir)/daemons/dmeventd/plugins/mirror/$(LIB_NAME)-lvm2mirror.cflow
+#endif
+
+install_lvmetad: lvmetad
+ $(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
+
+install_lvm2: install_lvmetad
+
+install: install_lvm2
diff --git a/daemons/lvmetad/lvmetad-client.h b/daemons/lvmetad/lvmetad-client.h
new file mode 100644
index 0000000..fe8eedc
--- /dev/null
+++ b/daemons/lvmetad/lvmetad-client.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2011-2012 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LVM_LVMETAD_CLIENT_H
+#define _LVM_LVMETAD_CLIENT_H
+
+#include "daemon-client.h"
+
+struct volume_group;
+
+/* Different types of replies we may get from lvmetad. */
+
+typedef struct {
+ daemon_reply r;
+ const char **uuids; /* NULL terminated array */
+} lvmetad_uuidlist;
+
+typedef struct {
+ daemon_reply r;
+ struct dm_config_tree *cft;
+} lvmetad_vg;
+
+/* Get a list of VG UUIDs that match a given VG name. */
+lvmetad_uuidlist lvmetad_lookup_vgname(daemon_handle h, const char *name);
+
+/* Get the metadata of a single VG, identified by UUID. */
+lvmetad_vg lvmetad_get_vg(daemon_handle h, const char *uuid);
+
+/*
+ * Add and remove PVs on demand. Udev-driven systems will use this interface
+ * instead of scanning.
+ */
+daemon_reply lvmetad_add_pv(daemon_handle h, const char *pv_uuid, const char *mda_content);
+daemon_reply lvmetad_remove_pv(daemon_handle h, const char *pv_uuid);
+
+/* Trigger a full disk scan, throwing away all caches. XXX do we eventually want
+ * this? Probably not yet, anyway.
+ * daemon_reply lvmetad_rescan(daemon_handle h);
+ */
+
+/*
+ * Update the version of metadata of a volume group. The VG has to be locked for
+ * writing for this, and the VG metadata here has to match whatever has been
+ * written to the disk (under this lock). This initially avoids the requirement
+ * for lvmetad to write to disk (in later revisions, lvmetad_supersede_vg may
+ * also do the writing, or we probably add another function to do that).
+ */
+daemon_reply lvmetad_supersede_vg(daemon_handle h, struct volume_group *vg);
+
+/* Wrappers to open/close connection */
+
+static inline daemon_handle lvmetad_open(const char *socket)
+{
+ daemon_info lvmetad_info = {
+ .path = "lvmetad",
+ .socket = socket ?: DEFAULT_RUN_DIR "/lvmetad.socket",
+ .protocol = "lvmetad",
+ .protocol_version = 1,
+ .autostart = 0
+ };
+
+ return daemon_open(lvmetad_info);
+}
+
+static inline void lvmetad_close(daemon_handle h)
+{
+ return daemon_close(h);
+}
+
+#endif
diff --git a/daemons/lvmetad/lvmetad-core.c b/daemons/lvmetad/lvmetad-core.c
new file mode 100644
index 0000000..0a1c884
--- /dev/null
+++ b/daemons/lvmetad/lvmetad-core.c
@@ -0,0 +1,1204 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#define _XOPEN_SOURCE 500 /* pthread */
+
+#include "configure.h"
+#include "daemon-io.h"
+#include "config-util.h"
+#include "daemon-server.h"
+#include "daemon-log.h"
+#include "lvm-version.h"
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <unistd.h>
+
+typedef struct {
+ log_state *log; /* convenience */
+ const char *log_config;
+
+ struct dm_hash_table *pvid_to_pvmeta;
+ struct dm_hash_table *device_to_pvid; /* shares locks with above */
+
+ struct dm_hash_table *vgid_to_metadata;
+ struct dm_hash_table *vgid_to_vgname;
+ struct dm_hash_table *vgname_to_vgid;
+ struct dm_hash_table *pvid_to_vgid;
+ struct {
+ struct dm_hash_table *vg;
+ pthread_mutex_t vg_lock_map;
+ pthread_mutex_t pvid_to_pvmeta;
+ pthread_mutex_t vgid_to_metadata;
+ pthread_mutex_t pvid_to_vgid;
+ } lock;
+ char token[128];
+ pthread_mutex_t token_lock;
+} lvmetad_state;
+
+static void destroy_metadata_hashes(lvmetad_state *s)
+{
+ struct dm_hash_node *n = NULL;
+
+ n = dm_hash_get_first(s->vgid_to_metadata);
+ while (n) {
+ dm_config_destroy(dm_hash_get_data(s->vgid_to_metadata, n));
+ n = dm_hash_get_next(s->vgid_to_metadata, n);
+ }
+
+ n = dm_hash_get_first(s->pvid_to_pvmeta);
+ while (n) {
+ dm_config_destroy(dm_hash_get_data(s->pvid_to_pvmeta, n));
+ n = dm_hash_get_next(s->pvid_to_pvmeta, n);
+ }
+ dm_hash_destroy(s->pvid_to_pvmeta);
+ dm_hash_destroy(s->vgid_to_metadata);
+ dm_hash_destroy(s->vgid_to_vgname);
+ dm_hash_destroy(s->vgname_to_vgid);
+
+ n = dm_hash_get_first(s->device_to_pvid);
+ while (n) {
+ dm_free(dm_hash_get_data(s->device_to_pvid, n));
+ n = dm_hash_get_next(s->device_to_pvid, n);
+ }
+
+ dm_hash_destroy(s->device_to_pvid);
+ dm_hash_destroy(s->pvid_to_vgid);
+}
+
+static void create_metadata_hashes(lvmetad_state *s)
+{
+ s->pvid_to_pvmeta = dm_hash_create(32);
+ s->device_to_pvid = dm_hash_create(32);
+ s->vgid_to_metadata = dm_hash_create(32);
+ s->vgid_to_vgname = dm_hash_create(32);
+ s->pvid_to_vgid = dm_hash_create(32);
+ s->vgname_to_vgid = dm_hash_create(32);
+}
+
+static void lock_pvid_to_pvmeta(lvmetad_state *s) {
+ pthread_mutex_lock(&s->lock.pvid_to_pvmeta); }
+static void unlock_pvid_to_pvmeta(lvmetad_state *s) {
+ pthread_mutex_unlock(&s->lock.pvid_to_pvmeta); }
+
+static void lock_vgid_to_metadata(lvmetad_state *s) {
+ pthread_mutex_lock(&s->lock.vgid_to_metadata); }
+static void unlock_vgid_to_metadata(lvmetad_state *s) {
+ pthread_mutex_unlock(&s->lock.vgid_to_metadata); }
+
+static void lock_pvid_to_vgid(lvmetad_state *s) {
+ pthread_mutex_lock(&s->lock.pvid_to_vgid); }
+static void unlock_pvid_to_vgid(lvmetad_state *s) {
+ pthread_mutex_unlock(&s->lock.pvid_to_vgid); }
+
+static response reply_fail(const char *reason)
+{
+ return daemon_reply_simple("failed", "reason = %s", reason, NULL);
+}
+
+static response reply_unknown(const char *reason)
+{
+ return daemon_reply_simple("unknown", "reason = %s", reason, NULL);
+}
+
+/*
+ * TODO: It may be beneficial to clean up the vg lock hash from time to time,
+ * since if we have many "rogue" requests for nonexistent things, we will keep
+ * allocating memory that we never release. Not good.
+ */
+static struct dm_config_tree *lock_vg(lvmetad_state *s, const char *id) {
+ pthread_mutex_t *vg;
+ struct dm_config_tree *cft;
+
+ pthread_mutex_lock(&s->lock.vg_lock_map);
+ vg = dm_hash_lookup(s->lock.vg, id);
+ if (!vg) {
+ pthread_mutexattr_t rec;
+ pthread_mutexattr_init(&rec);
+ pthread_mutexattr_settype(&rec, PTHREAD_MUTEX_RECURSIVE_NP);
+ if (!(vg = malloc(sizeof(pthread_mutex_t))))
+ return NULL;
+ pthread_mutex_init(vg, &rec);
+ if (!dm_hash_insert(s->lock.vg, id, vg)) {
+ free(vg);
+ return NULL;
+ }
+ }
+ /* We never remove items from s->lock.vg => the pointer remains valid. */
+ pthread_mutex_unlock(&s->lock.vg_lock_map);
+
+ DEBUGLOG(s, "locking VG %s", id);
+ pthread_mutex_lock(vg);
+
+ /* Protect against structure changes of the vgid_to_metadata hash. */
+ lock_vgid_to_metadata(s);
+ cft = dm_hash_lookup(s->vgid_to_metadata, id);
+ unlock_vgid_to_metadata(s);
+ return cft;
+}
+
+static void unlock_vg(lvmetad_state *s, const char *id) {
+ pthread_mutex_t *vg;
+
+ DEBUGLOG(s, "unlocking VG %s", id);
+ /* Protect the s->lock.vg structure from concurrent access. */
+ pthread_mutex_lock(&s->lock.vg_lock_map);
+ if ((vg = dm_hash_lookup(s->lock.vg, id)))
+ pthread_mutex_unlock(vg);
+ pthread_mutex_unlock(&s->lock.vg_lock_map);
+}
+
+static struct dm_config_node *pvs(struct dm_config_node *vg)
+{
+ struct dm_config_node *pv = dm_config_find_node(vg, "metadata/physical_volumes");
+ if (pv)
+ pv = pv->child;
+ return pv;
+}
+
+static void filter_metadata(struct dm_config_node *vg) {
+ struct dm_config_node *pv = pvs(vg);
+ while (pv) {
+ struct dm_config_node *item = pv->child;
+ while (item) {
+ /* Remove the advisory device nodes. */
+ if (item->sib && !strcmp(item->sib->key, "device"))
+ item->sib = item->sib->sib;
+ item = item->sib;
+ }
+ pv = pv->sib;
+ }
+ vg->sib = NULL; /* Drop any trailing garbage. */
+}
+
+static void merge_pvmeta(struct dm_config_node *pv, struct dm_config_node *pvmeta)
+{
+ struct dm_config_node *tmp;
+
+ if (!pvmeta)
+ return;
+
+ tmp = pvmeta;
+ while (tmp->sib) {
+ /* drop the redundant ID and dev_size nodes */
+ if (!strcmp(tmp->sib->key, "id") || !strcmp(tmp->sib->key, "dev_size"))
+ tmp->sib = tmp->sib->sib;
+ if (!tmp->sib) break;
+ tmp = tmp->sib;
+ tmp->parent = pv;
+ }
+ tmp->sib = pv->child;
+ pv->child = pvmeta;
+ pvmeta->parent = pv;
+}
+
+/* Either the "big" vgs lock, or a per-vg lock needs to be held before entering
+ * this function. */
+static int update_pv_status(lvmetad_state *s,
+ struct dm_config_tree *cft,
+ struct dm_config_node *vg, int act)
+{
+ struct dm_config_node *pv;
+ int complete = 1;
+ const char *uuid;
+ struct dm_config_tree *pvmeta;
+
+ lock_pvid_to_pvmeta(s);
+
+ for (pv = pvs(vg); pv; pv = pv->sib) {
+ if (!(uuid = dm_config_find_str(pv->child, "id", NULL)))
+ continue;
+
+ pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, uuid);
+ if (act) {
+ set_flag(cft, pv, "status", "MISSING", !pvmeta);
+ if (pvmeta) {
+ struct dm_config_node *pvmeta_cn =
+ dm_config_clone_node(cft, pvmeta->root->child, 1);
+ merge_pvmeta(pv, pvmeta_cn);
+ }
+ }
+ if (!pvmeta) {
+ complete = 0;
+ if (!act) { /* optimisation */
+ unlock_pvid_to_pvmeta(s);
+ return complete;
+ }
+ }
+ }
+ unlock_pvid_to_pvmeta(s);
+
+ return complete;
+}
+
+static struct dm_config_node *make_pv_node(lvmetad_state *s, const char *pvid,
+ struct dm_config_tree *cft,
+ struct dm_config_node *parent,
+ struct dm_config_node *pre_sib)
+{
+ struct dm_config_tree *pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid);
+ const char *vgid = dm_hash_lookup(s->pvid_to_vgid, pvid), *vgname = NULL;
+ struct dm_config_node *pv;
+ struct dm_config_node *cn = NULL;
+
+ if (!pvmeta)
+ return NULL;
+
+ if (vgid) {
+ lock_vgid_to_metadata(s); // XXX
+ vgname = dm_hash_lookup(s->vgid_to_vgname, vgid);
+ unlock_vgid_to_metadata(s);
+ }
+
+ /* Nick the pvmeta config tree. */
+ if (!(pv = dm_config_clone_node(cft, pvmeta->root, 0)))
+ return 0;
+
+ if (pre_sib)
+ pre_sib->sib = pv;
+ if (parent && !parent->child)
+ parent->child = pv;
+ pv->parent = parent;
+ pv->key = pvid;
+
+ /* Add the "variable" bits to it. */
+
+ if (vgid && strcmp(vgid, "#orphan"))
+ cn = make_text_node(cft, "vgid", vgid, pv, cn);
+ if (vgname)
+ cn = make_text_node(cft, "vgname", vgname, pv, cn);
+
+ return pv;
+}
+
+static response pv_list(lvmetad_state *s, request r)
+{
+ struct dm_config_node *cn = NULL, *cn_pvs;
+ struct dm_hash_node *n;
+ const char *id;
+ response res;
+
+ buffer_init( &res.buffer );
+
+ if (!(res.cft = dm_config_create()))
+ return res; /* FIXME error reporting */
+
+ /* The response field */
+ res.cft->root = make_text_node(res.cft, "response", "OK", NULL, NULL);
+ cn_pvs = make_config_node(res.cft, "physical_volumes", NULL, res.cft->root);
+
+ lock_pvid_to_pvmeta(s);
+
+ for (n = dm_hash_get_first(s->pvid_to_pvmeta); n;
+ n = dm_hash_get_next(s->pvid_to_pvmeta, n)) {
+ id = dm_hash_get_key(s->pvid_to_pvmeta, n);
+ cn = make_pv_node(s, id, res.cft, cn_pvs, cn);
+ }
+
+ unlock_pvid_to_pvmeta(s);
+
+ return res;
+}
+
+static response pv_lookup(lvmetad_state *s, request r)
+{
+ const char *pvid = daemon_request_str(r, "uuid", NULL);
+ int64_t devt = daemon_request_int(r, "device", 0);
+ response res;
+ struct dm_config_node *pv;
+
+ buffer_init( &res.buffer );
+
+ if (!pvid && !devt)
+ return reply_fail("need PVID or device");
+
+ if (!(res.cft = dm_config_create()))
+ return reply_fail("out of memory");
+
+ if (!(res.cft->root = make_text_node(res.cft, "response", "OK", NULL, NULL)))
+ return reply_fail("out of memory");
+
+ lock_pvid_to_pvmeta(s);
+ if (!pvid && devt)
+ pvid = dm_hash_lookup_binary(s->device_to_pvid, &devt, sizeof(devt));
+
+ if (!pvid) {
+ WARN(s, "pv_lookup: could not find device %" PRIu64, devt);
+ unlock_pvid_to_pvmeta(s);
+ dm_config_destroy(res.cft);
+ return reply_unknown("device not found");
+ }
+
+ pv = make_pv_node(s, pvid, res.cft, NULL, res.cft->root);
+ if (!pv) {
+ unlock_pvid_to_pvmeta(s);
+ dm_config_destroy(res.cft);
+ return reply_unknown("PV not found");
+ }
+
+ pv->key = "physical_volume";
+ unlock_pvid_to_pvmeta(s);
+
+ return res;
+}
+
+static response vg_list(lvmetad_state *s, request r)
+{
+ struct dm_config_node *cn, *cn_vgs, *cn_last = NULL;
+ struct dm_hash_node *n;
+ const char *id;
+ const char *name;
+ response res;
+
+ buffer_init( &res.buffer );
+
+ if (!(res.cft = dm_config_create()))
+ goto bad; /* FIXME: better error reporting */
+
+ /* The response field */
+ res.cft->root = cn = dm_config_create_node(res.cft, "response");
+ if (!cn)
+ goto bad; /* FIXME */
+ cn->parent = res.cft->root;
+ if (!(cn->v = dm_config_create_value(res.cft)))
+ goto bad; /* FIXME */
+
+ cn->v->type = DM_CFG_STRING;
+ cn->v->v.str = "OK";
+
+ cn_vgs = cn = cn->sib = dm_config_create_node(res.cft, "volume_groups");
+ if (!cn_vgs)
+ goto bad; /* FIXME */
+
+ cn->parent = res.cft->root;
+ cn->v = NULL;
+ cn->child = NULL;
+
+ lock_vgid_to_metadata(s);
+
+ n = dm_hash_get_first(s->vgid_to_vgname);
+ while (n) {
+ id = dm_hash_get_key(s->vgid_to_vgname, n),
+ name = dm_hash_get_data(s->vgid_to_vgname, n);
+
+ if (!(cn = dm_config_create_node(res.cft, id)))
+ goto bad; /* FIXME */
+
+ if (cn_last)
+ cn_last->sib = cn;
+
+ cn->parent = cn_vgs;
+ cn->sib = NULL;
+ cn->v = NULL;
+
+ if (!(cn->child = dm_config_create_node(res.cft, "name")))
+ goto bad; /* FIXME */
+
+ cn->child->parent = cn;
+ cn->child->sib = 0;
+ if (!(cn->child->v = dm_config_create_value(res.cft)))
+ goto bad; /* FIXME */
+
+ cn->child->v->type = DM_CFG_STRING;
+ cn->child->v->v.str = name;
+
+ if (!cn_vgs->child)
+ cn_vgs->child = cn;
+ cn_last = cn;
+
+ n = dm_hash_get_next(s->vgid_to_vgname, n);
+ }
+
+ unlock_vgid_to_metadata(s);
+bad:
+ return res;
+}
+
+static response vg_lookup(lvmetad_state *s, request r)
+{
+ struct dm_config_tree *cft;
+ struct dm_config_node *metadata, *n;
+ response res;
+
+ const char *uuid = daemon_request_str(r, "uuid", NULL);
+ const char *name = daemon_request_str(r, "name", NULL);
+
+ buffer_init( &res.buffer );
+
+ DEBUGLOG(s, "vg_lookup: uuid = %s, name = %s", uuid, name);
+
+ if (!uuid || !name) {
+ lock_vgid_to_metadata(s);
+ if (name && !uuid)
+ uuid = dm_hash_lookup(s->vgname_to_vgid, name);
+ if (uuid && !name)
+ name = dm_hash_lookup(s->vgid_to_vgname, uuid);
+ unlock_vgid_to_metadata(s);
+ }
+
+ DEBUGLOG(s, "vg_lookup: updated uuid = %s, name = %s", uuid, name);
+
+ if (!uuid)
+ return reply_unknown("VG not found");
+
+ cft = lock_vg(s, uuid);
+ if (!cft || !cft->root) {
+ unlock_vg(s, uuid);
+ return reply_unknown("UUID not found");
+ }
+
+ metadata = cft->root;
+ if (!(res.cft = dm_config_create()))
+ goto bad;
+
+ /* The response field */
+ if (!(res.cft->root = n = dm_config_create_node(res.cft, "response")))
+ goto bad;
+
+ if (!(n->v = dm_config_create_value(cft)))
+ goto bad;
+
+ n->parent = res.cft->root;
+ n->v->type = DM_CFG_STRING;
+ n->v->v.str = "OK";
+
+ if (!(n = n->sib = dm_config_create_node(res.cft, "name")))
+ goto bad;
+
+ if (!(n->v = dm_config_create_value(res.cft)))
+ goto bad;
+
+ n->parent = res.cft->root;
+ n->v->type = DM_CFG_STRING;
+ n->v->v.str = name;
+
+ /* The metadata section */
+ if (!(n = n->sib = dm_config_clone_node(res.cft, metadata, 1)))
+ goto bad;
+ n->parent = res.cft->root;
+ res.error = 0;
+ unlock_vg(s, uuid);
+
+ update_pv_status(s, res.cft, n, 1); /* FIXME report errors */
+
+ return res;
+bad:
+ unlock_vg(s, uuid);
+ return reply_fail("out of memory");
+}
+
+static int compare_value(struct dm_config_value *a, struct dm_config_value *b)
+{
+ int r = 0;
+
+ if (a->type > b->type)
+ return 1;
+ if (a->type < b->type)
+ return -1;
+
+ switch (a->type) {
+ case DM_CFG_STRING: r = strcmp(a->v.str, b->v.str); break;
+ case DM_CFG_FLOAT: r = (a->v.f == b->v.f) ? 0 : (a->v.f > b->v.f) ? 1 : -1; break;
+ case DM_CFG_INT: r = (a->v.i == b->v.i) ? 0 : (a->v.i > b->v.i) ? 1 : -1; break;
+ case DM_CFG_EMPTY_ARRAY: return 0;
+ }
+
+ if (r == 0 && a->next && b->next)
+ r = compare_value(a->next, b->next);
+ return r;
+}
+
+static int compare_config(struct dm_config_node *a, struct dm_config_node *b)
+{
+ int result = 0;
+ if (a->v && b->v)
+ result = compare_value(a->v, b->v);
+ if (a->v && !b->v)
+ result = 1;
+ if (!a->v && b->v)
+ result = -1;
+ if (a->child && b->child)
+ result = compare_config(a->child, b->child);
+
+ if (result) {
+ // DEBUGLOG("config inequality at %s / %s", a->key, b->key);
+ return result;
+ }
+
+ if (a->sib && b->sib)
+ result = compare_config(a->sib, b->sib);
+ if (a->sib && !b->sib)
+ result = 1;
+ if (!a->sib && b->sib)
+ result = -1;
+
+ return result;
+}
+
+static int vg_remove_if_missing(lvmetad_state *s, const char *vgid);
+
+/* You need to be holding the pvid_to_vgid lock already to call this. */
+static int update_pvid_to_vgid(lvmetad_state *s, struct dm_config_tree *vg,
+ const char *vgid, int nuke_empty)
+{
+ struct dm_config_node *pv;
+ struct dm_hash_table *to_check;
+ struct dm_hash_node *n;
+ const char *pvid;
+ const char *vgid_old;
+ const char *check_vgid;
+ int r = 0;
+
+ if (!vgid)
+ return 0;
+
+ if (!(to_check = dm_hash_create(32)))
+ return 0;
+
+ for (pv = pvs(vg->root); pv; pv = pv->sib) {
+ if (!(pvid = dm_config_find_str(pv->child, "id", NULL)))
+ continue;
+
+ if (nuke_empty &&
+ (vgid_old = dm_hash_lookup(s->pvid_to_vgid, pvid)) &&
+ !dm_hash_insert(to_check, vgid_old, (void*) 1))
+ goto out;
+
+ if (!dm_hash_insert(s->pvid_to_vgid, pvid, (void*) vgid))
+ goto out;
+
+ DEBUGLOG(s, "moving PV %s to VG %s", pvid, vgid);
+ }
+
+ for (n = dm_hash_get_first(to_check); n;
+ n = dm_hash_get_next(to_check, n)) {
+ check_vgid = dm_hash_get_key(to_check, n);
+ lock_vg(s, check_vgid);
+ vg_remove_if_missing(s, check_vgid);
+ unlock_vg(s, check_vgid);
+ }
+
+ r = 1;
+ out:
+ dm_hash_destroy(to_check);
+
+ return r;
+}
+
+/* A pvid map lock needs to be held if update_pvids = 1. */
+static int remove_metadata(lvmetad_state *s, const char *vgid, int update_pvids)
+{
+ struct dm_config_tree *old;
+ const char *oldname;
+ lock_vgid_to_metadata(s);
+ old = dm_hash_lookup(s->vgid_to_metadata, vgid);
+ oldname = dm_hash_lookup(s->vgid_to_vgname, vgid);
+ unlock_vgid_to_metadata(s);
+
+ if (!old)
+ return 0;
+ assert(oldname);
+
+ if (update_pvids)
+ /* FIXME: What should happen when update fails */
+ update_pvid_to_vgid(s, old, "#orphan", 0);
+ /* need to update what we have since we found a newer version */
+ dm_hash_remove(s->vgid_to_metadata, vgid);
+ dm_hash_remove(s->vgid_to_vgname, vgid);
+ dm_hash_remove(s->vgname_to_vgid, oldname);
+ dm_config_destroy(old);
+ return 1;
+}
+
+/* The VG must be locked. */
+static int vg_remove_if_missing(lvmetad_state *s, const char *vgid)
+{
+ struct dm_config_tree *vg;
+ struct dm_config_node *pv;
+ const char *vgid_check;
+ const char *pvid;
+ int missing = 1;
+
+ if (!vgid)
+ return 0;
+
+ if (!(vg = dm_hash_lookup(s->vgid_to_metadata, vgid)))
+ return 1;
+
+ lock_pvid_to_pvmeta(s);
+ for (pv = pvs(vg->root); pv; pv = pv->sib) {
+ if (!(pvid = dm_config_find_str(pv->child, "id", NULL)))
+ continue;
+
+ if ((vgid_check = dm_hash_lookup(s->pvid_to_vgid, pvid)) &&
+ dm_hash_lookup(s->pvid_to_pvmeta, pvid) &&
+ !strcmp(vgid, vgid_check))
+ missing = 0; /* at least one PV is around */
+ }
+
+ if (missing) {
+ DEBUGLOG(s, "removing empty VG %s", vgid);
+ remove_metadata(s, vgid, 0);
+ }
+
+ unlock_pvid_to_pvmeta(s);
+
+ return 1;
+}
+
+/* No locks need to be held. The pointers are never used outside of the scope of
+ * this function, so they can be safely destroyed after update_metadata returns
+ * (anything that might have been retained is copied). */
+static int update_metadata(lvmetad_state *s, const char *name, const char *_vgid,
+ struct dm_config_node *metadata, int64_t *oldseq)
+{
+ struct dm_config_tree *cft = NULL;
+ struct dm_config_tree *old;
+ int retval = 0;
+ int seq;
+ int haveseq = -1;
+ const char *oldname = NULL;
+ const char *vgid;
+ char *cfgname;
+
+ lock_vgid_to_metadata(s);
+ old = dm_hash_lookup(s->vgid_to_metadata, _vgid);
+ lock_vg(s, _vgid);
+ unlock_vgid_to_metadata(s);
+
+ seq = dm_config_find_int(metadata, "metadata/seqno", -1);
+
+ if (old) {
+ haveseq = dm_config_find_int(old->root, "metadata/seqno", -1);
+ oldname = dm_hash_lookup(s->vgid_to_vgname, _vgid);
+ assert(oldname);
+ }
+
+ if (seq < 0)
+ goto out;
+
+ filter_metadata(metadata); /* sanitize */
+
+ if (oldseq) {
+ if (old)
+ *oldseq = haveseq;
+ else
+ *oldseq = seq;
+ }
+
+ if (seq == haveseq) {
+ retval = 1;
+ if (compare_config(metadata, old->root))
+ retval = 0;
+ DEBUGLOG(s, "Not updating metadata for %s at %d (%s)", _vgid, haveseq,
+ retval ? "ok" : "MISMATCH");
+ if (!retval) {
+ DEBUGLOG_cft(s, "OLD: ", old->root);
+ DEBUGLOG_cft(s, "NEW: ", metadata);
+ }
+ goto out;
+ }
+
+ if (seq < haveseq) {
+ DEBUGLOG(s, "Refusing to update metadata for %s (at %d) to %d", _vgid, haveseq, seq);
+ /* TODO: notify the client that their metadata is out of date? */
+ retval = 1;
+ goto out;
+ }
+
+ if (!(cft = dm_config_create()) ||
+ !(cft->root = dm_config_clone_node(cft, metadata, 0))) {
+ ERROR(s, "Out of memory");
+ goto out;
+ }
+
+ vgid = dm_config_find_str(cft->root, "metadata/id", NULL);
+
+ if (!vgid || !name) {
+ DEBUGLOG(s, "Name '%s' or uuid '%s' missing!", name, vgid);
+ goto out;
+ }
+
+ lock_pvid_to_vgid(s);
+
+ if (haveseq >= 0 && haveseq < seq) {
+ INFO(s, "Updating metadata for %s at %d to %d", _vgid, haveseq, seq);
+ /* temporarily orphan all of our PVs */
+ remove_metadata(s, vgid, 1);
+ }
+
+ lock_vgid_to_metadata(s);
+ DEBUGLOG(s, "Mapping %s to %s", vgid, name);
+
+ retval = ((cfgname = dm_pool_strdup(dm_config_memory(cft), name)) &&
+ dm_hash_insert(s->vgid_to_metadata, vgid, cft) &&
+ dm_hash_insert(s->vgid_to_vgname, vgid, cfgname) &&
+ dm_hash_insert(s->vgname_to_vgid, name, (void*) vgid)) ? 1 : 0;
+ unlock_vgid_to_metadata(s);
+
+ if (retval)
+ /* FIXME: What should happen when update fails */
+ retval = update_pvid_to_vgid(s, cft, vgid, 1);
+
+ unlock_pvid_to_vgid(s);
+out:
+ if (!retval && cft)
+ dm_config_destroy(cft);
+ unlock_vg(s, _vgid);
+ return retval;
+}
+
+static response pv_gone(lvmetad_state *s, request r)
+{
+ const char *pvid = daemon_request_str(r, "uuid", NULL);
+ int64_t device = daemon_request_int(r, "device", 0);
+ struct dm_config_tree *pvmeta;
+ char *pvid_old;
+
+ DEBUGLOG(s, "pv_gone: %s / %" PRIu64, pvid, device);
+
+ lock_pvid_to_pvmeta(s);
+ if (!pvid && device > 0)
+ pvid = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device));
+ if (!pvid) {
+ unlock_pvid_to_pvmeta(s);
+ return reply_unknown("device not in cache");
+ }
+
+ DEBUGLOG(s, "pv_gone (updated): %s / %" PRIu64, pvid, device);
+
+ pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid);
+ pvid_old = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device));
+ dm_hash_remove_binary(s->device_to_pvid, &device, sizeof(device));
+ dm_hash_remove(s->pvid_to_pvmeta, pvid);
+ vg_remove_if_missing(s, dm_hash_lookup(s->pvid_to_vgid, pvid));
+ unlock_pvid_to_pvmeta(s);
+
+ if (pvid_old)
+ dm_free(pvid_old);
+
+ if (pvmeta) {
+ dm_config_destroy(pvmeta);
+ return daemon_reply_simple("OK", NULL);
+ } else
+ return reply_unknown("PVID does not exist");
+}
+
+static response pv_clear_all(lvmetad_state *s, request r)
+{
+ DEBUGLOG(s, "pv_clear_all");
+
+ lock_pvid_to_pvmeta(s);
+ lock_vgid_to_metadata(s);
+ lock_pvid_to_vgid(s);
+
+ destroy_metadata_hashes(s);
+ create_metadata_hashes(s);
+
+ unlock_pvid_to_vgid(s);
+ unlock_vgid_to_metadata(s);
+ unlock_pvid_to_pvmeta(s);
+
+ return daemon_reply_simple("OK", NULL);
+}
+
+static response pv_found(lvmetad_state *s, request r)
+{
+ struct dm_config_node *metadata = dm_config_find_node(r.cft->root, "metadata");
+ const char *pvid = daemon_request_str(r, "pvmeta/id", NULL);
+ const char *vgname = daemon_request_str(r, "vgname", NULL);
+ const char *vgid = daemon_request_str(r, "metadata/id", NULL);
+ struct dm_config_node *pvmeta = dm_config_find_node(r.cft->root, "pvmeta");
+ uint64_t device;
+ struct dm_config_tree *cft, *pvmeta_old_dev = NULL, *pvmeta_old_pvid = NULL;
+ char *old;
+ const char *pvid_dup;
+ int complete = 0, orphan = 0;
+ int64_t seqno = -1, seqno_old = -1;
+
+ if (!pvid)
+ return reply_fail("need PV UUID");
+ if (!pvmeta)
+ return reply_fail("need PV metadata");
+
+ if (!dm_config_get_uint64(pvmeta, "pvmeta/device", &device))
+ return reply_fail("need PV device number");
+
+ lock_pvid_to_pvmeta(s);
+
+ if ((old = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device)))) {
+ pvmeta_old_dev = dm_hash_lookup(s->pvid_to_pvmeta, old);
+ dm_hash_remove(s->pvid_to_pvmeta, old);
+ }
+ pvmeta_old_pvid = dm_hash_lookup(s->pvid_to_pvmeta, pvid);
+
+ DEBUGLOG(s, "pv_found %s, vgid = %s, device = %" PRIu64 ", old = %s", pvid, vgid, device, old);
+
+ dm_free(old);
+
+ if (!(cft = dm_config_create()) ||
+ !(cft->root = dm_config_clone_node(cft, pvmeta, 0))) {
+ unlock_pvid_to_pvmeta(s);
+ return reply_fail("out of memory");
+ }
+
+ pvid_dup = dm_strdup(pvid);
+ if (!dm_hash_insert(s->pvid_to_pvmeta, pvid, cft) ||
+ !dm_hash_insert_binary(s->device_to_pvid, &device, sizeof(device), (void*)pvid_dup)) {
+ unlock_pvid_to_pvmeta(s);
+ return reply_fail("out of memory");
+ }
+ if (pvmeta_old_pvid)
+ dm_config_destroy(pvmeta_old_pvid);
+ if (pvmeta_old_dev && pvmeta_old_dev != pvmeta_old_pvid)
+ dm_config_destroy(pvmeta_old_dev);
+
+ unlock_pvid_to_pvmeta(s);
+
+ if (metadata) {
+ if (!vgid)
+ return reply_fail("need VG UUID");
+ DEBUGLOG(s, "obtained vgid = %s, vgname = %s", vgid, vgname);
+ if (!vgname)
+ return reply_fail("need VG name");
+ if (daemon_request_int(r, "metadata/seqno", -1) < 0)
+ return reply_fail("need VG seqno");
+
+ if (!update_metadata(s, vgname, vgid, metadata, &seqno_old))
+ return reply_fail("metadata update failed");
+ } else {
+ lock_pvid_to_vgid(s);
+ vgid = dm_hash_lookup(s->pvid_to_vgid, pvid);
+ unlock_pvid_to_vgid(s);
+ }
+
+ if (vgid) {
+ if ((cft = lock_vg(s, vgid))) {
+ complete = update_pv_status(s, cft, cft->root, 0);
+ seqno = dm_config_find_int(cft->root, "metadata/seqno", -1);
+ } else if (!strcmp(vgid, "#orphan"))
+ orphan = 1;
+ else {
+ unlock_vg(s, vgid);
+ return reply_fail("non-orphan VG without metadata encountered");
+ }
+ unlock_vg(s, vgid);
+ }
+
+ return daemon_reply_simple("OK",
+ "status = %s", orphan ? "orphan" :
+ (complete ? "complete" : "partial"),
+ "vgid = %s", vgid ? vgid : "#orphan",
+ "seqno_before = %"PRId64, seqno_old,
+ "seqno_after = %"PRId64, seqno,
+ NULL);
+}
+
+static response vg_update(lvmetad_state *s, request r)
+{
+ struct dm_config_node *metadata = dm_config_find_node(r.cft->root, "metadata");
+ const char *vgid = daemon_request_str(r, "metadata/id", NULL);
+ const char *vgname = daemon_request_str(r, "vgname", NULL);
+ if (metadata) {
+ if (!vgid)
+ return reply_fail("need VG UUID");
+ if (!vgname)
+ return reply_fail("need VG name");
+ if (daemon_request_int(r, "metadata/seqno", -1) < 0)
+ return reply_fail("need VG seqno");
+
+ /* TODO defer metadata update here; add a separate vg_commit
+ * call; if client does not commit, die */
+ if (!update_metadata(s, vgname, vgid, metadata, NULL))
+ return reply_fail("metadata update failed");
+ }
+ return daemon_reply_simple("OK", NULL);
+}
+
+static response vg_remove(lvmetad_state *s, request r)
+{
+ const char *vgid = daemon_request_str(r, "uuid", NULL);
+
+ if (!vgid)
+ return reply_fail("need VG UUID");
+
+ DEBUGLOG(s, "vg_remove: %s", vgid);
+
+ lock_pvid_to_vgid(s);
+ remove_metadata(s, vgid, 1);
+ unlock_pvid_to_vgid(s);
+
+ return daemon_reply_simple("OK", NULL);
+}
+
+static void _dump_cft(struct buffer *buf, struct dm_hash_table *ht, const char *key_addr)
+{
+ struct dm_hash_node *n = dm_hash_get_first(ht);
+ while (n) {
+ struct dm_config_tree *cft = dm_hash_get_data(ht, n);
+ const char *key_backup = cft->root->key;
+ cft->root->key = dm_config_find_str(cft->root, key_addr, "unknown");
+ dm_config_write_node(cft->root, buffer_line, buf);
+ cft->root->key = key_backup;
+ n = dm_hash_get_next(ht, n);
+ }
+}
+
+static void _dump_pairs(struct buffer *buf, struct dm_hash_table *ht, const char *name, int int_key)
+{
+ char *append;
+ struct dm_hash_node *n = dm_hash_get_first(ht);
+
+ buffer_append(buf, name);
+ buffer_append(buf, " {\n");
+
+ while (n) {
+ const char *key = dm_hash_get_key(ht, n),
+ *val = dm_hash_get_data(ht, n);
+ buffer_append(buf, " ");
+ if (int_key)
+ dm_asprintf(&append, "%d = \"%s\"", *(int*)key, val);
+ else
+ dm_asprintf(&append, "%s = \"%s\"", key, val);
+ if (append)
+ buffer_append(buf, append);
+ buffer_append(buf, "\n");
+ dm_free(append);
+ n = dm_hash_get_next(ht, n);
+ }
+ buffer_append(buf, "}\n");
+}
+
+static response dump(lvmetad_state *s)
+{
+ response res;
+ struct buffer *b = &res.buffer;
+
+ buffer_init(b);
+
+ /* Lock everything so that we get a consistent dump. */
+
+ lock_vgid_to_metadata(s);
+ lock_pvid_to_pvmeta(s);
+ lock_pvid_to_vgid(s);
+
+ buffer_append(b, "# VG METADATA\n\n");
+ _dump_cft(b, s->vgid_to_metadata, "metadata/id");
+
+ buffer_append(b, "\n# PV METADATA\n\n");
+ _dump_cft(b, s->pvid_to_pvmeta, "pvmeta/id");
+
+ buffer_append(b, "\n# VGID to VGNAME mapping\n\n");
+ _dump_pairs(b, s->vgid_to_vgname, "vgid_to_vgname", 0);
+
+ buffer_append(b, "\n# VGNAME to VGID mapping\n\n");
+ _dump_pairs(b, s->vgname_to_vgid, "vgname_to_vgid", 0);
+
+ buffer_append(b, "\n# PVID to VGID mapping\n\n");
+ _dump_pairs(b, s->pvid_to_vgid, "pvid_to_vgid", 0);
+
+ buffer_append(b, "\n# DEVICE to PVID mapping\n\n");
+ _dump_pairs(b, s->device_to_pvid, "device_to_pvid", 1);
+
+ unlock_pvid_to_vgid(s);
+ unlock_pvid_to_pvmeta(s);
+ unlock_vgid_to_metadata(s);
+
+ return res;
+}
+
+static response handler(daemon_state s, client_handle h, request r)
+{
+ lvmetad_state *state = s.private;
+ const char *rq = daemon_request_str(r, "request", "NONE");
+ const char *token = daemon_request_str(r, "token", "NONE");
+
+ pthread_mutex_lock(&state->token_lock);
+ if (!strcmp(rq, "token_update")) {
+ strncpy(state->token, token, 128);
+ state->token[127] = 0;
+ pthread_mutex_unlock(&state->token_lock);
+ return daemon_reply_simple("OK", NULL);
+ }
+
+ if (strcmp(token, state->token) && strcmp(rq, "dump")) {
+ pthread_mutex_unlock(&state->token_lock);
+ return daemon_reply_simple("token_mismatch",
+ "expected = %s", state->token,
+ "received = %s", token,
+ "reason = %s", "token mismatch", NULL);
+ }
+ pthread_mutex_unlock(&state->token_lock);
+
+ /*
+ * TODO Add a stats call, with transaction count/rate, time since last
+ * update &c.
+ */
+ if (!strcmp(rq, "pv_found"))
+ return pv_found(state, r);
+
+ if (!strcmp(rq, "pv_gone"))
+ return pv_gone(state, r);
+
+ if (!strcmp(rq, "pv_clear_all"))
+ return pv_clear_all(state, r);
+
+ if (!strcmp(rq, "pv_lookup"))
+ return pv_lookup(state, r);
+
+ if (!strcmp(rq, "vg_update"))
+ return vg_update(state, r);
+
+ if (!strcmp(rq, "vg_remove"))
+ return vg_remove(state, r);
+
+ if (!strcmp(rq, "vg_lookup"))
+ return vg_lookup(state, r);
+
+ if (!strcmp(rq, "pv_list"))
+ return pv_list(state, r);
+
+ if (!strcmp(rq, "vg_list"))
+ return vg_list(state, r);
+
+ if (!strcmp(rq, "dump"))
+ return dump(state);
+
+ return reply_fail("request not implemented");
+}
+
+static int init(daemon_state *s)
+{
+ pthread_mutexattr_t rec;
+ lvmetad_state *ls = s->private;
+ ls->log = s->log;
+
+ pthread_mutexattr_init(&rec);
+ pthread_mutexattr_settype(&rec, PTHREAD_MUTEX_RECURSIVE_NP);
+ pthread_mutex_init(&ls->lock.pvid_to_pvmeta, &rec);
+ pthread_mutex_init(&ls->lock.vgid_to_metadata, &rec);
+ pthread_mutex_init(&ls->lock.pvid_to_vgid, NULL);
+ pthread_mutex_init(&ls->lock.vg_lock_map, NULL);
+ pthread_mutex_init(&ls->token_lock, NULL);
+ create_metadata_hashes(ls);
+
+ ls->lock.vg = dm_hash_create(32);
+ ls->token[0] = 0;
+
+ /* Set up stderr logging depending on the -l option. */
+ if (!daemon_log_parse(ls->log, DAEMON_LOG_OUTLET_STDERR, ls->log_config, 1))
+ return 0;
+
+ DEBUGLOG(s, "initialised state: vgid_to_metadata = %p", ls->vgid_to_metadata);
+ if (!ls->pvid_to_vgid || !ls->vgid_to_metadata)
+ return 0;
+
+ /* if (ls->initial_registrations)
+ _process_initial_registrations(ds->initial_registrations); */
+
+ return 1;
+}
+
+static int fini(daemon_state *s)
+{
+ lvmetad_state *ls = s->private;
+ struct dm_hash_node *n;
+
+ DEBUGLOG(s, "fini");
+
+ destroy_metadata_hashes(ls);
+
+ /* Destroy the lock hashes now. */
+ n = dm_hash_get_first(ls->lock.vg);
+ while (n) {
+ pthread_mutex_destroy(dm_hash_get_data(ls->lock.vg, n));
+ free(dm_hash_get_data(ls->lock.vg, n));
+ n = dm_hash_get_next(ls->lock.vg, n);
+ }
+
+ dm_hash_destroy(ls->lock.vg);
+ return 1;
+}
+
+static void usage(char *prog, FILE *file)
+{
+ fprintf(file, "Usage:\n"
+ "%s [-V] [-h] [-f] [-l {all|wire|debug}] [-s path]\n\n"
+ " -V Show version of lvmetad\n"
+ " -h Show this help information\n"
+ " -f Don't fork, run in the foreground\n"
+ " -l Logging message level (-l {all|wire|debug})\n"
+ " -s Set path to the socket to listen on\n\n", prog);
+}
+
+int main(int argc, char *argv[])
+{
+ signed char opt;
+ daemon_state s = { .private = NULL };
+ lvmetad_state ls;
+ int _socket_override = 1;
+
+ s.name = "lvmetad";
+ s.private = &ls;
+ s.daemon_init = init;
+ s.daemon_fini = fini;
+ s.handler = handler;
+ s.socket_path = getenv("LVM_LVMETAD_SOCKET");
+ if (!s.socket_path) {
+ _socket_override = 0;
+ s.socket_path = DEFAULT_RUN_DIR "/lvmetad.socket";
+ }
+ s.pidfile = LVMETAD_PIDFILE;
+ s.protocol = "lvmetad";
+ s.protocol_version = 1;
+ ls.log_config = "";
+
+ // use getopt_long
+ while ((opt = getopt(argc, argv, "?fhVl:s:")) != EOF) {
+ switch (opt) {
+ case 'h':
+ usage(argv[0], stdout);
+ exit(0);
+ case '?':
+ usage(argv[0], stderr);
+ exit(0);
+ case 'f':
+ s.foreground = 1;
+ break;
+ case 'l':
+ ls.log_config = optarg;
+ break;
+ case 's': // --socket
+ s.socket_path = optarg;
+ _socket_override = 1;
+ break;
+ case 'V':
+ printf("lvmetad version: " LVM_VERSION "\n");
+ exit(1);
+ }
+ }
+
+ if (s.foreground) {
+ if (!_socket_override) {
+ fprintf(stderr, "A socket path (-s) is required in foreground mode.");
+ exit(2);
+ } else {
+ s.pidfile = NULL;
+ }
+ }
+
+ daemon_start(s);
+ return 0;
+}
diff --git a/daemons/lvmetad/test.sh b/daemons/lvmetad/test.sh
new file mode 100755
index 0000000..f937562
--- /dev/null
+++ b/daemons/lvmetad/test.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+export LD_LIBRARY_PATH="$1"
+
+test -n "$2" && {
+ rm -f /var/run/lvmetad.{socket,pid}
+ chmod +rx lvmetad
+ valgrind ./lvmetad -f &
+ PID=$!
+ sleep 1
+ ./testclient
+ kill $PID
+ exit 0
+}
+
+sudo ./test.sh "$1" .
diff --git a/daemons/lvmetad/testclient.c b/daemons/lvmetad/testclient.c
new file mode 100644
index 0000000..c4cf7c5
--- /dev/null
+++ b/daemons/lvmetad/testclient.c
@@ -0,0 +1,127 @@
+#include "lvmetad-client.h"
+#include "label.h"
+#include "lvmcache.h"
+#include "metadata.h"
+
+const char *uuid1 = "abcd-efgh";
+const char *uuid2 = "bbcd-efgh";
+const char *vgid = "yada-yada";
+const char *uuid3 = "cbcd-efgh";
+
+const char *metadata2 = "{\n"
+ "id = \"yada-yada\"\n"
+ "seqno = 15\n"
+ "status = [\"READ\", \"WRITE\"]\n"
+ "flags = []\n"
+ "extent_size = 8192\n"
+ "physical_volumes {\n"
+ " pv0 {\n"
+ " id = \"abcd-efgh\"\n"
+ " }\n"
+ " pv1 {\n"
+ " id = \"bbcd-efgh\"\n"
+ " }\n"
+ " pv2 {\n"
+ " id = \"cbcd-efgh\"\n"
+ " }\n"
+ "}\n"
+ "}\n";
+
+void _handle_reply(daemon_reply reply) {
+ const char *repl = daemon_reply_str(reply, "response", NULL);
+ const char *status = daemon_reply_str(reply, "status", NULL);
+ const char *vgid = daemon_reply_str(reply, "vgid", NULL);
+
+ fprintf(stderr, "[C] REPLY: %s\n", repl);
+ if (!strcmp(repl, "failed"))
+ fprintf(stderr, "[C] REASON: %s\n", daemon_reply_str(reply, "reason", "unknown"));
+ if (vgid)
+ fprintf(stderr, "[C] VGID: %s\n", vgid);
+ if (status)
+ fprintf(stderr, "[C] STATUS: %s\n", status);
+ daemon_reply_destroy(reply);
+}
+
+void _pv_add(daemon_handle h, const char *uuid, const char *metadata)
+{
+ daemon_reply reply = daemon_send_simple(h, "pv_add", "uuid = %s", uuid,
+ "metadata = %b", metadata,
+ NULL);
+ _handle_reply(reply);
+}
+
+int scan(daemon_handle h, char *fn) {
+ struct device *dev = dev_cache_get(fn, NULL);
+
+ struct label *label;
+ if (!label_read(dev, &label, 0)) {
+ fprintf(stderr, "[C] no label found on %s\n", fn);
+ return;
+ }
+
+ char uuid[64];
+ id_write_format(dev->pvid, uuid, 64);
+ fprintf(stderr, "[C] found PV: %s\n", uuid);
+ struct lvmcache_info *info = (struct lvmcache_info *) label->info;
+ struct physical_volume pv = { 0, };
+
+ if (!(info->fmt->ops->pv_read(info->fmt, dev_name(dev), &pv, 0))) {
+ fprintf(stderr, "[C] Failed to read PV %s", dev_name(dev));
+ return;
+ }
+
+ struct format_instance_ctx fic;
+ struct format_instance *fid = info->fmt->ops->create_instance(info->fmt, &fic);
+ struct metadata_area *mda;
+ struct volume_group *vg = NULL;
+ dm_list_iterate_items(mda, &info->mdas) {
+ struct volume_group *this = mda->ops->vg_read(fid, "", mda);
+ if (this && !vg || this->seqno > vg->seqno)
+ vg = this;
+ }
+ if (vg) {
+ char *buf = NULL;
+ /* TODO. This is not entirely correct, since export_vg_to_buffer
+ * adds trailing garbage to the buffer. We may need to use
+ * export_vg_to_config_tree and format the buffer ourselves. It
+ * does, however, work for now, since the garbage is well
+ * formatted and has no conflicting keys with the rest of the
+ * request. */
+ export_vg_to_buffer(vg, &buf);
+ daemon_reply reply =
+ daemon_send_simple(h, "pv_add", "uuid = %s", uuid,
+ "metadata = %b", strchr(buf, '{'),
+ NULL);
+ _handle_reply(reply);
+ }
+}
+
+void _dump_vg(daemon_handle h, const char *uuid)
+{
+ daemon_reply reply = daemon_send_simple(h, "vg_by_uuid", "uuid = %s", uuid, NULL);
+ fprintf(stderr, "[C] reply buffer: %s\n", reply.buffer);
+ daemon_reply_destroy(reply);
+}
+
+int main(int argc, char **argv) {
+ daemon_handle h = lvmetad_open();
+
+ if (argc > 1) {
+ int i;
+ struct cmd_context *cmd = create_toolcontext(0, NULL, 0, 0);
+ for (i = 1; i < argc; ++i) {
+ const char *uuid = NULL;
+ scan(h, argv[i]);
+ }
+ destroy_toolcontext(cmd);
+ return 0;
+ }
+
+ _pv_add(h, uuid1, NULL);
+ _pv_add(h, uuid2, metadata2);
+ _dump_vg(h, vgid);
+ _pv_add(h, uuid3, NULL);
+
+ daemon_close(h);
+ return 0;
+}