diff options
author | Anas Nashif <anas.nashif@intel.com> | 2013-03-05 01:47:43 -0800 |
---|---|---|
committer | Anas Nashif <anas.nashif@intel.com> | 2013-03-05 01:47:43 -0800 |
commit | 44a3c2255bc480c82f34db156553a595606d8a0b (patch) | |
tree | 5e6df96a6c6e40207cb3a711860e16b543918c0d /daemons | |
parent | 8bd28eea831fd5215c12e6fcecc8e9a772398ed9 (diff) | |
download | device-mapper-44a3c2255bc480c82f34db156553a595606d8a0b.tar.gz device-mapper-44a3c2255bc480c82f34db156553a595606d8a0b.tar.bz2 device-mapper-44a3c2255bc480c82f34db156553a595606d8a0b.zip |
Imported Upstream version 2.02.98upstream/2.02.98upstream/1.02.77
Diffstat (limited to 'daemons')
49 files changed, 3662 insertions, 2419 deletions
diff --git a/daemons/Makefile.in b/daemons/Makefile.in index ce400d7..c83e0da 100644 --- a/daemons/Makefile.in +++ b/daemons/Makefile.in @@ -1,5 +1,5 @@ # -# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. # # This file is part of LVM2. # @@ -15,10 +15,14 @@ srcdir = @srcdir@ top_srcdir = @top_srcdir@ top_builddir = @top_builddir@ -.PHONY: dmeventd clvmd cmirrord +ifeq ("@BUILD_LVMETAD@", "yes") + SUBDIRS += lvmetad +endif + +.PHONY: dmeventd clvmd cmirrord lvmetad ifneq ("@CLVMD@", "none") - SUBDIRS = clvmd + SUBDIRS += clvmd endif ifeq ("@BUILD_CMIRRORD@", "yes") @@ -33,7 +37,7 @@ endif endif ifeq ($(MAKECMDGOALS),distclean) - SUBDIRS = clvmd cmirrord dmeventd + SUBDIRS = clvmd cmirrord dmeventd lvmetad endif include $(top_builddir)/make.tmpl diff --git a/daemons/clvmd/Makefile.in b/daemons/clvmd/Makefile.in index ee19e6c..9ca11ba 100644 --- a/daemons/clvmd/Makefile.in +++ b/daemons/clvmd/Makefile.in @@ -15,18 +15,16 @@ srcdir = @srcdir@ top_srcdir = @top_srcdir@ top_builddir = @top_builddir@ -CCS_LIBS = @CCS_LIBS@ -CCS_CFLAGS = @CCS_CFLAGS@ CMAN_LIBS = @CMAN_LIBS@ CMAN_CFLAGS = @CMAN_CFLAGS@ +CMAP_LIBS = @CMAP_LIBS@ +CMAP_CFLAGS = @CMAP_CFLAGS@ CONFDB_LIBS = @CONFDB_LIBS@ CONFDB_CFLAGS = @CONFDB_CFLAGS@ CPG_LIBS = @CPG_LIBS@ CPG_CFLAGS = @CPG_CFLAGS@ DLM_LIBS = @DLM_LIBS@ DLM_CFLAGS = @DLM_CFLAGS@ -GULM_LIBS = @GULM_LIBS@ -GULM_CFLAGS = @GULM_CFLAGS@ QUORUM_LIBS = @QUORUM_LIBS@ QUORUM_CFLAGS = @QUORUM_CFLAGS@ SALCK_LIBS = @SALCK_LIBS@ @@ -42,13 +40,6 @@ ifeq ("@DEBUG@", "yes") DEFS += -DDEBUG endif -ifneq (,$(findstring gulm,, "@CLVMD@,")) - SOURCES += clvmd-gulm.c tcp-comms.c - LMLIBS += $(CCS_LIBS) $(GULM_LIBS) - CFLAGS += $(CCS_CFLAGS) $(GULM_CFLAGS) - DEFS += -DUSE_GULM -endif - ifneq (,$(findstring cman,, "@CLVMD@,")) SOURCES += clvmd-cman.c LMLIBS += $(CMAN_LIBS) $(CONFDB_LIBS) $(DLM_LIBS) @@ -65,8 +56,8 @@ endif ifneq (,$(findstring corosync,, "@CLVMD@,")) SOURCES += clvmd-corosync.c - LMLIBS += $(CONFDB_LIBS) $(CPG_LIBS) $(DLM_LIBS) $(QUORUM_LIBS) - CFLAGS += $(CONFDB_CFLAGS) $(CPG_CFLAGS) $(DLM_CFLAGS) $(QUORUM_CFLAGS) + LMLIBS += $(CMAP_LIBS) $(CONFDB_LIBS) $(CPG_LIBS) $(DLM_LIBS) $(QUORUM_LIBS) + CFLAGS += $(CMAP_CFLAGS) $(CONFDB_CFLAGS) $(CPG_CFLAGS) $(DLM_CFLAGS) $(QUORUM_CFLAGS) DEFS += -DUSE_COROSYNC endif @@ -76,7 +67,6 @@ ifneq (,$(findstring singlenode,, "@CLVMD@,")) endif ifeq ($(MAKECMDGOALS),distclean) - SOURCES += clvmd-gulm.c tcp-comms.c SOURCES += clvmd-cman.c SOURCES += clvmd-openais.c SOURCES += clvmd-corosync.c diff --git a/daemons/clvmd/clvm.h b/daemons/clvmd/clvm.h index c9ea10c..8e24f15 100644 --- a/daemons/clvmd/clvm.h +++ b/daemons/clvmd/clvm.h @@ -30,22 +30,23 @@ struct clvm_header { uint16_t xid; /* Transaction ID */ uint32_t clientid; /* Only used in Daemon->Daemon comms */ int32_t status; /* For replies, whether request succeeded */ - uint32_t arglen; /* Length of argument below. - If >1500 then it will be passed + uint32_t arglen; /* Length of argument below. + If >1500 then it will be passed around the cluster in the system LV */ char node[1]; /* Actually a NUL-terminated string, node name. - If this is empty then the command is - forwarded to all cluster nodes unless - FLAG_LOCAL is also set. */ - char args[1]; /* Arguments for the command follow the + If this is empty then the command is + forwarded to all cluster nodes unless + FLAG_LOCAL or FLAG_REMOTE is also set. */ + char args[1]; /* Arguments for the command follow the node name, This member is only valid if the node name is empty */ } __attribute__ ((packed)); /* Flags */ -#define CLVMD_FLAG_LOCAL 1 /* Only do this on the local node */ -#define CLVMD_FLAG_SYSTEMLV 2 /* Data in system LV under my node name */ -#define CLVMD_FLAG_NODEERRS 4 /* Reply has errors in node-specific portion */ +#define CLVMD_FLAG_LOCAL 1 /* Only do this on the local node */ +#define CLVMD_FLAG_SYSTEMLV 2 /* Data in system LV under my node name */ +#define CLVMD_FLAG_NODEERRS 4 /* Reply has errors in node-specific portion */ +#define CLVMD_FLAG_REMOTE 8 /* Do this on all nodes except for the local node */ /* Name of the local socket to communicate between lvm and clvmd */ static const char CLVMD_SOCKNAME[]= DEFAULT_RUN_DIR "/clvmd.sock"; @@ -53,7 +54,7 @@ static const char CLVMD_SOCKNAME[]= DEFAULT_RUN_DIR "/clvmd.sock"; /* Internal commands & replies */ #define CLVMD_CMD_REPLY 1 #define CLVMD_CMD_VERSION 2 /* Send version around cluster when we start */ -#define CLVMD_CMD_GOAWAY 3 /* Die if received this - we are running +#define CLVMD_CMD_GOAWAY 3 /* Die if received this - we are running an incompatible version */ #define CLVMD_CMD_TEST 4 /* Just for mucking about */ @@ -71,4 +72,11 @@ static const char CLVMD_SOCKNAME[]= DEFAULT_RUN_DIR "/clvmd.sock"; #define CLVMD_CMD_SET_DEBUG 42 #define CLVMD_CMD_VG_BACKUP 43 #define CLVMD_CMD_RESTART 44 +#define CLVMD_CMD_SYNC_NAMES 45 + +/* Used internally by some callers, but not part of the protocol.*/ +#define NODE_ALL "*" +#define NODE_LOCAL "." +#define NODE_REMOTE "^" + #endif diff --git a/daemons/clvmd/clvmd-cman.c b/daemons/clvmd/clvmd-cman.c index 52da2ac..7e76dc4 100644 --- a/daemons/clvmd/clvmd-cman.c +++ b/daemons/clvmd/clvmd-cman.c @@ -89,16 +89,17 @@ static int _init_cluster(void) DEBUGLOG("CMAN initialisation complete\n"); /* Create a lockspace for LV & VG locks to live in */ - lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600); + lockspace = dlm_open_lockspace(LOCKSPACE_NAME); if (!lockspace) { - if (errno == EEXIST) { - lockspace = dlm_open_lockspace(LOCKSPACE_NAME); - } + lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600); if (!lockspace) { - syslog(LOG_ERR, "Unable to create lockspace for CLVM: %m"); + syslog(LOG_ERR, "Unable to create DLM lockspace for CLVM: %m"); return -1; } - } + DEBUGLOG("Created DLM lockspace for CLVMD.\n"); + } else + DEBUGLOG("Opened existing DLM lockspace for CLVMD.\n"); + dlm_ls_pthread_init(lockspace); DEBUGLOG("DLM initialisation complete\n"); return 0; @@ -478,6 +479,7 @@ static int _get_cluster_name(char *buf, int buflen) } static struct cluster_ops _cluster_cman_ops = { + .name = "cman", .cluster_init_completed = _cluster_init_completed, .cluster_send_message = _cluster_send_message, .name_from_csid = _name_from_csid, diff --git a/daemons/clvmd/clvmd-command.c b/daemons/clvmd/clvmd-command.c index 49f1197..c5cd461 100644 --- a/daemons/clvmd/clvmd-command.c +++ b/daemons/clvmd/clvmd-command.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -51,19 +51,16 @@ */ #include "clvmd-common.h" - -#include <pthread.h> - #include "clvmd-comms.h" #include "clvm.h" #include "clvmd.h" +#include "lvm-globals.h" #include "lvm-functions.h" #include "locking.h" #include <sys/utsname.h> -extern debug_t debug; extern struct cluster_ops *clops; static int restart_clvmd(void); @@ -81,6 +78,9 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen, unsigned char lock_cmd; unsigned char lock_flags; + /* Reset test mode before we start */ + init_test(0); + /* Do the command */ switch (msg->cmd) { /* Just a test message */ @@ -96,7 +96,9 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen, *buf = new_buf; } if (*buf) { - uname(&nodeinfo); + if (uname(&nodeinfo)) + memset(&nodeinfo, 0, sizeof(nodeinfo)); + *retlen = 1 + dm_snprintf(*buf, buflen, "TEST from %s: %s v%s", nodeinfo.nodename, args, @@ -110,14 +112,18 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen, lockname = &args[2]; /* Check to see if the VG is in use by LVM1 */ status = do_check_lvm1(lockname); + if (lock_flags & LCK_TEST_MODE) + init_test(1); do_lock_vg(lock_cmd, lock_flags, lockname); break; case CLVMD_CMD_LOCK_LV: /* This is the biggie */ - lock_cmd = args[0] & (LCK_NONBLOCK | LCK_HOLD | LCK_SCOPE_MASK | LCK_TYPE_MASK); + lock_cmd = args[0]; lock_flags = args[1]; lockname = &args[2]; + if (lock_flags & LCK_TEST_MODE) + init_test(1); status = do_lock_lv(lock_cmd, lock_flags, lockname); /* Replace EIO with something less scary */ if (status == EIO) { @@ -139,12 +145,16 @@ int do_command(struct local_client *client, struct clvm_header *msg, int msglen, do_refresh_cache(); break; + case CLVMD_CMD_SYNC_NAMES: + lvm_do_fs_unlock(); + break; + case CLVMD_CMD_SET_DEBUG: - debug = args[0]; + clvmd_set_debug((debug_t) args[0]); break; case CLVMD_CMD_RESTART: - restart_clvmd(); + status = restart_clvmd(); break; case CLVMD_CMD_GET_CLUSTERNAME: @@ -182,7 +192,6 @@ static int lock_vg(struct local_client *client) struct clvm_header *header = (struct clvm_header *) client->bits.localsock.cmd; unsigned char lock_cmd; - unsigned char lock_flags; int lock_mode; char *args = header->node + strlen(header->node) + 1; int lkid; @@ -204,7 +213,7 @@ static int lock_vg(struct local_client *client) lock_cmd = args[0] & (LCK_NONBLOCK | LCK_HOLD | LCK_SCOPE_MASK | LCK_TYPE_MASK); lock_mode = ((int)lock_cmd & LCK_TYPE_MASK); - lock_flags = args[1]; + /* lock_flags = args[1]; */ lockname = &args[2]; DEBUGLOG("doing PRE command LOCK_VG '%s' at %x (client=%p)\n", lockname, lock_cmd, client); @@ -228,7 +237,8 @@ static int lock_vg(struct local_client *client) if (status) status = errno; else - dm_hash_insert(lock_hash, lockname, (void *)(long)lkid); + if (!dm_hash_insert(lock_hash, lockname, (void *)(long)lkid)) + return ENOMEM; } return status; @@ -245,10 +255,11 @@ int do_pre_command(struct local_client *client) unsigned char lock_cmd; unsigned char lock_flags; char *args = header->node + strlen(header->node) + 1; - int lockid; + int lockid = 0; int status = 0; char *lockname; + init_test(0); switch (header->cmd) { case CLVMD_CMD_TEST: status = sync_lock("CLVMD_TEST", LCK_EXCL, 0, &lockid); @@ -267,6 +278,8 @@ int do_pre_command(struct local_client *client) lock_cmd = args[0]; lock_flags = args[1]; lockname = &args[2]; + if (lock_flags & LCK_TEST_MODE) + init_test(1); status = pre_lock_lv(lock_cmd, lock_flags, lockname); break; @@ -274,6 +287,7 @@ int do_pre_command(struct local_client *client) case CLVMD_CMD_GET_CLUSTERNAME: case CLVMD_CMD_SET_DEBUG: case CLVMD_CMD_VG_BACKUP: + case CLVMD_CMD_SYNC_NAMES: case CLVMD_CMD_LOCK_QUERY: case CLVMD_CMD_RESTART: break; @@ -297,6 +311,7 @@ int do_post_command(struct local_client *client) char *args = header->node + strlen(header->node) + 1; char *lockname; + init_test(0); switch (header->cmd) { case CLVMD_CMD_TEST: status = @@ -304,18 +319,18 @@ int do_post_command(struct local_client *client) client->bits.localsock.private = 0; break; - case CLVMD_CMD_LOCK_VG: - case CLVMD_CMD_VG_BACKUP: - case CLVMD_CMD_LOCK_QUERY: - /* Nothing to do here */ - break; - case CLVMD_CMD_LOCK_LV: lock_cmd = args[0]; lock_flags = args[1]; lockname = &args[2]; + if (lock_flags & LCK_TEST_MODE) + init_test(1); status = post_lock_lv(lock_cmd, lock_flags, lockname); break; + + default: + /* Nothing to do here */ + break; } return status; } @@ -324,90 +339,93 @@ int do_post_command(struct local_client *client) /* Called when the client is about to be deleted */ void cmd_client_cleanup(struct local_client *client) { - if (client->bits.localsock.private) { - struct dm_hash_node *v; - struct dm_hash_table *lock_hash = - (struct dm_hash_table *)client->bits.localsock.private; + struct dm_hash_table *lock_hash; + int lkid; + char *lockname; - dm_hash_iterate(v, lock_hash) { - int lkid = (int)(long)dm_hash_get_data(lock_hash, v); - char *lockname = dm_hash_get_key(lock_hash, v); + if (!client->bits.localsock.private) + return; + lock_hash = (struct dm_hash_table *)client->bits.localsock.private; + + dm_hash_iterate(v, lock_hash) { + lkid = (int)(long)dm_hash_get_data(lock_hash, v); + lockname = dm_hash_get_key(lock_hash, v); DEBUGLOG("cleanup: Unlocking lock %s %x\n", lockname, lkid); - sync_unlock(lockname, lkid); + (void) sync_unlock(lockname, lkid); } dm_hash_destroy(lock_hash); client->bits.localsock.private = 0; - } } static int restart_clvmd(void) { - char **argv = NULL; - char *debug_arg = NULL, *lv_name; - int i, argc = 0, max_locks = 0; + const char **argv; + char *lv_name; + int argc = 0, max_locks = 0; struct dm_hash_node *hn = NULL; + char debug_arg[16]; + const char *clvmd = getenv("LVM_CLVMD_BINARY") ? : CLVMD_PATH; DEBUGLOG("clvmd restart requested\n"); /* Count exclusively-open LVs */ - hn = NULL; do { hn = get_next_excl_lock(hn, &lv_name); - if (lv_name) + if (lv_name) { max_locks++; - } while (hn && *lv_name); + if (!*lv_name) + break; /* FIXME: Is this error ? */ + } + } while (hn); /* clvmd + locks (-E uuid) + debug (-d X) + NULL */ - argv = malloc((max_locks * 2 + 4) * sizeof(*argv)); - if (!argv) + if (!(argv = malloc((max_locks * 2 + 5) * sizeof(*argv)))) goto_out; /* * Build the command-line */ - argv[argc++] = strdup("clvmd"); - if (!argv[0]) - goto_out; + argv[argc++] = "clvmd"; /* Propogate debug options */ - if (debug) { - if (!(debug_arg = malloc(16)) || - dm_snprintf(debug_arg, 16, "-d%d", (int)debug) < 0) + if (clvmd_get_debug()) { + if (dm_snprintf(debug_arg, sizeof(debug_arg), "-d%u", clvmd_get_debug()) < 0) goto_out; argv[argc++] = debug_arg; } + argv[argc++] = "-I"; + argv[argc++] = clops->name; + /* Now add the exclusively-open LVs */ + hn = NULL; do { hn = get_next_excl_lock(hn, &lv_name); if (lv_name) { - argv[argc] = strdup("-E"); - if (!argv[argc++]) - goto_out; - argv[argc] = strdup(lv_name); - if (!argv[argc++]) - goto_out; - + if (!*lv_name) + break; /* FIXME: Is this error ? */ + argv[argc++] = "-E"; + argv[argc++] = lv_name; DEBUGLOG("excl lock: %s\n", lv_name); - hn = get_next_excl_lock(hn, &lv_name); } - } while (hn && *lv_name); - argv[argc++] = NULL; + } while (hn); + argv[argc] = NULL; /* Exec new clvmd */ + DEBUGLOG("--- Restarting %s ---\n", clvmd); + for (argc = 1; argv[argc]; argc++) DEBUGLOG("--- %d: %s\n", argc, argv[argc]); + /* NOTE: This will fail when downgrading! */ - execve(CLVMD_PATH, argv, NULL); + execvp(clvmd, (char **)argv); out: /* We failed */ DEBUGLOG("Restart of clvmd failed.\n"); - for (i = 0; i < argc && argv[i]; i++) - free(argv[i]); free(argv); - return 0; + return EIO; } diff --git a/daemons/clvmd/clvmd-comms.h b/daemons/clvmd/clvmd-comms.h index fbcfe8b..7207334 100644 --- a/daemons/clvmd/clvmd-comms.h +++ b/daemons/clvmd/clvmd-comms.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -23,6 +23,7 @@ struct local_client; struct cluster_ops { + const char *name; void (*cluster_init_completed) (void); int (*cluster_send_message) (const void *buf, int msglen, @@ -54,13 +55,6 @@ struct cluster_ops { }; -#ifdef USE_GULM -# include "tcp-comms.h" -struct cluster_ops *init_gulm_cluster(void); -#define MAX_CSID_LEN GULM_MAX_CSID_LEN -#define MAX_CLUSTER_MEMBER_NAME_LEN GULM_MAX_CLUSTER_MEMBER_NAME_LEN -#endif - #ifdef USE_CMAN # include <netinet/in.h> # include "libcman.h" diff --git a/daemons/clvmd/clvmd-corosync.c b/daemons/clvmd/clvmd-corosync.c index cfe7150..d85ec1e 100644 --- a/daemons/clvmd/clvmd-corosync.c +++ b/daemons/clvmd/clvmd-corosync.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009 Red Hat, Inc. All rights reserved. + * Copyright (C) 2009-2012 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -30,7 +30,15 @@ #include <corosync/cpg.h> #include <corosync/quorum.h> -#include <corosync/confdb.h> + +#ifdef HAVE_COROSYNC_CONFDB_H +# include <corosync/confdb.h> +#elif defined HAVE_COROSYNC_CMAP_H +# include <corosync/cmap.h> +#else +# error "Either HAVE_COROSYNC_CONFDB_H or HAVE_COROSYNC_CMAP_H must be defined." +#endif + #include <libdlm.h> #include <syslog.h> @@ -274,6 +282,10 @@ static int _init_cluster(void) { cs_error_t err; +#ifdef QUORUM_SET /* corosync/quorum.h */ + uint32_t quorum_type; +#endif + node_hash = dm_hash_create(100); err = cpg_initialize(&cpg_handle, @@ -285,8 +297,21 @@ static int _init_cluster(void) return cs_to_errno(err); } +#ifdef QUORUM_SET + err = quorum_initialize(&quorum_handle, + &quorum_callbacks, + &quorum_type); + + if (quorum_type != QUORUM_SET) { + syslog(LOG_ERR, "Corosync quorum service is not configured"); + DEBUGLOG("Corosync quorum service is not configured"); + return EINVAL; + } +#else err = quorum_initialize(&quorum_handle, &quorum_callbacks); +#endif + if (err != CS_OK) { syslog(LOG_ERR, "Cannot initialise Corosync quorum service: %d", err); @@ -294,19 +319,18 @@ static int _init_cluster(void) return cs_to_errno(err); } - /* Create a lockspace for LV & VG locks to live in */ - lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600); + lockspace = dlm_open_lockspace(LOCKSPACE_NAME); if (!lockspace) { - if (errno == EEXIST) { - lockspace = dlm_open_lockspace(LOCKSPACE_NAME); - } + lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600); if (!lockspace) { - syslog(LOG_ERR, "Unable to create lockspace for CLVM: %m"); - quorum_finalize(quorum_handle); + syslog(LOG_ERR, "Unable to create DLM lockspace for CLVM: %m"); return -1; } - } + DEBUGLOG("Created DLM lockspace for CLVMD.\n"); + } else + DEBUGLOG("Opened existing DLM lockspace for CLVMD.\n"); + dlm_ls_pthread_init(lockspace); DEBUGLOG("DLM initialisation complete\n"); @@ -552,6 +576,7 @@ static int _cluster_send_message(const void *buf, int msglen, const char *csid, return cs_to_errno(err); } +#ifdef HAVE_COROSYNC_CONFDB_H /* * We are not necessarily connected to a Red Hat Cluster system, * but if we are, this returns the cluster name from cluster.conf. @@ -598,7 +623,40 @@ out: return 0; } +#elif defined HAVE_COROSYNC_CMAP_H + +static int _get_cluster_name(char *buf, int buflen) +{ + cmap_handle_t cmap_handle = 0; + int result; + char *name = NULL; + + /* This is a default in case everything else fails */ + strncpy(buf, "Corosync", buflen); + + /* Look for a cluster name in cmap */ + result = cmap_initialize(&cmap_handle); + if (result != CS_OK) + return 0; + + result = cmap_get_string(cmap_handle, "totem.cluster_name", &name); + if (result != CS_OK) + goto out; + + memset(buf, 0, buflen); + strncpy(buf, name, buflen - 1); + +out: + if (name) + free(name); + cmap_finalize(cmap_handle); + return 0; +} + +#endif + static struct cluster_ops _cluster_corosync_ops = { + .name = "corosync", .cluster_init_completed = NULL, .cluster_send_message = _cluster_send_message, .name_from_csid = _name_from_csid, diff --git a/daemons/clvmd/clvmd-gulm.c b/daemons/clvmd/clvmd-gulm.c deleted file mode 100644 index 3561004..0000000 --- a/daemons/clvmd/clvmd-gulm.c +++ /dev/null @@ -1,1010 +0,0 @@ -/* - * Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU Lesser General Public License v.2.1. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * This provides the interface between clvmd and gulm as the cluster - * and lock manager. - * - * It also provides the "liblm" functions too as it's hard (and pointless) - * to seperate them out when using gulm. - * - * What it does /not/ provide is the communications between clvmd daemons - * on the cluster nodes. That is done in tcp-comms.c - */ - -#include "clvmd-common.h" - -#include <pthread.h> -#include <sys/utsname.h> -#include <sys/ioctl.h> -#include <sys/socket.h> -#include <sys/file.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <stdint.h> -#include <signal.h> -#include <fcntl.h> -#include <stddef.h> -#include <stdint.h> -#include <utmpx.h> -#include <syslog.h> -#include <assert.h> -#include <ccs.h> -#include <libgulm.h> - -#include "locking.h" -#include "clvm.h" -#include "clvmd-comms.h" -#include "lvm-functions.h" -#include "clvmd.h" -#include "clvmd-gulm.h" - -/* Hash list of nodes in the cluster */ -static struct dm_hash_table *node_hash; - -/* hash list of outstanding lock requests */ -static struct dm_hash_table *lock_hash; - -/* Copy of the current quorate state */ -static uint8_t gulm_quorate = 0; -static enum {INIT_NOTDONE, INIT_DONE, INIT_WAITQUORATE} init_state = INIT_NOTDONE; - -/* Number of active nodes */ -static int num_nodes; - -static char *cluster_name; -static int in_shutdown = 0; - -static pthread_mutex_t lock_start_mutex; -static volatile int lock_start_flag; - -struct node_info -{ - enum {NODE_UNKNOWN, NODE_DOWN, NODE_UP, NODE_CLVMD} state; - char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN]; -}; - -struct lock_wait -{ - pthread_cond_t cond; - pthread_mutex_t mutex; - int status; -}; - -/* Forward */ -static int read_from_core_sock(struct local_client *client, char *buf, int len, const char *csid, - struct local_client **new_client); -static int read_from_lock_sock(struct local_client *client, char *buf, int len, const char *csid, - struct local_client **new_client); -static int get_all_cluster_nodes(void); -static int _csid_from_name(char *csid, const char *name); -static void _cluster_closedown(void); - -/* In tcp-comms.c */ -extern struct dm_hash_table *sock_hash; - -static int add_internal_client(int fd, fd_callback_t callback) -{ - struct local_client *client; - - DEBUGLOG("Add_internal_client, fd = %d\n", fd); - - /* Add a GULM file descriptor it to the main loop */ - client = malloc(sizeof(struct local_client)); - if (!client) - { - DEBUGLOG("malloc failed\n"); - return -1; - } - - memset(client, 0, sizeof(struct local_client)); - client->fd = fd; - client->type = CLUSTER_INTERNAL; - client->callback = callback; - add_client(client); - - /* Set Close-on-exec */ - fcntl(fd, F_SETFD, 1); - - return 0; -} - -/* Gulm library handle */ -static gulm_interface_p gulm_if; -static lg_core_callbacks_t core_callbacks; -static lg_lockspace_callbacks_t lock_callbacks; - -static void badsig_handler(int sig) -{ - DEBUGLOG("got sig %d\n", sig); - _cluster_closedown(); - exit(0); -} - -static void _reread_config(void) -{ - /* Re-read CCS node list */ - DEBUGLOG("Re-reading CCS config\n"); - get_all_cluster_nodes(); -} - -static int _init_cluster(void) -{ - int status; - int ccs_h; - int port = 0; - char *portstr; - - /* Get cluster name from CCS */ - ccs_h = ccs_force_connect(NULL, 0); - if (ccs_h < 0) - { - syslog(LOG_ERR, "Cannot login in to CCSD server\n"); - return -1; - } - - ccs_get(ccs_h, "//cluster/@name", &cluster_name); - DEBUGLOG("got cluster name %s\n", cluster_name); - - if (!ccs_get(ccs_h, "//cluster/clvm/@port", &portstr)) - { - port = atoi(portstr); - free(portstr); - DEBUGLOG("got port number %d\n", port); - - if (port <= 0 && port >= 65536) - port = 0; - } - - ccs_disconnect(ccs_h); - - /* Block locking until we are logged in */ - pthread_mutex_init(&lock_start_mutex, NULL); - pthread_mutex_lock(&lock_start_mutex); - lock_start_flag = 1; - - node_hash = dm_hash_create(100); - lock_hash = dm_hash_create(10); - - /* Get all nodes from CCS */ - if (get_all_cluster_nodes()) - return -1; - - /* Initialise GULM library */ - status = lg_initialize(&gulm_if, cluster_name, "clvmd"); - if (status) - { - DEBUGLOG("lg_initialize failed: %d\n", status); - return status; - } - - /* Connect to core - we are not "important" :-) */ - status = lg_core_login(gulm_if, 0); - if (status) - { - DEBUGLOG("lg_core_login failed: %d\n", status); - return status; - } - - /* Initialise the inter-node comms */ - status = init_comms(port); - if (status) - return status; - - /* Add core FD to the list */ - status = add_internal_client(lg_core_selector(gulm_if), read_from_core_sock); - if (status) - { - DEBUGLOG("can't allocate client space\n"); - return status; - } - - /* Connect to the lock server */ - if (lg_lock_login(gulm_if, "CLVM")) - { - syslog(LOG_ERR, "Cannot login in to LOCK server\n"); - DEBUGLOG("Cannot login in to LOCK server\n"); - exit(88); - } - - /* Add lockspace FD to the list */ - status = add_internal_client(lg_lock_selector(gulm_if), read_from_lock_sock); - if (status) - { - DEBUGLOG("can't allocate client space\n"); - exit(status); - } - - /* Request a list of nodes, we can't really do anything until - this comes back */ - status = lg_core_nodelist(gulm_if); - if (status) - { - DEBUGLOG("lg_core_nodelist failed: %d\n", status); - return status; - } - - /* So I can kill it without taking GULM down too */ - signal(SIGINT, badsig_handler); - signal(SIGTERM, badsig_handler); - - return 0; -} - -static void _cluster_closedown(void) -{ - DEBUGLOG("cluster_closedown\n"); - in_shutdown = 1; - destroy_lvhash(); - lg_lock_logout(gulm_if); - lg_core_logout(gulm_if); - lg_release(gulm_if); -} - -/* Expire locks for a named node, or us */ -#define GIO_KEY_SIZE 46 -static void drop_expired_locks(char *nodename) -{ - struct utsname nodeinfo; - uint8_t mask[GIO_KEY_SIZE]; - - DEBUGLOG("Dropping expired locks for %s\n", nodename?nodename:"(null)"); - memset(mask, 0xff, GIO_KEY_SIZE); - - if (!nodename) - { - uname(&nodeinfo); - nodename = nodeinfo.nodename; - } - - if (lg_lock_drop_exp(gulm_if, nodename, mask, GIO_KEY_SIZE)) - { - DEBUGLOG("Error calling lg_lock_drop_exp()\n"); - } -} - - -static int read_from_core_sock(struct local_client *client, char *buf, int len, const char *csid, - struct local_client **new_client) -{ - int status; - - *new_client = NULL; - status = lg_core_handle_messages(gulm_if, &core_callbacks, NULL); - return status<0 ? status : 1; -} - -static int read_from_lock_sock(struct local_client *client, char *buf, int len, const char *csid, - struct local_client **new_client) -{ - int status; - - *new_client = NULL; - status = lg_lock_handle_messages(gulm_if, &lock_callbacks, NULL); - return status<0 ? status : 1; -} - - -/* CORE callback routines */ -static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t rank, uint8_t corestate) -{ - DEBUGLOG("CORE Got a Login reply. gen:%lld err:%d rank:%d corestate:%d\n", - gen, error, rank, corestate); - - if (error) - exit(error); - - /* Get the current core state (for quorum) */ - lg_core_corestate(gulm_if); - - return 0; -} - -static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate) -{ - if (nodestate == lg_core_Logged_in) - { - /* Don't clobber NODE_CLVMD state */ - if (ninfo->state != NODE_CLVMD) - { - if (ninfo->state == NODE_UNKNOWN || - ninfo->state == NODE_DOWN) - num_nodes++; - - ninfo->state = NODE_UP; - } - } - else - { - if (nodestate == lg_core_Expired || - nodestate == lg_core_Fenced || - nodestate == lg_core_Logged_out) - { - if (ninfo->state != NODE_DOWN) - num_nodes--; - ninfo->state = NODE_DOWN; - } - } - /* Gulm doesn't always send node DOWN events, so even if this a a node UP we must - * assume (ahem) that it prevously went down at some time. So we close - * the sockets here to make sure that we don't have any dead connections - * to that node. - */ - tcp_remove_client(csid); - - DEBUGLOG("set_node_state, '%s' state = %d num_nodes=%d\n", - ninfo->name, ninfo->state, num_nodes); -} - -static struct node_info *add_or_set_node(char *name, struct in6_addr *ip, uint8_t state) -{ - struct node_info *ninfo; - - ninfo = dm_hash_lookup_binary(node_hash, (char *)ip, GULM_MAX_CSID_LEN); - if (!ninfo) - { - /* If we can't find that node then re-read the config file in case it - was added after we were started */ - DEBUGLOG("Node %s not found, re-reading config file\n", name); - get_all_cluster_nodes(); - - /* Now try again */ - ninfo = dm_hash_lookup_binary(node_hash, (char *)ip, GULM_MAX_CSID_LEN); - if (!ninfo) - { - DEBUGLOG("Ignoring node %s, not part of the SAN cluster\n", name); - return NULL; - } - } - - set_node_state(ninfo, (char *)ip, state); - - return ninfo; -} - -static void _get_our_csid(char *csid) -{ - get_our_gulm_csid(csid); -} - -static int core_nodelist(void *misc, lglcb_t type, char *name, struct in6_addr *ip, uint8_t state) -{ - DEBUGLOG("CORE nodelist\n"); - - if (type == lglcb_start) - { - DEBUGLOG("Got Nodelist, start\n"); - } - else - { - if (type == lglcb_item) - { - DEBUGLOG("Got nodelist, item: %s, %#x\n", name, state); - - add_or_set_node(name, ip, state); - } - else - { - if (type == lglcb_stop) - { - char ourcsid[GULM_MAX_CSID_LEN]; - - DEBUGLOG("Got Nodelist, stop\n"); - if (gulm_quorate) - { - clvmd_cluster_init_completed(); - init_state = INIT_DONE; - } - else - { - if (init_state == INIT_NOTDONE) - init_state = INIT_WAITQUORATE; - } - - /* Mark ourself as up */ - _get_our_csid(ourcsid); - gulm_add_up_node(ourcsid); - } - else - { - DEBUGLOG("Unknown lglcb_t %#x\n", type); - } - } - } - - return 0; -} - -static int core_statechange(void *misc, uint8_t corestate, uint8_t quorate, struct in6_addr *masterip, char *mastername) -{ - DEBUGLOG("CORE Got statechange. quorate:%d, corestate:%x mastername:%s\n", - quorate, corestate, mastername); - - gulm_quorate = quorate; - if (quorate && init_state == INIT_WAITQUORATE) - { - clvmd_cluster_init_completed(); - init_state = INIT_DONE; - } - return 0; -} - -static int core_nodechange(void *misc, char *nodename, struct in6_addr *nodeip, uint8_t nodestate) -{ - struct node_info *ninfo; - - DEBUGLOG("CORE node change, name=%s, state = %d\n", nodename, nodestate); - - /* If we don't get nodeip here, try a lookup by name */ - if (!nodeip) - _csid_from_name((char *)nodeip, nodename); - if (!nodeip) - return 0; - - ninfo = add_or_set_node(nodename, nodeip, nodestate); - if (!ninfo) - return 0; - - /* Check if we need to drop any expired locks */ - if (ninfo->state == NODE_DOWN) - { - drop_expired_locks(nodename); - } - - return 0; -} -static int core_error(void *misc, uint32_t err) -{ - DEBUGLOG("CORE error: %d\n", err); - // Not sure what happens here - return 0; -} - -/* LOCK callback routines */ -static int lock_login_reply(void *misc, uint32_t error, uint8_t which) -{ - DEBUGLOG("LOCK Got a Login reply. err:%d which:%d\n", - error, which); - - if (error) - exit(error); - - /* Drop any expired locks for us that might be hanging around */ - drop_expired_locks(NULL); - - /* Enable locking operations in other threads */ - if (lock_start_flag) - { - lock_start_flag = 0; - pthread_mutex_unlock(&lock_start_mutex); - } - - return 0; -} - -static int lock_lock_state(void *misc, uint8_t *key, uint16_t keylen, - uint64_t subid, uint64_t start, uint64_t stop, - uint8_t state, uint32_t flags, uint32_t error, - uint8_t *LVB, uint16_t LVBlen) -{ - struct lock_wait *lwait; - - DEBUGLOG("LOCK lock state: %s, error = %d\n", key, error); - - /* No waiting process to wake up when we are shutting down */ - if (in_shutdown) - return 0; - - lwait = dm_hash_lookup(lock_hash, key); - if (!lwait) - { - DEBUGLOG("Can't find hash entry for resource %s\n", key); - return 0; - } - lwait->status = error; - pthread_mutex_lock(&lwait->mutex); - pthread_cond_signal(&lwait->cond); - pthread_mutex_unlock(&lwait->mutex); - - return 0; -} -static int lock_error(void *misc, uint32_t err) -{ - DEBUGLOG("LOCK error: %d\n", err); - // Not sure what happens here - return 0; -} - - -/* CORE callbacks */ -static lg_core_callbacks_t core_callbacks = { - .login_reply = core_login_reply, - .nodelist = core_nodelist, - .statechange = core_statechange, - .nodechange = core_nodechange, - .error = core_error, -}; - -/* LOCK callbacks */ -static lg_lockspace_callbacks_t lock_callbacks = { - .login_reply = lock_login_reply, - .lock_state = lock_lock_state, - .error = lock_error, -}; - -/* Allow tcp-comms to loop round the list of active nodes */ -int get_next_node_csid(void **context, char *csid) -{ - struct node_info *ninfo = NULL; - - /* First node */ - if (!*context) - { - *context = dm_hash_get_first(node_hash); - } - else - { - *context = dm_hash_get_next(node_hash, *context); - } - if (*context) - ninfo = dm_hash_get_data(node_hash, *context); - - /* Find a node that is UP */ - while (*context && ninfo->state == NODE_DOWN) - { - *context = dm_hash_get_next(node_hash, *context); - if (*context) - { - ninfo = dm_hash_get_data(node_hash, *context); - } - } - - if (!*context || ninfo->state == NODE_DOWN) - { - return 0; - } - - memcpy(csid, dm_hash_get_key(node_hash, *context), GULM_MAX_CSID_LEN); - return 1; -} - -int gulm_name_from_csid(const char *csid, char *name) -{ - struct node_info *ninfo; - - ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN); - if (!ninfo) - { - sprintf(name, "UNKNOWN %s", print_csid(csid)); - return -1; - } - - strcpy(name, ninfo->name); - return 0; -} - - -static int _csid_from_name(char *csid, const char *name) -{ - struct dm_hash_node *hn; - struct node_info *ninfo; - - dm_hash_iterate(hn, node_hash) - { - ninfo = dm_hash_get_data(node_hash, hn); - if (strcmp(ninfo->name, name) == 0) - { - memcpy(csid, dm_hash_get_key(node_hash, hn), GULM_MAX_CSID_LEN); - return 0; - } - } - return -1; -} - -static int _get_num_nodes() -{ - DEBUGLOG("num_nodes = %d\n", num_nodes); - return num_nodes; -} - -/* Node is now known to be running a clvmd */ -void gulm_add_up_node(const char *csid) -{ - struct node_info *ninfo; - - ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN); - if (!ninfo) { - DEBUGLOG("gulm_add_up_node no node_hash entry for csid %s\n", print_csid(csid)); - return; - } - - DEBUGLOG("gulm_add_up_node %s\n", ninfo->name); - - if (ninfo->state == NODE_DOWN) - num_nodes++; - ninfo->state = NODE_CLVMD; - - return; - -} -/* Node is now known to be NOT running a clvmd */ -void add_down_node(char *csid) -{ - struct node_info *ninfo; - - ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN); - if (!ninfo) - return; - - /* Only set it to UP if it was previously known to be - running clvmd - gulm may set it DOWN quite soon */ - if (ninfo->state == NODE_CLVMD) - ninfo->state = NODE_UP; - drop_expired_locks(ninfo->name); - return; - -} - -/* Call a callback for each node, so the caller knows whether it's up or down */ -static int _cluster_do_node_callback(struct local_client *master_client, - void (*callback)(struct local_client *, const char *csid, int node_up)) -{ - struct dm_hash_node *hn; - struct node_info *ninfo; - int somedown = 0; - - dm_hash_iterate(hn, node_hash) - { - char csid[GULM_MAX_CSID_LEN]; - struct local_client *client; - - ninfo = dm_hash_get_data(node_hash, hn); - memcpy(csid, dm_hash_get_key(node_hash, hn), GULM_MAX_CSID_LEN); - - DEBUGLOG("down_callback. node %s, state = %d\n", ninfo->name, ninfo->state); - - client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN); - if (!client) - { - /* If it's up but not connected, try to make contact */ - if (ninfo->state == NODE_UP) - gulm_connect_csid(csid, &client); - - client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN); - - } - DEBUGLOG("down_callback2. node %s, state = %d\n", ninfo->name, ninfo->state); - if (ninfo->state != NODE_DOWN) - callback(master_client, csid, ninfo->state == NODE_CLVMD); - - if (ninfo->state != NODE_CLVMD) - somedown = -1; - } - return somedown; -} - -/* Convert gulm error codes to unix errno numbers */ -static int gulm_to_errno(int gulm_ret) -{ - switch (gulm_ret) - { - case lg_err_TryFailed: - case lg_err_AlreadyPend: - errno = EAGAIN; - break; - - /* More?? */ - default: - errno = EINVAL; - } - - return gulm_ret ? -1 : 0; -} - -/* Real locking */ -static int _lock_resource(char *resource, int mode, int flags, int *lockid) -{ - int status; - struct lock_wait lwait; - - /* Wait until the lock module is ready */ - if (lock_start_flag) - { - pthread_mutex_lock(&lock_start_mutex); - pthread_mutex_unlock(&lock_start_mutex); - } - - pthread_cond_init(&lwait.cond, NULL); - pthread_mutex_init(&lwait.mutex, NULL); - pthread_mutex_lock(&lwait.mutex); - - /* This needs to be converted from DLM/LVM2 value for GULM */ - if (flags & LCKF_NOQUEUE) flags = lg_lock_flag_Try; - - dm_hash_insert(lock_hash, resource, &lwait); - DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode); - - status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1, - 0, 0, 0, - mode, flags, NULL, 0); - if (status) - { - DEBUGLOG("lg_lock_state returned %d\n", status); - return status; - } - - /* Wait for it to complete */ - pthread_cond_wait(&lwait.cond, &lwait.mutex); - pthread_mutex_unlock(&lwait.mutex); - - dm_hash_remove(lock_hash, resource); - DEBUGLOG("lock-resource returning %d\n", lwait.status); - - return gulm_to_errno(lwait.status); -} - - -static int _unlock_resource(char *resource, int lockid) -{ - int status; - struct lock_wait lwait; - - pthread_cond_init(&lwait.cond, NULL); - pthread_mutex_init(&lwait.mutex, NULL); - pthread_mutex_lock(&lwait.mutex); - - dm_hash_insert(lock_hash, resource, &lwait); - - DEBUGLOG("unlock_resource %s\n", resource); - status = lg_lock_state_req(gulm_if, resource, strlen(resource)+1, - 0, 0, 0, - lg_lock_state_Unlock, 0, NULL, 0); - - if (status) - { - DEBUGLOG("lg_lock_state(unlock) returned %d\n", status); - return status; - } - - /* When we are shutting down, don't wait for unlocks - to be acknowledged, just do it. */ - if (in_shutdown) - return status; - - /* Wait for it to complete */ - - pthread_cond_wait(&lwait.cond, &lwait.mutex); - pthread_mutex_unlock(&lwait.mutex); - - dm_hash_remove(lock_hash, resource); - - return gulm_to_errno(lwait.status); -} - - -/* These two locking functions MUST be called in a seperate thread from - the clvmd main loop because they expect to be woken up by it. - - These are abstractions around the real locking functions (above) - as we need to emulate the DLM's EX/PW/CW interaction with GULM using - two locks. - To aid unlocking, we store the lock mode in the lockid (as GULM - doesn't use this). -*/ -static int _sync_lock(const char *resource, int mode, int flags, int *lockid) -{ - int status; - char lock1[strlen(resource)+3]; - char lock2[strlen(resource)+3]; - - snprintf(lock1, sizeof(lock1), "%s-1", resource); - snprintf(lock2, sizeof(lock2), "%s-2", resource); - - switch (mode) - { - case LCK_EXCL: - status = _lock_resource(lock1, lg_lock_state_Exclusive, flags, lockid); - if (status) - goto out; - - /* If we can't get this lock too then bail out */ - status = _lock_resource(lock2, lg_lock_state_Exclusive, LCK_NONBLOCK, lockid); - if (status == lg_err_TryFailed) - { - _unlock_resource(lock1, *lockid); - status = -1; - errno = EAGAIN; - } - break; - - case LCK_PREAD: - case LCK_READ: - status = _lock_resource(lock1, lg_lock_state_Shared, flags, lockid); - if (status) - goto out; - status = _unlock_resource(lock2, *lockid); - break; - - case LCK_WRITE: - status = _lock_resource(lock2, lg_lock_state_Exclusive, flags, lockid); - if (status) - goto out; - status = _unlock_resource(lock1, *lockid); - break; - - default: - status = -1; - errno = EINVAL; - break; - } - out: - *lockid = mode; - return status; -} - -static int _sync_unlock(const char *resource, int lockid) -{ - int status = 0; - char lock1[strlen(resource)+3]; - char lock2[strlen(resource)+3]; - - snprintf(lock1, sizeof(lock1), "%s-1", resource); - snprintf(lock2, sizeof(lock2), "%s-2", resource); - - /* The held lock mode is in the lock id */ - assert(lockid == LCK_EXCL || - lockid == LCK_READ || - lockid == LCK_PREAD || - lockid == LCK_WRITE); - - status = _unlock_resource(lock1, lockid); - if (!status) - status = _unlock_resource(lock2, lockid); - - return status; -} - -static int _is_quorate() -{ - return gulm_quorate; -} - -/* Get all the cluster node names & IPs from CCS and - add them to our node list so we know who to talk to. - Called when we start up and if we get sent SIGHUP. -*/ -static int get_all_cluster_nodes() -{ - int ctree; - char *nodename; - int error; - int i; - - /* Open the config file */ - ctree = ccs_force_connect(NULL, 1); - if (ctree < 0) - { - log_error("Error connecting to CCS"); - return -1; - } - - for (i=1;;i++) - { - char nodekey[256]; - char nodeip[GULM_MAX_CSID_LEN]; - int clvmflag = 1; - char *clvmflagstr; - char key[256]; - - sprintf(nodekey, "//cluster/clusternodes/clusternode[%d]/@name", i); - error = ccs_get(ctree, nodekey, &nodename); - if (error) - break; - - sprintf(key, "//cluster/clusternodes/clusternode[@name=\"%s\"]/clvm", nodename); - if (!ccs_get(ctree, key, &clvmflagstr)) - { - clvmflag = atoi(clvmflagstr); - free(clvmflagstr); - } - - DEBUGLOG("Got node %s from ccs(clvmflag = %d)\n", nodename, clvmflag); - if ((get_ip_address(nodename, nodeip) == 0) && clvmflag) - { - struct node_info *ninfo; - - /* If it's not in the list, then add it */ - ninfo = dm_hash_lookup_binary(node_hash, nodeip, GULM_MAX_CSID_LEN); - if (!ninfo) - { - ninfo = malloc(sizeof(struct node_info)); - if (!ninfo) - { - syslog(LOG_ERR, "Cannot alloc memory for node info\n"); - ccs_disconnect(ctree); - return -1; - } - strcpy(ninfo->name, nodename); - - ninfo->state = NODE_DOWN; - dm_hash_insert_binary(node_hash, nodeip, GULM_MAX_CSID_LEN, ninfo); - } - } - else - { - if (!clvmflag) { - DEBUGLOG("node %s has clvm disabled\n", nodename); - } - else { - DEBUGLOG("Cannot resolve host name %s\n", nodename); - log_error("Cannot resolve host name %s\n", nodename); - } - } - free(nodename); - } - - /* Finished with config file */ - ccs_disconnect(ctree); - - return 0; -} - -static int _get_main_cluster_fd(void) -{ - return get_main_gulm_cluster_fd(); -} - -static int _cluster_fd_callback(struct local_client *fd, char *buf, int len, const char *csid, struct local_client **new_client) -{ - return cluster_fd_gulm_callback(fd, buf, len, csid, new_client); -} - -static int _cluster_send_message(const void *buf, int msglen, const char *csid, const char *errtext) -{ - return gulm_cluster_send_message((char *)buf, msglen, csid, errtext); -} - -static int _get_cluster_name(char *buf, int buflen) -{ - strncpy(buf, cluster_name, buflen); - return 0; -} - -static struct cluster_ops _cluster_gulm_ops = { - .cluster_init_completed = NULL, - .cluster_send_message = _cluster_send_message, - .name_from_csid = gulm_name_from_csid, - .csid_from_name = _csid_from_name, - .get_num_nodes = _get_num_nodes, - .cluster_fd_callback = _cluster_fd_callback, - .get_main_cluster_fd = _get_main_cluster_fd, - .cluster_do_node_callback = _cluster_do_node_callback, - .is_quorate = _is_quorate, - .get_our_csid = _get_our_csid, - .add_up_node = gulm_add_up_node, - .reread_config = _reread_config, - .cluster_closedown = _cluster_closedown, - .get_cluster_name = _get_cluster_name, - .sync_lock = _sync_lock, - .sync_unlock = _sync_unlock, -}; - -struct cluster_ops *init_gulm_cluster(void) -{ - if (!_init_cluster()) - return &_cluster_gulm_ops; - else - return NULL; -} diff --git a/daemons/clvmd/clvmd-gulm.h b/daemons/clvmd/clvmd-gulm.h deleted file mode 100644 index 9416f5c..0000000 --- a/daemons/clvmd/clvmd-gulm.h +++ /dev/null @@ -1,9 +0,0 @@ -extern int get_next_node_csid(void **context, char *csid); -extern void add_down_node(char *csid); -extern int gulm_fd(void); -extern int get_ip_address(const char *node, char *addr); -extern void tcp_remove_client(const char *csid); -extern int alloc_client(int fd, const char *csid, struct local_client **new_client); - -void gulm_add_up_node(const char *csid); -int gulm_name_from_csid(const char *csid, char *name); diff --git a/daemons/clvmd/clvmd-openais.c b/daemons/clvmd/clvmd-openais.c index fc98b50..9ce73d6 100644 --- a/daemons/clvmd/clvmd-openais.c +++ b/daemons/clvmd/clvmd-openais.c @@ -197,14 +197,13 @@ static int add_internal_client(int fd, fd_callback_t callback) DEBUGLOG("Add_internal_client, fd = %d\n", fd); - client = malloc(sizeof(struct local_client)); + client = calloc(1, sizeof(struct local_client)); if (!client) { DEBUGLOG("malloc failed\n"); return -1; } - memset(client, 0, sizeof(struct local_client)); client->fd = fd; client->type = CLUSTER_INTERNAL; client->callback = callback; @@ -227,7 +226,7 @@ static void openais_cpg_deliver_callback (cpg_handle_t handle, memcpy(&target_nodeid, msg, OPENAIS_CSID_LEN); - DEBUGLOG("%u got message from nodeid %d for %d. len %d\n", + DEBUGLOG("%u got message from nodeid %d for %d. len %" PRIsize_t "\n", our_nodeid, nodeid, target_nodeid, msg_len-4); if (nodeid != our_nodeid) @@ -245,7 +244,8 @@ static void openais_cpg_confchg_callback(cpg_handle_t handle, int i; struct node_info *ninfo; - DEBUGLOG("confchg callback. %d joined, %d left, %d members\n", + DEBUGLOG("confchg callback. %" PRIsize_t " joined, " + "%" PRIsize_t " left, %" PRIsize_t " members\n", joined_list_entries, left_list_entries, member_list_entries); for (i=0; i<joined_list_entries; i++) { @@ -505,11 +505,11 @@ static int _lock_resource(char *resource, int mode, int flags, int *lockid) saLckResourceClose(res_handle); return ais_to_errno(err); } - + /* Wait for it to complete */ - DEBUGLOG("lock_resource returning %d, lock_id=%llx\n", err, - lock_id); + DEBUGLOG("lock_resource returning %d, lock_id=%" PRIx64 "\n", + err, lock_id); linfo->lock_id = lock_id; linfo->res_handle = res_handle; @@ -530,7 +530,7 @@ static int _unlock_resource(char *resource, int lockid) if (!linfo) return 0; - DEBUGLOG("unlock_resource: lockid: %llx\n", linfo->lock_id); + DEBUGLOG("unlock_resource: lockid: %" PRIx64 "\n", linfo->lock_id); err = saLckResourceUnlock(linfo->lock_id, SA_TIME_END); if (err != SA_AIS_OK) { @@ -666,6 +666,7 @@ static int _get_cluster_name(char *buf, int buflen) } static struct cluster_ops _cluster_openais_ops = { + .name = "openais", .cluster_init_completed = NULL, .cluster_send_message = _cluster_send_message, .name_from_csid = _name_from_csid, diff --git a/daemons/clvmd/clvmd-singlenode.c b/daemons/clvmd/clvmd-singlenode.c index 335cb98..3b35bf5 100644 --- a/daemons/clvmd/clvmd-singlenode.c +++ b/daemons/clvmd/clvmd-singlenode.c @@ -29,6 +29,15 @@ static const char SINGLENODE_CLVMD_SOCKNAME[] = DEFAULT_RUN_DIR "/clvmd_singlenode.sock"; static int listen_fd = -1; +static struct dm_hash_table *_locks; +static int _lockid; + +struct lock { + int lockid; + int mode; + int excl; +}; + static void close_comms(void) { if (listen_fd != -1 && close(listen_fd)) @@ -39,8 +48,15 @@ static void close_comms(void) static int init_comms(void) { - struct sockaddr_un addr; mode_t old_mask; + struct sockaddr_un addr = { .sun_family = AF_UNIX }; + + if (!dm_strncpy(addr.sun_path, SINGLENODE_CLVMD_SOCKNAME, + sizeof(addr.sun_path))) { + DEBUGLOG("%s: singlenode socket name too long.", + SINGLENODE_CLVMD_SOCKNAME); + return -1; + } close_comms(); @@ -53,12 +69,10 @@ static int init_comms(void) goto error; } /* Set Close-on-exec */ - fcntl(listen_fd, F_SETFD, 1); - - memset(&addr, 0, sizeof(addr)); - memcpy(addr.sun_path, SINGLENODE_CLVMD_SOCKNAME, - sizeof(SINGLENODE_CLVMD_SOCKNAME)); - addr.sun_family = AF_UNIX; + if (fcntl(listen_fd, F_SETFD, 1)) { + DEBUGLOG("Setting CLOEXEC on client fd failed: %s\n", strerror(errno)); + goto error; + } if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { DEBUGLOG("Can't bind local socket: %s\n", strerror(errno)); @@ -83,9 +97,16 @@ static int _init_cluster(void) { int r; + if (!(_locks = dm_hash_create(128))) { + DEBUGLOG("Failed to allocate single-node hash table.\n"); + return 1; + } + r = init_comms(); - if (r) + if (r) { + dm_hash_destroy(_locks); return r; + } DEBUGLOG("Single-node cluster initialised.\n"); return 0; @@ -97,6 +118,9 @@ static void _cluster_closedown(void) DEBUGLOG("cluster_closedown\n"); destroy_lvhash(); + dm_hash_destroy(_locks); + _locks = NULL; + _lockid = 0; } static void _get_our_csid(char *csid) @@ -136,95 +160,101 @@ static int _cluster_do_node_callback(struct local_client *master_client, int _lock_file(const char *file, uint32_t flags); -static int *_locks = NULL; -static char **_resources = NULL; -static int _lock_max = 1; static pthread_mutex_t _lock_mutex = PTHREAD_MUTEX_INITIALIZER; +/* Using one common condition for all locks for simplicity */ +static pthread_cond_t _lock_cond = PTHREAD_COND_INITIALIZER; /* Real locking */ static int _lock_resource(const char *resource, int mode, int flags, int *lockid) { - int *_locks_1; - char **_resources_1; - int i, j; + struct lock *lck; - DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", + DEBUGLOG("Locking resource %s, flags=%d, mode=%d\n", resource, flags, mode); - retry: + mode &= LCK_TYPE_MASK; pthread_mutex_lock(&_lock_mutex); - - /* look for an existing lock for this resource */ - for (i = 1; i < _lock_max; ++i) { - if (!_resources[i]) - break; - if (!strcmp(_resources[i], resource)) { - if ((_locks[i] & LCK_TYPE_MASK) == LCK_WRITE || - (_locks[i] & LCK_TYPE_MASK) == LCK_EXCL) { - DEBUGLOG("%s already write/exclusively locked...\n", resource); - goto maybe_retry; - } - if ((mode & LCK_TYPE_MASK) == LCK_WRITE || - (mode & LCK_TYPE_MASK) == LCK_EXCL) { - DEBUGLOG("%s already locked and WRITE/EXCL lock requested...\n", - resource); - goto maybe_retry; - } - } +retry: + if (!(lck = dm_hash_lookup(_locks, resource))) { + /* Add new locked resource */ + if (!(lck = dm_zalloc(sizeof(struct lock))) || + !dm_hash_insert(_locks, resource, lck)) + goto bad; + + lck->lockid = ++_lockid; + goto out; } - if (i == _lock_max) { /* out of lock slots, extend */ - _locks_1 = dm_realloc(_locks, 2 * _lock_max * sizeof(int)); - if (!_locks_1) - return 1; /* fail */ - _locks = _locks_1; - _resources_1 = dm_realloc(_resources, 2 * _lock_max * sizeof(char *)); - if (!_resources_1) { - /* _locks may get realloc'd twice, but that should be safe */ - return 1; /* fail */ - } - _resources = _resources_1; - /* clear the new resource entries */ - for (j = _lock_max; j < 2 * _lock_max; ++j) - _resources[j] = NULL; - _lock_max = 2 * _lock_max; + /* Update/convert lock */ + if (flags == LCKF_CONVERT) { + if (lck->excl) + mode = LCK_EXCL; + } else if ((lck->mode == LCK_WRITE) || (lck->mode == LCK_EXCL)) { + DEBUGLOG("Resource %s already %s locked (%d)...\n", resource, + (lck->mode == LCK_WRITE) ? "write" : "exclusively", lck->lockid); + goto maybe_retry; + } else if (lck->mode > mode) { + DEBUGLOG("Resource %s already locked and %s lock requested...\n", + resource, + (mode == LCK_READ) ? "READ" : + (mode == LCK_WRITE) ? "WRITE" : "EXCLUSIVE"); + goto maybe_retry; } - /* resource is not currently locked, grab it */ - - *lockid = i; - _locks[i] = mode; - _resources[i] = dm_strdup(resource); - - DEBUGLOG("%s locked -> %d\n", resource, i); - +out: + *lockid = lck->lockid; + lck->mode = mode; + lck->excl |= (mode == LCK_EXCL); + DEBUGLOG("Locked resource %s, lockid=%d, mode=%d\n", resource, lck->lockid, mode); + pthread_cond_broadcast(&_lock_cond); /* wakeup waiters */ pthread_mutex_unlock(&_lock_mutex); + return 0; - maybe_retry: - pthread_mutex_unlock(&_lock_mutex); + +maybe_retry: if (!(flags & LCK_NONBLOCK)) { - usleep(10000); + pthread_cond_wait(&_lock_cond, &_lock_mutex); + DEBUGLOG("Resource %s RETRYING lock...\n", resource); goto retry; } +bad: + DEBUGLOG("Failed to lock resource %s\n", resource); + pthread_mutex_unlock(&_lock_mutex); return 1; /* fail */ } static int _unlock_resource(const char *resource, int lockid) { - DEBUGLOG("unlock_resource: %s lockid: %x\n", resource, lockid); - if(!_resources[lockid]) { - DEBUGLOG("(%s) %d not locked\n", resource, lockid); + struct lock *lck; + + if (lockid < 0) { + DEBUGLOG("Not tracking unlock of lockid -1: %s, lockid=%d\n", + resource, lockid); + return 0; + } + + DEBUGLOG("Unlocking resource %s, lockid=%d\n", resource, lockid); + pthread_mutex_lock(&_lock_mutex); + + if (!(lck = dm_hash_lookup(_locks, resource))) { + pthread_mutex_unlock(&_lock_mutex); + DEBUGLOG("Resource %s, lockid=%d is not locked.\n", resource, lockid); return 1; } - if(strcmp(_resources[lockid], resource)) { - DEBUGLOG("%d has wrong resource (requested %s, got %s)\n", - lockid, resource, _resources[lockid]); + + if (lck->lockid != lockid) { + pthread_mutex_unlock(&_lock_mutex); + DEBUGLOG("Resource %s has wrong lockid %d, expected %d.\n", + resource, lck->lockid, lockid); return 1; } - dm_free(_resources[lockid]); - _resources[lockid] = 0; + dm_hash_remove(_locks, resource); + dm_free(lck); + pthread_cond_broadcast(&_lock_cond); /* wakeup waiters */ + pthread_mutex_unlock(&_lock_mutex); + return 0; } @@ -260,6 +290,7 @@ static int _get_cluster_name(char *buf, int buflen) } static struct cluster_ops _cluster_singlenode_ops = { + .name = "singlenode", .cluster_init_completed = NULL, .cluster_send_message = _cluster_send_message, .name_from_csid = _name_from_csid, diff --git a/daemons/clvmd/clvmd.c b/daemons/clvmd/clvmd.c index 1a35c49..ac465d9 100644 --- a/daemons/clvmd/clvmd.c +++ b/daemons/clvmd/clvmd.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -20,6 +20,7 @@ #include "clvmd-common.h" #include <pthread.h> +#include <getopt.h> #include "clvmd-comms.h" #include "clvm.h" @@ -48,6 +49,7 @@ #endif #define MAX_RETRIES 4 +#define MAX_MISSING_LEN 8000 /* Max supported clvmd message size ? */ #define ISLOCAL_CSID(c) (memcmp(c, our_csid, max_csid_len) == 0) @@ -77,15 +79,18 @@ struct lvm_thread_cmd { }; struct lvm_startup_params { - int using_gulm; - char **argv; + struct dm_hash_table *excl_uuid; }; -debug_t debug; +static debug_t debug = DEBUG_OFF; +static int foreground_mode = 0; static pthread_t lvm_thread; +/* Stack size 128KiB for thread, must be bigger then DEFAULT_RESERVED_STACK */ +static const size_t STACK_SIZE = 128 * 1024; +static pthread_attr_t stack_attr; static pthread_mutex_t lvm_thread_mutex; static pthread_cond_t lvm_thread_cond; -static pthread_mutex_t lvm_start_mutex; +static pthread_barrier_t lvm_start_barrier; static struct dm_list lvm_cmd_head; static volatile sig_atomic_t quit = 0; static volatile sig_atomic_t reread_config = 0; @@ -99,9 +104,7 @@ static int child_pipe[2]; #define DFAIL_TIMEOUT 5 #define SUCCESS 0 -typedef enum {IF_AUTO, IF_CMAN, IF_GULM, IF_OPENAIS, IF_COROSYNC, IF_SINGLENODE} if_type_t; - -typedef void *(lvm_pthread_fn_t)(void*); +typedef enum {IF_AUTO, IF_CMAN, IF_OPENAIS, IF_COROSYNC, IF_SINGLENODE} if_type_t; /* Prototypes for code further down */ static void sigusr2_handler(int sig); @@ -132,7 +135,7 @@ static int check_all_clvmds_running(struct local_client *client); static int local_rendezvous_callback(struct local_client *thisfd, char *buf, int len, const char *csid, struct local_client **new_client); -static void lvm_thread_fn(void *) __attribute__ ((noreturn)); +static void *lvm_thread_fn(void *) __attribute__((noreturn)); static int add_to_lvmqueue(struct local_client *client, struct clvm_header *msg, int msglen, const char *csid); static int distribute_command(struct local_client *thisfd); @@ -145,12 +148,12 @@ static if_type_t get_cluster_type(void); static void usage(const char *prog, FILE *file) { - fprintf(file, "Usage:\n" - "%s [Vhd]\n\n" + fprintf(file, "Usage: %s [options]\n" " -V Show version of clvmd\n" " -h Show this help information\n" - " -d Set debug level\n" - " If starting clvmd then don't fork, run in the foreground\n" + " -d[n] Set debug logging (0:none, 1:stderr (implies -f option), 2:syslog)\n" + " -f Don't fork, run in the foreground\n" + " -E<lockuuid> Take this lock uuid as exclusively locked resource (for restart)\n" " -R Tell all running clvmds in the cluster to reload their device cache\n" " -S Restart clvmd, preserving exclusive locks\n" " -C Sets debug level (from -d) on all clvmd instances clusterwide\n" @@ -167,9 +170,6 @@ static void usage(const char *prog, FILE *file) #ifdef USE_OPENAIS "openais " #endif -#ifdef USE_GULM - "gulm " -#endif #ifdef USE_SINGLENODE "singlenode " #endif @@ -199,7 +199,8 @@ static void safe_close(int *fd) if (*fd >= 0) { int to_close = *fd; *fd = -1; - close(to_close); + if (close(to_close)) + log_sys_error("close", ""); /* path */ } } @@ -209,14 +210,15 @@ void debuglog(const char *fmt, ...) va_list ap; static int syslog_init = 0; - if (debug == DEBUG_STDERR) { + switch (clvmd_get_debug()) { + case DEBUG_STDERR: va_start(ap,fmt); time(&P); fprintf(stderr, "CLVMD[%x]: %.15s ", (int)pthread_self(), ctime(&P)+4 ); vfprintf(stderr, fmt, ap); va_end(ap); - } - if (debug == DEBUG_SYSLOG) { + break; + case DEBUG_SYSLOG: if (!syslog_init) { openlog("clvmd", LOG_PID, LOG_DAEMON); syslog_init = 1; @@ -225,9 +227,28 @@ void debuglog(const char *fmt, ...) va_start(ap,fmt); vsyslog(LOG_DEBUG, fmt, ap); va_end(ap); + break; + case DEBUG_OFF: + break; } } +void clvmd_set_debug(debug_t new_debug) +{ + if (!foreground_mode && new_debug == DEBUG_STDERR) + new_debug = DEBUG_SYSLOG; + + if (new_debug > DEBUG_SYSLOG) + new_debug = DEBUG_SYSLOG; + + debug = new_debug; +} + +debug_t clvmd_get_debug(void) +{ + return debug; +} + static const char *decode_cmd(unsigned char cmdl) { static char buf[128]; @@ -276,12 +297,15 @@ static const char *decode_cmd(unsigned char cmdl) case CLVMD_CMD_RESTART: command = "RESTART"; break; + case CLVMD_CMD_SYNC_NAMES: + command = "SYNC_NAMES"; + break; default: command = "unknown"; break; } - sprintf(buf, "%s (0x%x)", command, cmdl); + snprintf(buf, sizeof(buf), "%s (0x%x)", command, cmdl); return buf; } @@ -312,52 +336,63 @@ static void check_permissions(void) int main(int argc, char *argv[]) { int local_sock; - struct local_client *newfd; - struct utsname nodeinfo; + struct local_client *newfd, *delfd; struct lvm_startup_params lvm_params; - signed char opt; + int opt; int cmd_timeout = DEFAULT_CMD_TIMEOUT; int start_timeout = 0; if_type_t cluster_iface = IF_AUTO; sigset_t ss; - int using_gulm = 0; - int debug_opt = 0; + debug_t debug_opt = DEBUG_OFF; + debug_t debug_arg = DEBUG_OFF; int clusterwide_opt = 0; mode_t old_mask; + int ret = 1; + + struct option longopts[] = { + { "help", 0, 0, 'h' }, + { NULL, 0, 0, 0 } + }; + + if (!(lvm_params.excl_uuid = dm_hash_create(128))) { + fprintf(stderr, "Failed to allocate hash table\n"); + return 1; + } /* Deal with command-line arguments */ opterr = 0; optind = 0; - while ((opt = getopt(argc, argv, "?vVhd::t:RST:CI:E:")) != EOF) { + while ((opt = getopt_long(argc, argv, "vVhfd::t:RST:CI:E:", + longopts, NULL)) != -1) { switch (opt) { case 'h': usage(argv[0], stdout); exit(0); - case '?': - usage(argv[0], stderr); - exit(0); - case 'R': check_permissions(); - return refresh_clvmd(1)==1?0:1; + ret = (refresh_clvmd(1) == 1) ? 0 : 1; + goto out; case 'S': check_permissions(); - return restart_clvmd(clusterwide_opt)==1?0:1; + ret = (restart_clvmd(clusterwide_opt) == 1) ? 0 : 1; + goto out; case 'C': clusterwide_opt = 1; break; case 'd': - debug_opt = 1; - if (optarg) - debug = atoi(optarg); - else - debug = DEBUG_STDERR; + debug_opt = DEBUG_STDERR; + debug_arg = optarg ? (debug_t) atoi(optarg) : DEBUG_STDERR; + if (debug_arg == DEBUG_STDERR) + foreground_mode = 1; break; + case 'f': + foreground_mode = 1; + break; case 't': cmd_timeout = atoi(optarg); if (!cmd_timeout) { @@ -369,6 +404,12 @@ int main(int argc, char *argv[]) case 'I': cluster_iface = parse_cluster_interface(optarg); break; + case 'E': + if (!dm_hash_insert(lvm_params.excl_uuid, optarg, optarg)) { + fprintf(stderr, "Failed to allocate hash entry\n"); + goto out; + } + break; case 'T': start_timeout = atoi(optarg); if (start_timeout <= 0) { @@ -386,20 +427,14 @@ int main(int argc, char *argv[]) exit(0); break; + default: + usage(argv[0], stderr); + exit(2); } } check_permissions(); - /* Setting debug options on an existing clvmd */ - if (debug_opt && !check_local_clvmd()) { - - /* Sending to stderr makes no sense for a detached daemon */ - if (debug == DEBUG_STDERR) - debug = DEBUG_SYSLOG; - return debug_clvmd(debug, clusterwide_opt)==1?0:1; - } - /* * Switch to C locale to avoid reading large locale-archive file * used by some glibc (on some distributions it takes over 100MB). @@ -408,12 +443,19 @@ int main(int argc, char *argv[]) if (setenv("LANG", "C", 1)) perror("Cannot set LANG to C"); + /* Setting debug options on an existing clvmd */ + if (debug_opt && !check_local_clvmd()) { + dm_hash_destroy(lvm_params.excl_uuid); + return debug_clvmd(debug_arg, clusterwide_opt)==1?0:1; + } + + clvmd_set_debug(debug_opt); + /* Fork into the background (unless requested not to) */ - if (debug != DEBUG_STDERR) { + if (!foreground_mode) be_daemon(start_timeout); - } - dm_prepare_selinux_context(DEFAULT_RUN_DIR, S_IFDIR); + (void) dm_prepare_selinux_context(DEFAULT_RUN_DIR, S_IFDIR); old_mask = umask(0077); if (dm_create_dir(DEFAULT_RUN_DIR) == 0) { DEBUGLOG("clvmd: unable to create %s directory\n", @@ -447,7 +489,7 @@ int main(int argc, char *argv[]) /* Set up signal handlers, USR1 is for cluster change notifications (in cman) USR2 causes child threads to exit. - HUP causes gulm version to re-read nodes list from CCS. + (HUP used to cause gulm to re-read the nodes list from CCS.) PIPE should be ignored */ signal(SIGUSR2, sigusr2_handler); signal(SIGHUP, sighup_handler); @@ -462,9 +504,14 @@ int main(int argc, char *argv[]) /* Initialise the LVM thread variables */ dm_list_init(&lvm_cmd_head); + if (pthread_attr_init(&stack_attr) || + pthread_attr_setstacksize(&stack_attr, STACK_SIZE)) { + log_sys_error("pthread_attr_init", ""); + exit(1); + } pthread_mutex_init(&lvm_thread_mutex, NULL); pthread_cond_init(&lvm_thread_cond, NULL); - pthread_mutex_init(&lvm_start_mutex, NULL); + pthread_barrier_init(&lvm_start_barrier, NULL, 2); init_lvhash(); /* Start the cluster interface */ @@ -479,16 +526,6 @@ int main(int argc, char *argv[]) syslog(LOG_NOTICE, "Cluster LVM daemon started - connected to CMAN"); } #endif -#ifdef USE_GULM - if (!clops) - if ((cluster_iface == IF_AUTO || cluster_iface == IF_GULM) && (clops = init_gulm_cluster())) { - max_csid_len = GULM_MAX_CSID_LEN; - max_cluster_message = GULM_MAX_CLUSTER_MESSAGE; - max_cluster_member_name_len = GULM_MAX_CLUSTER_MEMBER_NAME_LEN; - using_gulm = 1; - syslog(LOG_NOTICE, "Cluster LVM daemon started - connected to GULM"); - } -#endif #ifdef USE_COROSYNC if (!clops) if (((cluster_iface == IF_AUTO || cluster_iface == IF_COROSYNC) && (clops = init_corosync_cluster()))) { @@ -526,7 +563,6 @@ int main(int argc, char *argv[]) DEBUGLOG("Cluster ready, doing some more initialisation\n"); /* Save our CSID */ - uname(&nodeinfo); clops->get_our_csid(our_csid); /* Initialise the FD list head */ @@ -553,16 +589,12 @@ int main(int argc, char *argv[]) DEBUGLOG("starting LVM thread\n"); /* Don't let anyone else to do work until we are started */ - pthread_mutex_lock(&lvm_start_mutex); - lvm_params.using_gulm = using_gulm; - lvm_params.argv = argv; - pthread_create(&lvm_thread, NULL, (lvm_pthread_fn_t*)lvm_thread_fn, - (void *)&lvm_params); + pthread_create(&lvm_thread, &stack_attr, lvm_thread_fn, &lvm_params); + + /* Don't start until the LVM thread is ready */ + pthread_barrier_wait(&lvm_start_barrier); /* Tell the rest of the cluster our version number */ - /* CMAN can do this immediately, gulm needs to wait until - the core initialisation has finished and the node list - has been gathered */ if (clops->cluster_init_completed) clops->cluster_init_completed(); @@ -576,15 +608,39 @@ int main(int argc, char *argv[]) /* Do some work */ main_loop(local_sock, cmd_timeout); + pthread_mutex_lock(&lvm_thread_mutex); + pthread_cond_signal(&lvm_thread_cond); + pthread_mutex_unlock(&lvm_thread_mutex); + if ((errno = pthread_join(lvm_thread, NULL))) + log_sys_error("pthread_join", ""); + close_local_sock(local_sock); destroy_lvm(); - return 0; + for (newfd = local_client_head.next; newfd != NULL;) { + delfd = newfd; + newfd = newfd->next; + if (delfd->fd == local_sock) + delfd->fd = -1; + /* + * FIXME: + * needs cleanup code from read_from_local_sock() for now + * break of 'clvmd' may access already free memory here. + */ + safe_close(&(delfd->fd)); + free(delfd); + } + + ret = 0; +out: + dm_hash_destroy(lvm_params.excl_uuid); + + return ret; } -/* Called when the GuLM cluster layer has completed initialisation. +/* Called when the cluster layer has completed initialisation. We send the version message */ -void clvmd_cluster_init_completed() +void clvmd_cluster_init_completed(void) { send_version_message(); } @@ -616,7 +672,8 @@ static int local_rendezvous_callback(struct local_client *thisfd, char *buf, if (client_fd >= 0) { newfd = malloc(sizeof(struct local_client)); if (!newfd) { - close(client_fd); + if (close(client_fd)) + log_sys_error("close", "socket"); return 1; } @@ -666,9 +723,9 @@ static int local_pipe_callback(struct local_client *thisfd, char *buf, /* EOF on pipe or an error, close it */ if (len <= 0) { - int jstat; void *ret = &status; - close(thisfd->fd); + if (close(thisfd->fd)) + log_sys_error("close", "local_pipe"); /* Clear out the cross-link */ if (thisfd->bits.pipe.client != NULL) @@ -677,7 +734,10 @@ static int local_pipe_callback(struct local_client *thisfd, char *buf, /* Reap child thread */ if (thisfd->bits.pipe.threadid) { - jstat = pthread_join(thisfd->bits.pipe.threadid, &ret); + if ((errno = pthread_join(thisfd->bits.pipe.threadid, + &ret))) + log_sys_error("pthread_join", ""); + thisfd->bits.pipe.threadid = 0; if (thisfd->bits.pipe.client != NULL) thisfd->bits.pipe.client->bits.localsock. @@ -689,27 +749,25 @@ static int local_pipe_callback(struct local_client *thisfd, char *buf, status, sock_client); /* But has the client gone away ?? */ if (sock_client == NULL) { - DEBUGLOG - ("Got PIPE response for dead client, ignoring it\n"); + DEBUGLOG("Got PIPE response for dead client, ignoring it\n"); } else { /* If error then just return that code */ if (status) send_local_reply(sock_client, status, sock_client->fd); else { - if (sock_client->bits.localsock.state == - POST_COMMAND) { + /* FIXME: closer inspect this code since state is write thread protected */ + pthread_mutex_lock(&sock_client->bits.localsock.mutex); + if (sock_client->bits.localsock.state == POST_COMMAND) { + pthread_mutex_unlock(&sock_client->bits.localsock.mutex); send_local_reply(sock_client, 0, sock_client->fd); - } else // PRE_COMMAND finished. - { - if ( - (status = - distribute_command(sock_client)) != - 0) send_local_reply(sock_client, - EFBIG, - sock_client-> - fd); + } else { + /* PRE_COMMAND finished. */ + pthread_mutex_unlock(&sock_client->bits.localsock.mutex); + if ((status = distribute_command(sock_client))) + send_local_reply(sock_client, EFBIG, + sock_client->fd); } } } @@ -774,9 +832,10 @@ static void request_timed_out(struct local_client *client) /* This is where the real work happens */ static void main_loop(int local_sock, int cmd_timeout) { + sigset_t ss; + DEBUGLOG("Using timeout of %d seconds\n", cmd_timeout); - sigset_t ss; sigemptyset(&ss); sigaddset(&ss, SIGINT); sigaddset(&ss, SIGTERM); @@ -827,7 +886,6 @@ static void main_loop(int local_sock, int cmd_timeout) struct local_client *free_fd; lastfd->next = thisfd->next; free_fd = thisfd; - thisfd = lastfd; DEBUGLOG("removeme set for fd %d\n", free_fd->fd); @@ -863,7 +921,6 @@ static void main_loop(int local_sock, int cmd_timeout) ret, errno); lastfd->next = thisfd->next; free_fd = thisfd; - thisfd = lastfd; safe_close(&(free_fd->fd)); /* Queue cleanup, this also frees the client struct */ @@ -987,7 +1044,10 @@ static void be_daemon(int timeout) exit(3); } - pipe(child_pipe); + if (pipe(child_pipe)) { + perror("Error creating pipe"); + exit(3); + } switch (fork()) { case -1: @@ -995,11 +1055,11 @@ static void be_daemon(int timeout) exit(2); case 0: /* Child */ - close(child_pipe[0]); + (void) close(child_pipe[0]); break; default: /* Parent */ - close(child_pipe[1]); + (void) close(child_pipe[1]); wait_for_child(child_pipe[0], timeout); } @@ -1030,9 +1090,9 @@ static int read_from_local_sock(struct local_client *thisfd) int len; int argslen; int missing_len; - char buffer[PIPE_BUF]; + char buffer[PIPE_BUF + 1]; - len = read(thisfd->fd, buffer, sizeof(buffer)); + len = read(thisfd->fd, buffer, sizeof(buffer) - 1); if (len == -1 && errno == EINTR) return 1; @@ -1041,7 +1101,6 @@ static int read_from_local_sock(struct local_client *thisfd) /* EOF or error on socket */ if (len <= 0) { int *status; - int jstat; DEBUGLOG("EOF on local socket: inprogress=%d\n", thisfd->bits.localsock.in_progress); @@ -1068,9 +1127,10 @@ static int read_from_local_sock(struct local_client *thisfd) pthread_cond_signal(&thisfd->bits.localsock.cond); pthread_mutex_unlock(&thisfd->bits.localsock.mutex); - jstat = - pthread_join(thisfd->bits.localsock.threadid, - (void **) &status); + if ((errno = pthread_join(thisfd->bits.localsock.threadid, + (void **) &status))) + log_sys_error("pthread_join", ""); + DEBUGLOG("Joined child thread\n"); thisfd->bits.localsock.threadid = 0; @@ -1083,8 +1143,8 @@ static int read_from_local_sock(struct local_client *thisfd) struct local_client *lastfd = NULL; struct local_client *free_fd = NULL; - close(thisfd->bits.localsock.pipe_client->fd); /* Close pipe */ - close(thisfd->bits.localsock.pipe); + (void) close(thisfd->bits.localsock.pipe_client->fd); /* Close pipe */ + (void) close(thisfd->bits.localsock.pipe); /* Remove pipe client */ for (newfd = &local_client_head; newfd != NULL; @@ -1122,6 +1182,7 @@ static int read_from_local_sock(struct local_client *thisfd) struct clvm_header *inheader; int status; + buffer[len] = 0; /* Ensure \0 terminated */ inheader = (struct clvm_header *) buffer; /* Fill in the client ID */ @@ -1129,20 +1190,16 @@ static int read_from_local_sock(struct local_client *thisfd) /* If we are already busy then return an error */ if (thisfd->bits.localsock.in_progress) { - struct clvm_header reply; - reply.cmd = CLVMD_CMD_REPLY; - reply.status = EBUSY; - reply.arglen = 0; - reply.flags = 0; + struct clvm_header reply = { + .cmd = CLVMD_CMD_REPLY, + .status = EBUSY + }; send_message(&reply, sizeof(reply), our_csid, thisfd->fd, "Error sending EBUSY reply to local user"); return len; } - /* Free any old buffer space */ - free(thisfd->bits.localsock.cmd); - /* See if we have the whole message */ argslen = len - strlen(inheader->node) - sizeof(struct clvm_header); @@ -1151,15 +1208,30 @@ static int read_from_local_sock(struct local_client *thisfd) if (missing_len < 0) missing_len = 0; + /* We need at least sizeof(struct clvm_header) bytes in buffer */ + if (len < sizeof(struct clvm_header) || argslen < 0 || + missing_len > MAX_MISSING_LEN) { + struct clvm_header reply = { + .cmd = CLVMD_CMD_REPLY, + .status = EINVAL + }; + send_message(&reply, sizeof(reply), our_csid, + thisfd->fd, + "Error sending EINVAL reply to local user"); + return 0; + } + + /* Free any old buffer space */ + free(thisfd->bits.localsock.cmd); + /* Save the message */ thisfd->bits.localsock.cmd = malloc(len + missing_len); if (!thisfd->bits.localsock.cmd) { - struct clvm_header reply; - reply.cmd = CLVMD_CMD_REPLY; - reply.status = ENOMEM; - reply.arglen = 0; - reply.flags = 0; + struct clvm_header reply = { + .cmd = CLVMD_CMD_REPLY, + .status = ENOMEM + }; send_message(&reply, sizeof(reply), our_csid, thisfd->fd, "Error sending ENOMEM reply to local user"); @@ -1174,15 +1246,22 @@ static int read_from_local_sock(struct local_client *thisfd) char *argptr = inheader->node + strlen(inheader->node) + 1; - while (missing_len > 0 && len >= 0) { - DEBUGLOG - ("got %d bytes, need another %d (total %d)\n", - argslen, missing_len, inheader->arglen); + while (missing_len > 0) { + DEBUGLOG("got %d bytes, need another %d (total %d)\n", + argslen, missing_len, inheader->arglen); len = read(thisfd->fd, argptr + argslen, missing_len); - if (len >= 0) { + if (len == -1 && errno == EINTR) + continue; + if (len > 0) { missing_len -= len; argslen += len; + } else { + /* EOF or error on socket */ + DEBUGLOG("EOF on local socket\n"); + free(thisfd->bits.localsock.cmd); + thisfd->bits.localsock.cmd = NULL; + return 0; } } } @@ -1207,13 +1286,12 @@ static int read_from_local_sock(struct local_client *thisfd) /* Check the node name for validity */ if (inheader->node[0] && clops->csid_from_name(csid, inheader->node)) { /* Error, node is not in the cluster */ - struct clvm_header reply; - DEBUGLOG("Unknown node: '%s'\n", inheader->node); + struct clvm_header reply = { + .cmd = CLVMD_CMD_REPLY, + .status = ENOENT + }; - reply.cmd = CLVMD_CMD_REPLY; - reply.status = ENOENT; - reply.flags = 0; - reply.arglen = 0; + DEBUGLOG("Unknown node: '%s'\n", inheader->node); send_message(&reply, sizeof(reply), our_csid, thisfd->fd, "Error sending ENOENT reply to local user"); @@ -1234,17 +1312,29 @@ static int read_from_local_sock(struct local_client *thisfd) } /* Create a pipe and add the reading end to our FD list */ - pipe(comms_pipe); + if (pipe(comms_pipe)) { + struct clvm_header reply = { + .cmd = CLVMD_CMD_REPLY, + .status = EBUSY + }; + + DEBUGLOG("creating pipe failed: %s\n", strerror(errno)); + send_message(&reply, sizeof(reply), our_csid, + thisfd->fd, + "Error sending EBUSY reply to local user"); + return len; + } + newfd = malloc(sizeof(struct local_client)); if (!newfd) { - struct clvm_header reply; - close(comms_pipe[0]); - close(comms_pipe[1]); - - reply.cmd = CLVMD_CMD_REPLY; - reply.status = ENOMEM; - reply.arglen = 0; - reply.flags = 0; + struct clvm_header reply = { + .cmd = CLVMD_CMD_REPLY, + .status = ENOMEM + }; + + (void) close(comms_pipe[0]); + (void) close(comms_pipe[1]); + send_message(&reply, sizeof(reply), our_csid, thisfd->fd, "Error sending ENOMEM reply to local user"); @@ -1279,8 +1369,8 @@ static int read_from_local_sock(struct local_client *thisfd) thisfd->bits.localsock.in_progress = TRUE; thisfd->bits.localsock.state = PRE_COMMAND; DEBUGLOG("Creating pre&post thread\n"); - status = pthread_create(&thisfd->bits.localsock.threadid, NULL, - pre_and_post_thread, thisfd); + status = pthread_create(&thisfd->bits.localsock.threadid, + &stack_attr, pre_and_post_thread, thisfd); DEBUGLOG("Created pre&post thread, state = %d\n", status); } return len; @@ -1306,7 +1396,10 @@ static int distribute_command(struct local_client *thisfd) int len = thisfd->bits.localsock.cmd_len; thisfd->xid = global_xid++; - DEBUGLOG("distribute command: XID = %d\n", thisfd->xid); + DEBUGLOG("distribute command: XID = %d, flags=0x%x (%s%s)\n", + thisfd->xid, inheader->flags, + (inheader->flags & CLVMD_FLAG_LOCAL) ? "LOCAL" : "", + (inheader->flags & CLVMD_FLAG_REMOTE) ? "REMOTE" : ""); /* Forward it to other nodes in the cluster if needed */ if (!(inheader->flags & CLVMD_FLAG_LOCAL)) { @@ -1319,7 +1412,11 @@ static int distribute_command(struct local_client *thisfd) thisfd->bits.localsock.in_progress = TRUE; thisfd->bits.localsock.sent_out = TRUE; - /* Do it here first */ + /* + * Send to local node first, even if CLVMD_FLAG_REMOTE + * is set so we still get a reply if this is the + * only node. + */ add_to_lvmqueue(thisfd, inheader, len, NULL); DEBUGLOG("Sending message to all cluster nodes\n"); @@ -1372,7 +1469,6 @@ static void process_remote_command(struct clvm_header *msg, int msglen, int fd, int replylen = 0; int buflen = max_cluster_message - sizeof(struct clvm_header) - 1; int status; - int msg_malloced = 0; /* Get the node name as we /may/ need it later */ clops->name_from_csid(csid, nodename); @@ -1431,9 +1527,9 @@ static void process_remote_command(struct clvm_header *msg, int msglen, int fd, if (replyargs != NULL) { /* Run the command */ - status = - do_command(NULL, msg, msglen, &replyargs, buflen, - &replylen); + /* FIXME: usage of init_test() is unprotected */ + status = do_command(NULL, msg, msglen, &replyargs, + buflen, &replylen); } else { status = ENOMEM; } @@ -1481,10 +1577,6 @@ static void process_remote_command(struct clvm_header *msg, int msglen, int fd, } } - /* Free buffer if it was malloced */ - if (msg_malloced) { - free(msg); - } free(replyargs); } @@ -1554,11 +1646,6 @@ static __attribute__ ((noreturn)) void *pre_and_post_thread(void *arg) DEBUGLOG("in sub thread: client = %p\n", client); pthread_mutex_lock(&client->bits.localsock.mutex); - /* Don't start until the LVM thread is ready */ - pthread_mutex_lock(&lvm_start_mutex); - pthread_mutex_unlock(&lvm_start_mutex); - DEBUGLOG("Sub thread ready for work.\n"); - /* Ignore SIGUSR1 (handled by master process) but enable SIGUSR2 (kills subthreads) */ sigemptyset(&ss); @@ -1572,6 +1659,7 @@ static __attribute__ ((noreturn)) void *pre_and_post_thread(void *arg) /* Loop around doing PRE and POST functions until the client goes away */ while (!client->bits.localsock.finished) { /* Execute the code */ + /* FIXME: usage of init_test() is unprotected as in do_command() */ status = do_pre_command(client); if (status) @@ -1654,7 +1742,12 @@ static int process_local_command(struct clvm_header *msg, int msglen, if (replybuf == NULL) return -1; - status = do_command(client, msg, msglen, &replybuf, buflen, &replylen); + /* If remote flag is set, just set a successful status code. */ + if (msg->flags & CLVMD_FLAG_REMOTE) + status = 0; + else + /* FIXME: usage of init_test() is unprotected */ + status = do_command(client, msg, msglen, &replybuf, buflen, &replylen); if (status) client->bits.localsock.all_success = 0; @@ -1723,13 +1816,18 @@ static void send_local_reply(struct local_client *client, int status, int fd) } /* Add in the size of our header */ - message_len = message_len + sizeof(struct clvm_header) + 1; - replybuf = malloc(message_len); + message_len = message_len + sizeof(struct clvm_header); + if (!(replybuf = malloc(message_len))) { + DEBUGLOG("Memory allocation fails\n"); + return; + } clientreply = (struct clvm_header *) replybuf; clientreply->status = status; clientreply->cmd = CLVMD_CMD_REPLY; clientreply->node[0] = '\0'; + clientreply->xid = 0; + clientreply->clientid = 0; clientreply->flags = 0; ptr = clientreply->args; @@ -1764,7 +1862,7 @@ static void send_local_reply(struct local_client *client, int status, int fd) /* Terminate with an empty node name */ *ptr = '\0'; - clientreply->arglen = ptr - clientreply->args + 1; + clientreply->arglen = ptr - clientreply->args; /* And send it */ send_message(replybuf, message_len, our_csid, fd, @@ -1795,7 +1893,7 @@ static void free_reply(struct local_client *client) } /* Send our version number to the cluster */ -static void send_version_message() +static void send_version_message(void) { char message[sizeof(struct clvm_header) + sizeof(int) * 3]; struct clvm_header *msg = (struct clvm_header *) message; @@ -1847,7 +1945,7 @@ static int send_message(void *buf, int msglen, const char *csid, int fd, break; } - len = write(fd, buf + ptr, msglen - ptr); + len = write(fd, (char*)buf + ptr, msglen - ptr); if (len <= 0) { if (errno == EINTR) @@ -1900,11 +1998,11 @@ static int process_work_item(struct lvm_thread_cmd *cmd) /* * Routine that runs in the "LVM thread". */ -static void lvm_thread_fn(void *arg) +static void *lvm_thread_fn(void *arg) { - struct dm_list *cmdl, *tmp; sigset_t ss; struct lvm_startup_params *lvm_params = arg; + struct lvm_thread_cmd *cmd; DEBUGLOG("LVM thread function started\n"); @@ -1915,24 +2013,22 @@ static void lvm_thread_fn(void *arg) pthread_sigmask(SIG_BLOCK, &ss, NULL); /* Initialise the interface to liblvm */ - init_clvm(lvm_params->using_gulm, lvm_params->argv); + init_clvm(lvm_params->excl_uuid); /* Allow others to get moving */ - pthread_mutex_unlock(&lvm_start_mutex); + pthread_barrier_wait(&lvm_start_barrier); + DEBUGLOG("Sub thread ready for work.\n"); /* Now wait for some actual work */ - for (;;) { - DEBUGLOG("LVM thread waiting for work\n"); + pthread_mutex_lock(&lvm_thread_mutex); - pthread_mutex_lock(&lvm_thread_mutex); - if (dm_list_empty(&lvm_cmd_head)) + while (!quit) { + if (dm_list_empty(&lvm_cmd_head)) { + DEBUGLOG("LVM thread waiting for work\n"); pthread_cond_wait(&lvm_thread_cond, &lvm_thread_mutex); - - dm_list_iterate_safe(cmdl, tmp, &lvm_cmd_head) { - struct lvm_thread_cmd *cmd; - - cmd = - dm_list_struct_base(cmdl, struct lvm_thread_cmd, list); + } else { + cmd = dm_list_item(dm_list_first(&lvm_cmd_head), + struct lvm_thread_cmd); dm_list_del(&cmd->list); pthread_mutex_unlock(&lvm_thread_mutex); @@ -1942,8 +2038,11 @@ static void lvm_thread_fn(void *arg) pthread_mutex_lock(&lvm_thread_mutex); } - pthread_mutex_unlock(&lvm_thread_mutex); } + + pthread_mutex_unlock(&lvm_thread_mutex); + + pthread_exit(NULL); } /* Pass down some work to the LVM thread */ @@ -1994,42 +2093,52 @@ static int add_to_lvmqueue(struct local_client *client, struct clvm_header *msg, static int check_local_clvmd(void) { int local_socket; - struct sockaddr_un sockaddr; int ret = 0; + struct sockaddr_un sockaddr = { .sun_family = AF_UNIX }; + + if (!dm_strncpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(sockaddr.sun_path))) { + log_error("%s: clvmd socket name too long.", CLVMD_SOCKNAME); + return -1; + } /* Open local socket */ if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) { + log_sys_error("socket", "local socket"); return -1; } - memset(&sockaddr, 0, sizeof(sockaddr)); - memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME)); - sockaddr.sun_family = AF_UNIX; - if (connect(local_socket,(struct sockaddr *) &sockaddr, sizeof(sockaddr))) { + log_sys_error("connect", "local socket"); ret = -1; } - close(local_socket); + if (close(local_socket)) + log_sys_error("close", "local socket"); + return ret; } static void close_local_sock(int local_socket) { if (local_socket != -1 && close(local_socket)) - stack; + log_sys_error("close", CLVMD_SOCKNAME); if (CLVMD_SOCKNAME[0] != '\0' && unlink(CLVMD_SOCKNAME)) stack; } /* Open the local socket, that's the one we talk to libclvm down */ -static int open_local_sock() +static int open_local_sock(void) { - int local_socket = -1; - struct sockaddr_un sockaddr; mode_t old_mask; + int local_socket = -1; + struct sockaddr_un sockaddr = { .sun_family = AF_UNIX }; + + if (!dm_strncpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(sockaddr.sun_path))) { + log_error("%s: clvmd socket name too long.", CLVMD_SOCKNAME); + return -1; + } close_local_sock(local_socket); @@ -2046,11 +2155,9 @@ static int open_local_sock() /* Set Close-on-exec & non-blocking */ if (fcntl(local_socket, F_SETFD, 1)) DEBUGLOG("setting CLOEXEC on local_socket failed: %s\n", strerror(errno)); - fcntl(local_socket, F_SETFL, fcntl(local_socket, F_GETFL, 0) | O_NONBLOCK); + if (fcntl(local_socket, F_SETFL, fcntl(local_socket, F_GETFL, 0) | O_NONBLOCK)) + DEBUGLOG("setting O_NONBLOCK on local_socket failed: %s\n", strerror(errno)); - memset(&sockaddr, 0, sizeof(sockaddr)); - memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME)); - sockaddr.sun_family = AF_UNIX; if (bind(local_socket, (struct sockaddr *) &sockaddr, sizeof(sockaddr))) { log_error("can't bind local socket: %m"); @@ -2071,7 +2178,7 @@ error: return -1; } -void process_message(struct local_client *client, const char *buf, int len, +void process_message(struct local_client *client, char *buf, int len, const char *csid) { struct clvm_header *inheader; @@ -2108,7 +2215,7 @@ static struct local_client *find_client(int clientid) { struct local_client *thisfd; for (thisfd = &local_client_head; thisfd != NULL; thisfd = thisfd->next) { - if (thisfd->fd == ntohl(clientid)) + if (thisfd->fd == (int)ntohl(clientid)) return thisfd; } return NULL; @@ -2170,8 +2277,6 @@ static if_type_t parse_cluster_interface(char *ifname) iface = IF_AUTO; if (!strcmp(ifname, "cman")) iface = IF_CMAN; - if (!strcmp(ifname, "gulm")) - iface = IF_GULM; if (!strcmp(ifname, "openais")) iface = IF_OPENAIS; if (!strcmp(ifname, "corosync")) @@ -2187,7 +2292,7 @@ static if_type_t parse_cluster_interface(char *ifname) * only called if the command-line option is not present, and if it fails * we still try the interfaces in order. */ -static if_type_t get_cluster_type() +static if_type_t get_cluster_type(void) { #ifdef HAVE_COROSYNC_CONFDB_H confdb_handle_t handle; @@ -2227,6 +2332,9 @@ static if_type_t get_cluster_type() if (result != CS_OK) goto out; + if (namelen >= sizeof(buf)) + namelen = sizeof(buf) - 1; + buf[namelen] = '\0'; type = parse_cluster_interface(buf); DEBUGLOG("got interface type '%s' from confdb\n", buf); diff --git a/daemons/clvmd/clvmd.h b/daemons/clvmd/clvmd.h index ccc79cc..5bad43a 100644 --- a/daemons/clvmd/clvmd.h +++ b/daemons/clvmd/clvmd.h @@ -112,11 +112,14 @@ extern void cmd_client_cleanup(struct local_client *client); extern int add_client(struct local_client *new_client); extern void clvmd_cluster_init_completed(void); -extern void process_message(struct local_client *client, const char *buf, +extern void process_message(struct local_client *client, char *buf, int len, const char *csid); extern void debuglog(const char *fmt, ... ) __attribute__ ((format(printf, 1, 2))); +void clvmd_set_debug(debug_t new_de); +debug_t clvmd_get_debug(void); + int sync_lock(const char *resource, int mode, int flags, int *lockid); int sync_unlock(const char *resource, int lockid); diff --git a/daemons/clvmd/lvm-functions.c b/daemons/clvmd/lvm-functions.c index 214f229..6793752 100644 --- a/daemons/clvmd/lvm-functions.c +++ b/daemons/clvmd/lvm-functions.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -109,10 +109,9 @@ static const char *decode_full_locking_cmd(uint32_t cmdl) break; } - sprintf(buf, "0x%x %s (%s|%s%s%s%s%s%s)", cmdl, command, type, scope, + sprintf(buf, "0x%x %s (%s|%s%s%s%s%s)", cmdl, command, type, scope, cmdl & LCK_NONBLOCK ? "|NONBLOCK" : "", cmdl & LCK_HOLD ? "|HOLD" : "", - cmdl & LCK_LOCAL ? "|LOCAL" : "", cmdl & LCK_CLUSTER_VG ? "|CLUSTER_VG" : "", cmdl & LCK_CACHE ? "|CACHE" : ""); @@ -132,12 +131,14 @@ static const char *decode_flags(unsigned char flags) static char buf[128]; int len; - len = sprintf(buf, "0x%x ( %s%s%s%s%s)", flags, + len = sprintf(buf, "0x%x ( %s%s%s%s%s%s%s)", flags, flags & LCK_PARTIAL_MODE ? "PARTIAL_MODE|" : "", flags & LCK_MIRROR_NOSYNC_MODE ? "MIRROR_NOSYNC|" : "", flags & LCK_DMEVENTD_MONITOR_MODE ? "DMEVENTD_MONITOR|" : "", flags & LCK_ORIGIN_ONLY_MODE ? "ORIGIN_ONLY|" : "", - flags & LCK_CONVERT ? "CONVERT|" : ""); + flags & LCK_TEST_MODE ? "TEST|" : "", + flags & LCK_CONVERT ? "CONVERT|" : "", + flags & LCK_DMEVENTD_MONITOR_IGNORE ? "DMEVENTD_MONITOR_IGNORE|" : ""); if (len > 1) buf[len - 2] = ' '; @@ -147,7 +148,7 @@ static const char *decode_flags(unsigned char flags) return buf; } -char *get_last_lvm_error() +char *get_last_lvm_error(void) { return last_error; } @@ -166,11 +167,15 @@ static struct lv_info *lookup_info(const char *resource) return lvi; } -static void insert_info(const char *resource, struct lv_info *lvi) +static int insert_info(const char *resource, struct lv_info *lvi) { + int ret; + pthread_mutex_lock(&lv_hash_lock); - dm_hash_insert(lv_hash, resource, lvi); + ret = dm_hash_insert(lv_hash, resource, lvi); pthread_mutex_unlock(&lv_hash_lock); + + return ret; } static void remove_info(const char *resource) @@ -194,16 +199,16 @@ static int get_current_lock(char *resource) } -void init_lvhash() +void init_lvhash(void) { /* Create hash table for keeping LV locks & status */ - lv_hash = dm_hash_create(100); + lv_hash = dm_hash_create(1024); pthread_mutex_init(&lv_hash_lock, NULL); pthread_mutex_init(&lvm_lock, NULL); } /* Called at shutdown to tidy the lockspace */ -void destroy_lvhash() +void destroy_lvhash(void) { struct dm_hash_node *v; struct lv_info *lvi; @@ -235,14 +240,25 @@ static int hold_lock(char *resource, int mode, int flags) int saved_errno; struct lv_info *lvi; + if (test_mode()) + return 0; + /* Mask off invalid options */ flags &= LCKF_NOQUEUE | LCKF_CONVERT; lvi = lookup_info(resource); - if (lvi && lvi->lock_mode == mode) { - DEBUGLOG("hold_lock, lock mode %d already held\n", mode); - return 0; + if (lvi) { + if (lvi->lock_mode == mode) { + DEBUGLOG("hold_lock, lock mode %d already held\n", + mode); + return 0; + } + if ((lvi->lock_mode == LCK_EXCL) && (mode == LCK_WRITE)) { + DEBUGLOG("hold_lock, lock already held LCK_EXCL, " + "ignoring LCK_WRITE request"); + return 0; + } } /* Only allow explicit conversions */ @@ -265,8 +281,10 @@ static int hold_lock(char *resource, int mode, int flags) errno = saved_errno; } else { lvi = malloc(sizeof(struct lv_info)); - if (!lvi) + if (!lvi) { + errno = ENOMEM; return -1; + } lvi->lock_mode = mode; status = sync_lock(resource, mode, flags & ~LCKF_CONVERT, &lvi->lock_id); @@ -276,7 +294,10 @@ static int hold_lock(char *resource, int mode, int flags) DEBUGLOG("hold_lock. lock at %d failed: %s\n", mode, strerror(errno)); } else - insert_info(resource, lvi); + if (!insert_info(resource, lvi)) { + errno = ENOMEM; + return -1; + } errno = saved_errno; } @@ -290,6 +311,9 @@ static int hold_unlock(char *resource) int status; int saved_errno; + if (test_mode()) + return 0; + if (!(lvi = lookup_info(resource))) { DEBUGLOG("hold_unlock, lock not already held\n"); return 0; @@ -316,7 +340,7 @@ static int hold_unlock(char *resource) */ /* Activate LV exclusive or non-exclusive */ -static int do_activate_lv(char *resource, unsigned char lock_flags, int mode) +static int do_activate_lv(char *resource, unsigned char command, unsigned char lock_flags, int mode) { int oldmode; int status; @@ -326,7 +350,7 @@ static int do_activate_lv(char *resource, unsigned char lock_flags, int mode) /* Is it already open ? */ oldmode = get_current_lock(resource); - if (oldmode == mode && (lock_flags & LCK_CLUSTER_VG)) { + if (oldmode == mode && (command & LCK_CLUSTER_VG)) { DEBUGLOG("do_activate_lv, lock already held at %d\n", oldmode); return 0; /* Nothing to do */ } @@ -349,7 +373,7 @@ static int do_activate_lv(char *resource, unsigned char lock_flags, int mode) * Use lock conversion only if requested, to prevent implicit conversion * of exclusive lock to shared one during activation. */ - if (lock_flags & LCK_CLUSTER_VG) { + if (command & LCK_CLUSTER_VG) { status = hold_lock(resource, mode, LCKF_NOQUEUE | (lock_flags & LCK_CONVERT ? LCKF_CONVERT:0)); if (status) { /* Return an LVM-sensible error for this. @@ -369,9 +393,9 @@ static int do_activate_lv(char *resource, unsigned char lock_flags, int mode) goto error; if (lvi.suspended) { - memlock_inc(cmd); + critical_section_inc(cmd, "resuming"); if (!lv_resume(cmd, resource, 0)) { - memlock_dec(cmd); + critical_section_dec(cmd, "resumed"); goto error; } } @@ -389,55 +413,62 @@ error: } /* Resume the LV if it was active */ -static int do_resume_lv(char *resource, unsigned char lock_flags) +static int do_resume_lv(char *resource, unsigned char command, unsigned char lock_flags) { - int oldmode; + int oldmode, origin_only, exclusive, revert; /* Is it open ? */ oldmode = get_current_lock(resource); - if (oldmode == -1 && (lock_flags & LCK_CLUSTER_VG)) { + if (oldmode == -1 && (command & LCK_CLUSTER_VG)) { DEBUGLOG("do_resume_lv, lock not already held\n"); return 0; /* We don't need to do anything */ } + origin_only = (lock_flags & LCK_ORIGIN_ONLY_MODE) ? 1 : 0; + exclusive = (oldmode == LCK_EXCL) ? 1 : 0; + revert = (lock_flags & LCK_REVERT_MODE) ? 1 : 0; - if (!lv_resume_if_active(cmd, resource, (lock_flags & LCK_ORIGIN_ONLY_MODE) ? 1 : 0)) + if (!lv_resume_if_active(cmd, resource, origin_only, exclusive, revert)) return EIO; return 0; } /* Suspend the device if active */ -static int do_suspend_lv(char *resource, unsigned char lock_flags) +static int do_suspend_lv(char *resource, unsigned char command, unsigned char lock_flags) { int oldmode; struct lvinfo lvi; unsigned origin_only = (lock_flags & LCK_ORIGIN_ONLY_MODE) ? 1 : 0; + unsigned exclusive; /* Is it open ? */ oldmode = get_current_lock(resource); - if (oldmode == -1 && (lock_flags & LCK_CLUSTER_VG)) { + if (oldmode == -1 && (command & LCK_CLUSTER_VG)) { DEBUGLOG("do_suspend_lv, lock not already held\n"); return 0; /* Not active, so it's OK */ } + exclusive = (oldmode == LCK_EXCL) ? 1 : 0; + /* Only suspend it if it exists */ if (!lv_info_by_lvid(cmd, resource, origin_only, &lvi, 0, 0)) return EIO; - if (lvi.exists && !lv_suspend_if_active(cmd, resource, origin_only)) + if (lvi.exists && + !lv_suspend_if_active(cmd, resource, origin_only, exclusive)) return EIO; return 0; } -static int do_deactivate_lv(char *resource, unsigned char lock_flags) +static int do_deactivate_lv(char *resource, unsigned char command, unsigned char lock_flags) { int oldmode; int status; /* Is it open ? */ oldmode = get_current_lock(resource); - if (oldmode == -1 && (lock_flags & LCK_CLUSTER_VG)) { + if (oldmode == -1 && (command & LCK_CLUSTER_VG)) { DEBUGLOG("do_deactivate_lock, lock not already held\n"); return 0; /* We don't need to do anything */ } @@ -445,7 +476,7 @@ static int do_deactivate_lv(char *resource, unsigned char lock_flags) if (!lv_deactivate(cmd, resource)) return EIO; - if (lock_flags & LCK_CLUSTER_VG) { + if (command & LCK_CLUSTER_VG) { status = hold_unlock(resource); if (status) return errno; @@ -479,8 +510,8 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource) { int status = 0; - DEBUGLOG("do_lock_lv: resource '%s', cmd = %s, flags = %s, memlock = %d\n", - resource, decode_locking_cmd(command), decode_flags(lock_flags), memlock()); + DEBUGLOG("do_lock_lv: resource '%s', cmd = %s, flags = %s, critical_section = %d\n", + resource, decode_locking_cmd(command), decode_flags(lock_flags), critical_section()); if (!cmd->config_valid || config_files_changed(cmd)) { /* Reinitialise various settings inc. logging, filters */ @@ -494,10 +525,14 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource) if (lock_flags & LCK_MIRROR_NOSYNC_MODE) init_mirror_in_sync(1); - if (lock_flags & LCK_DMEVENTD_MONITOR_MODE) - init_dmeventd_monitor(1); - else - init_dmeventd_monitor(0); + if (lock_flags & LCK_DMEVENTD_MONITOR_IGNORE) + init_dmeventd_monitor(DMEVENTD_MONITOR_IGNORE); + else { + if (lock_flags & LCK_DMEVENTD_MONITOR_MODE) + init_dmeventd_monitor(1); + else + init_dmeventd_monitor(0); + } cmd->partial_activation = (lock_flags & LCK_PARTIAL_MODE) ? 1 : 0; @@ -506,24 +541,24 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource) switch (command & LCK_MASK) { case LCK_LV_EXCLUSIVE: - status = do_activate_lv(resource, lock_flags, LCK_EXCL); + status = do_activate_lv(resource, command, lock_flags, LCK_EXCL); break; case LCK_LV_SUSPEND: - status = do_suspend_lv(resource, lock_flags); + status = do_suspend_lv(resource, command, lock_flags); break; case LCK_UNLOCK: case LCK_LV_RESUME: /* if active */ - status = do_resume_lv(resource, lock_flags); + status = do_resume_lv(resource, command, lock_flags); break; case LCK_LV_ACTIVATE: - status = do_activate_lv(resource, lock_flags, LCK_READ); + status = do_activate_lv(resource, command, lock_flags, LCK_READ); break; case LCK_LV_DEACTIVATE: - status = do_deactivate_lv(resource, lock_flags); + status = do_deactivate_lv(resource, command, lock_flags); break; default: @@ -541,7 +576,7 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource) dm_pool_empty(cmd->mem); pthread_mutex_unlock(&lvm_lock); - DEBUGLOG("Command return is %d, memlock is %d\n", status, memlock()); + DEBUGLOG("Command return is %d, critical_section is %d\n", status, critical_section()); return status; } @@ -554,7 +589,7 @@ int pre_lock_lv(unsigned char command, unsigned char lock_flags, char *resource) LCKF_CONVERT is used always, local node is going to modify metadata */ if ((command & (LCK_SCOPE_MASK | LCK_TYPE_MASK)) == LCK_LV_SUSPEND && - (lock_flags & LCK_CLUSTER_VG)) { + (command & LCK_CLUSTER_VG)) { DEBUGLOG("pre_lock_lv: resource '%s', cmd = %s, flags = %s\n", resource, decode_locking_cmd(command), decode_flags(lock_flags)); @@ -573,7 +608,7 @@ int post_lock_lv(unsigned char command, unsigned char lock_flags, /* Opposite of above, done on resume after a metadata update */ if ((command & (LCK_SCOPE_MASK | LCK_TYPE_MASK)) == LCK_LV_RESUME && - (lock_flags & LCK_CLUSTER_VG)) { + (command & LCK_CLUSTER_VG)) { int oldmode; DEBUGLOG @@ -611,7 +646,7 @@ int do_check_lvm1(const char *vgname) return status == 1 ? 0 : EBUSY; } -int do_refresh_cache() +int do_refresh_cache(void) { DEBUGLOG("Refreshing context\n"); log_notice("Refreshing context"); @@ -633,46 +668,6 @@ int do_refresh_cache() return 0; } - -/* Only called at gulm startup. Drop any leftover VG or P_orphan locks - that might be hanging around if we died for any reason -*/ -static void drop_vg_locks(void) -{ - char vg[128]; - char line[255]; - FILE *vgs = - popen - (LVM_PATH " pvs --config 'log{command_names=0 prefix=\"\"}' --nolocking --noheadings -o vg_name", "r"); - - sync_unlock("P_" VG_ORPHANS, LCK_EXCL); - sync_unlock("P_" VG_GLOBAL, LCK_EXCL); - - if (!vgs) - return; - - while (fgets(line, sizeof(line), vgs)) { - char *vgend; - char *vgstart; - - if (line[strlen(line)-1] == '\n') - line[strlen(line)-1] = '\0'; - - vgstart = line + strspn(line, " "); - vgend = vgstart + strcspn(vgstart, " "); - *vgend = '\0'; - - if (strncmp(vgstart, "WARNING:", 8) == 0) - continue; - - sprintf(vg, "V_%s", vgstart); - sync_unlock(vg, LCK_EXCL); - - } - if (fclose(vgs)) - DEBUGLOG("vgs fclose failed: %s\n", strerror(errno)); -} - /* * Handle VG lock - drop metadata or update lvmcache state */ @@ -689,8 +684,8 @@ void do_lock_vg(unsigned char command, unsigned char lock_flags, char *resource) if (strncmp(resource, "P_#", 3) && !strncmp(resource, "P_", 2)) lock_cmd |= LCK_CACHE; - DEBUGLOG("do_lock_vg: resource '%s', cmd = %s, flags = %s, memlock = %d\n", - resource, decode_full_locking_cmd(lock_cmd), decode_flags(lock_flags), memlock()); + DEBUGLOG("do_lock_vg: resource '%s', cmd = %s, flags = %s, critical_section = %d\n", + resource, decode_full_locking_cmd(lock_cmd), decode_flags(lock_flags), critical_section()); /* P_#global causes a full cache refresh */ if (!strcmp(resource, "P_" VG_GLOBAL)) { @@ -717,49 +712,33 @@ void do_lock_vg(unsigned char command, unsigned char lock_flags, char *resource) } /* - * Compare the uuid with the list of exclusive locks that clvmd - * held before it was restarted, so we can get the right kind - * of lock now we are restarting. - */ -static int was_ex_lock(char *uuid, char **argv) -{ - int optnum = 0; - char *opt = argv[optnum]; - - while (opt) { - if (strcmp(opt, "-E") == 0) { - opt = argv[++optnum]; - if (opt && (strcmp(opt, uuid) == 0)) { - DEBUGLOG("Lock %s is exclusive\n", uuid); - return 1; - } - } - opt = argv[++optnum]; - } - return 0; -} - -/* * Ideally, clvmd should be started before any LVs are active * but this may not be the case... * I suppose this also comes in handy if clvmd crashes, not that it would! */ -static void *get_initial_state(char **argv) +static int get_initial_state(struct dm_hash_table *excl_uuid) { int lock_mode; char lv[64], vg[64], flags[25], vg_flags[25]; char uuid[65]; char line[255]; - FILE *lvs = - popen - (LVM_PATH " lvs --config 'log{command_names=0 prefix=\"\"}' --nolocking --noheadings -o vg_uuid,lv_uuid,lv_attr,vg_attr", - "r"); - - if (!lvs) - return NULL; + char *lvs_cmd; + const char *lvm_binary = getenv("LVM_BINARY") ? : LVM_PATH; + FILE *lvs; + + if (dm_asprintf(&lvs_cmd, "%s lvs --config 'log{command_names=0 prefix=\"\"}' " + "--nolocking --noheadings -o vg_uuid,lv_uuid,lv_attr,vg_attr", + lvm_binary) < 0) + return_0; + + /* FIXME: Maybe link and use liblvm2cmd directly instead of fork */ + if (!(lvs = popen(lvs_cmd, "r"))) { + dm_free(lvs_cmd); + return 0; + } while (fgets(line, sizeof(line), lvs)) { - if (sscanf(line, "%s %s %s %s\n", vg, lv, flags, vg_flags) == 4) { + if (sscanf(line, "%64s %64s %25s %25s\n", vg, lv, flags, vg_flags) == 4) { /* States: s:suspended a:active S:dropped snapshot I:invalid snapshot */ if (strlen(vg) == 38 && /* is is a valid UUID ? */ @@ -782,21 +761,23 @@ static void *get_initial_state(char **argv) memcpy(&uuid[58], &lv[32], 6); uuid[64] = '\0'; - lock_mode = LCK_READ; - /* Look for this lock in the list of EX locks we were passed on the command-line */ - if (was_ex_lock(uuid, argv)) - lock_mode = LCK_EXCL; + lock_mode = (dm_hash_lookup(excl_uuid, uuid)) ? + LCK_EXCL : LCK_READ; DEBUGLOG("getting initial lock for %s\n", uuid); - hold_lock(uuid, lock_mode, LCKF_NOQUEUE); + if (hold_lock(uuid, lock_mode, LCKF_NOQUEUE)) + DEBUGLOG("Failed to hold lock %s\n", uuid); } } } if (fclose(lvs)) DEBUGLOG("lvs fclose failed: %s\n", strerror(errno)); - return NULL; + + dm_free(lvs_cmd); + + return 1; } static void lvm2_log_fn(int level, const char *file, int line, int dm_errno, @@ -863,7 +844,7 @@ void lvm_do_backup(const char *vgname) else log_error("Error backing up metadata, can't find VG for group %s", vgname); - free_vg(vg); + release_vg(vg); dm_pool_empty(cmd->mem); pthread_mutex_unlock(&lvm_lock); @@ -894,10 +875,26 @@ struct dm_hash_node *get_next_excl_lock(struct dm_hash_node *v, char **name) return v; } +void lvm_do_fs_unlock(void) +{ + pthread_mutex_lock(&lvm_lock); + DEBUGLOG("Syncing device names\n"); + fs_unlock(); + pthread_mutex_unlock(&lvm_lock); +} + /* Called to initialise the LVM context of the daemon */ -int init_clvm(int using_gulm, char **argv) +int init_clvm(struct dm_hash_table *excl_uuid) { - if (!(cmd = create_toolcontext(1, NULL))) { + /* Use LOG_DAEMON for syslog messages instead of LOG_USER */ + init_syslog(LOG_DAEMON); + openlog("clvmd", LOG_PID, LOG_DAEMON); + + /* Initialise already held locks */ + if (!get_initial_state(excl_uuid)) + log_error("Cannot load initial lock states."); + + if (!(cmd = create_toolcontext(1, NULL, 0, 1))) { log_error("Failed to allocate command context"); return 0; } @@ -907,21 +904,12 @@ int init_clvm(int using_gulm, char **argv) return 0; } - /* Use LOG_DAEMON for syslog messages instead of LOG_USER */ - init_syslog(LOG_DAEMON); - openlog("clvmd", LOG_PID, LOG_DAEMON); cmd->cmd_line = "clvmd"; /* Check lvm.conf is setup for cluster-LVM */ check_config(); init_ignore_suspended_devices(1); - /* Remove any non-LV locks that may have been left around */ - if (using_gulm) - drop_vg_locks(); - - get_initial_state(argv); - /* Trap log messages so we can pass them back to the user */ init_log_fn(lvm2_log_fn); memlock_inc_daemon(cmd); diff --git a/daemons/clvmd/lvm-functions.h b/daemons/clvmd/lvm-functions.h index 97153d4..565f878 100644 --- a/daemons/clvmd/lvm-functions.h +++ b/daemons/clvmd/lvm-functions.h @@ -27,7 +27,7 @@ extern int post_lock_lv(unsigned char lock_cmd, unsigned char lock_flags, char *resource); extern int do_check_lvm1(const char *vgname); extern int do_refresh_cache(void); -extern int init_clvm(int using_gulm, char **argv); +extern int init_clvm(struct dm_hash_table *excl_uuid); extern void destroy_lvm(void); extern void init_lvhash(void); extern void destroy_lvhash(void); @@ -36,5 +36,6 @@ extern char *get_last_lvm_error(void); extern void do_lock_vg(unsigned char command, unsigned char lock_flags, char *resource); extern struct dm_hash_node *get_next_excl_lock(struct dm_hash_node *v, char **name); +void lvm_do_fs_unlock(void); #endif diff --git a/daemons/clvmd/refresh_clvmd.c b/daemons/clvmd/refresh_clvmd.c index 1e88b5c..28b5625 100644 --- a/daemons/clvmd/refresh_clvmd.c +++ b/daemons/clvmd/refresh_clvmd.c @@ -13,6 +13,8 @@ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* FIXME Remove duplicated functions from this file. */ + /* * Send a command to a running clvmd from the command-line */ @@ -45,7 +47,12 @@ static int _clvmd_sock = -1; static int _open_local_sock(void) { int local_socket; - struct sockaddr_un sockaddr; + struct sockaddr_un sockaddr = { .sun_family = AF_UNIX }; + + if (!dm_strncpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(sockaddr.sun_path))) { + fprintf(stderr, "%s: clvmd socket name too long.", CLVMD_SOCKNAME); + return -1; + } /* Open local socket */ if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) { @@ -53,11 +60,6 @@ static int _open_local_sock(void) return -1; } - memset(&sockaddr, 0, sizeof(sockaddr)); - memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME)); - - sockaddr.sun_family = AF_UNIX; - if (connect(local_socket,(struct sockaddr *) &sockaddr, sizeof(sockaddr))) { int saved_errno = errno; @@ -80,7 +82,7 @@ static int _send_request(const char *inbuf, int inlen, char **retbuf, int no_res char outbuf[PIPE_BUF]; struct clvm_header *outheader = (struct clvm_header *) outbuf; int len; - int off; + unsigned off; int buflen; int err; @@ -149,29 +151,31 @@ static int _send_request(const char *inbuf, int inlen, char **retbuf, int no_res /* Build the structure header and parse-out wildcard node names */ static void _build_header(struct clvm_header *head, int cmd, const char *node, - int len) + unsigned int len) { head->cmd = cmd; head->status = 0; head->flags = 0; + head->xid = 0; head->clientid = 0; - head->arglen = len; - - if (node) { - /* - * Allow a couple of special node names: - * "*" for all nodes, - * "." for the local node only - */ - if (strcmp(node, "*") == 0) { - head->node[0] = '\0'; - } else if (strcmp(node, ".") == 0) { - head->node[0] = '\0'; - head->flags = CLVMD_FLAG_LOCAL; - } else - strcpy(head->node, node); - } else + if (len) + /* 1 byte is used from struct clvm_header.args[1], so -> len - 1 */ + head->arglen = len - 1; + else { + head->arglen = 0; + *head->args = '\0'; + } + + /* + * Translate special node names. + */ + if (!node || !strcmp(node, NODE_ALL)) + head->node[0] = '\0'; + else if (!strcmp(node, NODE_LOCAL)) { head->node[0] = '\0'; + head->flags = CLVMD_FLAG_LOCAL; + } else + strcpy(head->node, node); } /* @@ -198,7 +202,8 @@ static int _cluster_request(char cmd, const char *node, void *data, int len, return 0; _build_header(head, cmd, node, len); - memcpy(head->node + strlen(head->node) + 1, data, len); + if (len) + memcpy(head->node + strlen(head->node) + 1, data, len); status = _send_request(outbuf, sizeof(struct clvm_header) + strlen(head->node) + len, &retbuf, no_response); @@ -290,7 +295,7 @@ int refresh_clvmd(int all_nodes) int status; int i; - status = _cluster_request(CLVMD_CMD_REFRESH, all_nodes?"*":".", args, 0, &response, &num_responses, 0); + status = _cluster_request(CLVMD_CMD_REFRESH, all_nodes ? NODE_ALL : NODE_LOCAL, args, 0, &response, &num_responses, 0); /* If any nodes were down then display them and return an error */ for (i = 0; i < num_responses; i++) { @@ -321,7 +326,7 @@ int restart_clvmd(int all_nodes) { int dummy, status; - status = _cluster_request(CLVMD_CMD_RESTART, all_nodes?"*":".", NULL, 0, NULL, &dummy, 1); + status = _cluster_request(CLVMD_CMD_RESTART, all_nodes ? NODE_ALL : NODE_LOCAL, NULL, 0, NULL, &dummy, 1); /* * FIXME: we cannot receive response, clvmd re-exec before it. @@ -348,9 +353,9 @@ int debug_clvmd(int level, int clusterwide) args[0] = level; if (clusterwide) - nodes = "*"; + nodes = NODE_ALL; else - nodes = "."; + nodes = NODE_LOCAL; status = _cluster_request(CLVMD_CMD_SET_DEBUG, nodes, args, 1, &response, &num_responses, 0); diff --git a/daemons/clvmd/tcp-comms.c b/daemons/clvmd/tcp-comms.c deleted file mode 100644 index 5f86556..0000000 --- a/daemons/clvmd/tcp-comms.c +++ /dev/null @@ -1,502 +0,0 @@ -/* - * Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU Lesser General Public License v.2.1. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * This provides the inter-clvmd communications for a system without CMAN. - * There is a listening TCP socket which accepts new connections in the - * normal way. - * It can also make outgoing connnections to the other clvmd nodes. - */ - -#include "clvmd-common.h" - -#include <pthread.h> -#include <sys/utsname.h> -#include <sys/ioctl.h> -#include <sys/socket.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <stdint.h> -#include <fcntl.h> -#include <stddef.h> -#include <stdint.h> -#include <errno.h> -#include <syslog.h> -#include <netdb.h> -#include <assert.h> - -#include "clvm.h" -#include "clvmd-comms.h" -#include "clvmd.h" -#include "clvmd-gulm.h" - -#define DEFAULT_TCP_PORT 21064 - -static int listen_fd = -1; -static int tcp_port; -struct dm_hash_table *sock_hash; - -static int get_our_ip_address(char *addr, int *family); -static int read_from_tcpsock(struct local_client *fd, char *buf, int len, char *csid, - struct local_client **new_client); - -/* Called by init_cluster() to open up the listening socket */ -int init_comms(unsigned short port) -{ - struct sockaddr_in6 addr; - - sock_hash = dm_hash_create(100); - tcp_port = port ? : DEFAULT_TCP_PORT; - - listen_fd = socket(AF_INET6, SOCK_STREAM, 0); - - if (listen_fd < 0) - { - return -1; - } - else - { - int one = 1; - setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(int)); - setsockopt(listen_fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int)); - } - - memset(&addr, 0, sizeof(addr)); // Bind to INADDR_ANY - addr.sin6_family = AF_INET6; - addr.sin6_port = htons(tcp_port); - - if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) - { - DEBUGLOG("Can't bind to port: %s\n", strerror(errno)); - syslog(LOG_ERR, "Can't bind to port %d, is clvmd already running ?", tcp_port); - close(listen_fd); - return -1; - } - - listen(listen_fd, 5); - - /* Set Close-on-exec */ - fcntl(listen_fd, F_SETFD, 1); - - return 0; -} - -void tcp_remove_client(const char *c_csid) -{ - struct local_client *client; - char csid[GULM_MAX_CSID_LEN]; - unsigned int i; - memcpy(csid, c_csid, sizeof csid); - DEBUGLOG("tcp_remove_client\n"); - - /* Don't actually close the socket here - that's the - job of clvmd.c whch will do the job when it notices the - other end has gone. We just need to remove the client(s) from - the hash table so we don't try to use it for sending any more */ - for (i = 0; i < 2; i++) - { - client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN); - if (client) - { - dm_hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN); - client->removeme = 1; - close(client->fd); - } - /* Look for a mangled one too, on the 2nd iteration. */ - csid[0] ^= 0x80; - } -} - -int alloc_client(int fd, const char *c_csid, struct local_client **new_client) -{ - struct local_client *client; - char csid[GULM_MAX_CSID_LEN]; - memcpy(csid, c_csid, sizeof csid); - - DEBUGLOG("alloc_client %d csid = %s\n", fd, print_csid(csid)); - - /* Create a local_client and return it */ - client = malloc(sizeof(struct local_client)); - if (!client) - { - DEBUGLOG("malloc failed\n"); - return -1; - } - - memset(client, 0, sizeof(struct local_client)); - client->fd = fd; - client->type = CLUSTER_DATA_SOCK; - client->callback = read_from_tcpsock; - if (new_client) - *new_client = client; - - /* Add to our list of node sockets */ - if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN)) - { - DEBUGLOG("alloc_client mangling CSID for second connection\n"); - /* This is a duplicate connection but we can't close it because - the other end may already have started sending. - So, we mangle the IP address and keep it, all sending will - go out of the main FD - */ - csid[0] ^= 0x80; - client->bits.net.flags = 1; /* indicate mangled CSID */ - - /* If it still exists then kill the connection as we should only - ever have one incoming connection from each node */ - if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN)) - { - DEBUGLOG("Multiple incoming connections from node\n"); - syslog(LOG_ERR, " Bogus incoming connection from %d.%d.%d.%d\n", csid[0],csid[1],csid[2],csid[3]); - - free(client); - errno = ECONNREFUSED; - return -1; - } - } - dm_hash_insert_binary(sock_hash, csid, GULM_MAX_CSID_LEN, client); - - return 0; -} - -int get_main_gulm_cluster_fd() -{ - return listen_fd; -} - - -/* Read on main comms (listen) socket, accept it */ -int cluster_fd_gulm_callback(struct local_client *fd, char *buf, int len, const char *csid, - struct local_client **new_client) -{ - int newfd; - struct sockaddr_in6 addr; - socklen_t addrlen = sizeof(addr); - int status; - char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN]; - - DEBUGLOG("cluster_fd_callback\n"); - *new_client = NULL; - newfd = accept(listen_fd, (struct sockaddr *)&addr, &addrlen); - - DEBUGLOG("cluster_fd_callback, newfd=%d (errno=%d)\n", newfd, errno); - if (!newfd) - { - syslog(LOG_ERR, "error in accept: %m"); - errno = EAGAIN; - return -1; /* Don't return an error or clvmd will close the listening FD */ - } - - /* Check that the client is a member of the cluster - and reject if not. - */ - if (gulm_name_from_csid((char *)&addr.sin6_addr, name) < 0) - { - syslog(LOG_ERR, "Got connect from non-cluster node %s\n", - print_csid((char *)&addr.sin6_addr)); - DEBUGLOG("Got connect from non-cluster node %s\n", - print_csid((char *)&addr.sin6_addr)); - close(newfd); - - errno = EAGAIN; - return -1; - } - - status = alloc_client(newfd, (char *)&addr.sin6_addr, new_client); - if (status) - { - DEBUGLOG("cluster_fd_callback, alloc_client failed, status = %d\n", status); - close(newfd); - /* See above... */ - errno = EAGAIN; - return -1; - } - DEBUGLOG("cluster_fd_callback, returning %d, %p\n", newfd, *new_client); - return newfd; -} - -/* Try to get at least 'len' bytes from the socket */ -static int really_read(int fd, char *buf, int len) -{ - int got, offset; - - got = offset = 0; - - do { - got = read(fd, buf+offset, len-offset); - DEBUGLOG("really_read. got %d bytes\n", got); - offset += got; - } while (got > 0 && offset < len); - - if (got < 0) - return got; - else - return offset; -} - - -static int read_from_tcpsock(struct local_client *client, char *buf, int len, char *csid, - struct local_client **new_client) -{ - struct sockaddr_in6 addr; - socklen_t slen = sizeof(addr); - struct clvm_header *header = (struct clvm_header *)buf; - int status; - uint32_t arglen; - - DEBUGLOG("read_from_tcpsock fd %d\n", client->fd); - *new_client = NULL; - - /* Get "csid" */ - getpeername(client->fd, (struct sockaddr *)&addr, &slen); - memcpy(csid, &addr.sin6_addr, GULM_MAX_CSID_LEN); - - /* Read just the header first, then get the rest if there is any. - * Stream sockets, sigh. - */ - status = really_read(client->fd, buf, sizeof(struct clvm_header)); - if (status > 0) - { - int status2; - - arglen = ntohl(header->arglen); - - /* Get the rest */ - if (arglen && arglen < GULM_MAX_CLUSTER_MESSAGE) - { - status2 = really_read(client->fd, buf+status, arglen); - if (status2 > 0) - status += status2; - else - status = status2; - } - } - - DEBUGLOG("read_from_tcpsock, status = %d(errno = %d)\n", status, errno); - - /* Remove it from the hash table if there's an error, clvmd will - remove the socket from its lists and free the client struct */ - if (status == 0 || - (status < 0 && errno != EAGAIN && errno != EINTR)) - { - char remcsid[GULM_MAX_CSID_LEN]; - - memcpy(remcsid, csid, GULM_MAX_CSID_LEN); - close(client->fd); - - /* If the csid was mangled, then make sure we remove the right entry */ - if (client->bits.net.flags) - remcsid[0] ^= 0x80; - dm_hash_remove_binary(sock_hash, remcsid, GULM_MAX_CSID_LEN); - - /* Tell cluster manager layer */ - add_down_node(remcsid); - } - else { - gulm_add_up_node(csid); - /* Send it back to clvmd */ - process_message(client, buf, status, csid); - } - return status; -} - -int gulm_connect_csid(const char *csid, struct local_client **newclient) -{ - int fd; - struct sockaddr_in6 addr; - int status; - int one = 1; - - DEBUGLOG("Connecting socket\n"); - fd = socket(PF_INET6, SOCK_STREAM, 0); - - if (fd < 0) - { - syslog(LOG_ERR, "Unable to create new socket: %m"); - return -1; - } - - addr.sin6_family = AF_INET6; - memcpy(&addr.sin6_addr, csid, GULM_MAX_CSID_LEN); - addr.sin6_port = htons(tcp_port); - - DEBUGLOG("Connecting socket %d\n", fd); - if (connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_in6)) < 0) - { - /* "Connection refused" is "normal" because clvmd may not yet be running - * on that node. - */ - if (errno != ECONNREFUSED) - { - syslog(LOG_ERR, "Unable to connect to remote node: %m"); - } - DEBUGLOG("Unable to connect to remote node: %s\n", strerror(errno)); - close(fd); - return -1; - } - - /* Set Close-on-exec */ - fcntl(fd, F_SETFD, 1); - setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int)); - - status = alloc_client(fd, csid, newclient); - if (status) - close(fd); - else - add_client(*newclient); - - /* If we can connect to it, it must be running a clvmd */ - gulm_add_up_node(csid); - return status; -} - -/* Send a message to a known CSID */ -static int tcp_send_message(void *buf, int msglen, const char *csid, const char *errtext) -{ - int status; - struct local_client *client; - char ourcsid[GULM_MAX_CSID_LEN]; - - assert(csid); - - DEBUGLOG("tcp_send_message, csid = %s, msglen = %d\n", print_csid(csid), msglen); - - /* Don't connect to ourself */ - get_our_gulm_csid(ourcsid); - if (memcmp(csid, ourcsid, GULM_MAX_CSID_LEN) == 0) - return msglen; - - client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN); - if (!client) - { - status = gulm_connect_csid(csid, &client); - if (status) - return -1; - } - DEBUGLOG("tcp_send_message, fd = %d\n", client->fd); - - return write(client->fd, buf, msglen); -} - - -int gulm_cluster_send_message(void *buf, int msglen, const char *csid, const char *errtext) -{ - int status=0; - - DEBUGLOG("cluster send message, csid = %p, msglen = %d\n", csid, msglen); - - /* If csid is NULL then send to all known (not just connected) nodes */ - if (!csid) - { - void *context = NULL; - char loop_csid[GULM_MAX_CSID_LEN]; - - /* Loop round all gulm-known nodes */ - while (get_next_node_csid(&context, loop_csid)) - { - status = tcp_send_message(buf, msglen, loop_csid, errtext); - if (status == 0 || - (status < 0 && (errno == EAGAIN || errno == EINTR))) - break; - } - } - else - { - - status = tcp_send_message(buf, msglen, csid, errtext); - } - return status; -} - -/* To get our own IP address we get the locally bound address of the - socket that's talking to GULM in the assumption(eek) that it will - be on the "right" network in a multi-homed system */ -static int get_our_ip_address(char *addr, int *family) -{ - struct utsname info; - - uname(&info); - get_ip_address(info.nodename, addr); - - return 0; -} - -/* Public version of above for those that don't care what protocol - we're using */ -void get_our_gulm_csid(char *csid) -{ - static char our_csid[GULM_MAX_CSID_LEN]; - static int got_csid = 0; - - if (!got_csid) - { - int family; - - memset(our_csid, 0, sizeof(our_csid)); - if (get_our_ip_address(our_csid, &family)) - { - got_csid = 1; - } - } - memcpy(csid, our_csid, GULM_MAX_CSID_LEN); -} - -static void map_v4_to_v6(struct in_addr *ip4, struct in6_addr *ip6) -{ - ip6->s6_addr32[0] = 0; - ip6->s6_addr32[1] = 0; - ip6->s6_addr32[2] = htonl(0xffff); - ip6->s6_addr32[3] = ip4->s_addr; -} - -/* Get someone else's IP address from DNS */ -int get_ip_address(const char *node, char *addr) -{ - struct hostent *he; - - memset(addr, 0, GULM_MAX_CSID_LEN); - - // TODO: what do we do about multi-homed hosts ??? - // CCSs ip_interfaces solved this but some bugger removed it. - - /* Try IPv6 first. The man page for gethostbyname implies that - it will lookup ip6 & ip4 names, but it seems not to */ - he = gethostbyname2(node, AF_INET6); - if (he) - { - memcpy(addr, he->h_addr_list[0], - he->h_length); - } - else - { - he = gethostbyname2(node, AF_INET); - if (!he) - return -1; - map_v4_to_v6((struct in_addr *)he->h_addr_list[0], (struct in6_addr *)addr); - } - - return 0; -} - -char *print_csid(const char *csid) -{ - static char buf[128]; - int *icsid = (int *)csid; - - sprintf(buf, "[%x.%x.%x.%x]", - icsid[0],icsid[1],icsid[2],icsid[3]); - - return buf; -} diff --git a/daemons/clvmd/tcp-comms.h b/daemons/clvmd/tcp-comms.h deleted file mode 100644 index 9260e12..0000000 --- a/daemons/clvmd/tcp-comms.h +++ /dev/null @@ -1,13 +0,0 @@ -#include <netinet/in.h> - -#define GULM_MAX_CLUSTER_MESSAGE 1600 -#define GULM_MAX_CSID_LEN sizeof(struct in6_addr) -#define GULM_MAX_CLUSTER_MEMBER_NAME_LEN 128 - -extern int init_comms(unsigned short); -extern char *print_csid(const char *); -int get_main_gulm_cluster_fd(void); -int cluster_fd_gulm_callback(struct local_client *fd, char *buf, int len, const char *csid, struct local_client **new_client); -int gulm_cluster_send_message(void *buf, int msglen, const char *csid, const char *errtext); -void get_our_gulm_csid(char *csid); -int gulm_connect_csid(const char *csid, struct local_client **newclient); diff --git a/daemons/cmirrord/clogd.c b/daemons/cmirrord/clogd.c index 8d9a7b9..adf7a92 100644 --- a/daemons/cmirrord/clogd.c +++ b/daemons/cmirrord/clogd.c @@ -121,7 +121,8 @@ static void process_signals(void) static void remove_lockfile(void) { - unlink(CMIRRORD_PIDFILE); + if (unlink(CMIRRORD_PIDFILE)) + LOG_ERROR("Unable to remove \"" CMIRRORD_PIDFILE "\" %s", strerror(errno)); } /* @@ -133,6 +134,12 @@ static void daemonize(void) { int pid; int status; + int devnull; + + if ((devnull = open("/dev/null", O_RDWR)) == -1) { + LOG_ERROR("Can't open /dev/null: %s", strerror(errno)); + exit(EXIT_FAILURE); + } signal(SIGTERM, &parent_exit_handler); @@ -178,13 +185,22 @@ static void daemonize(void) } setsid(); - chdir("/"); + if (chdir("/")) { + LOG_ERROR("Failed to chdir /: %s", strerror(errno)); + exit(EXIT_FAILURE); + } + umask(0); - close(0); close(1); close(2); - open("/dev/null", O_RDONLY); /* reopen stdin */ - open("/dev/null", O_WRONLY); /* reopen stdout */ - open("/dev/null", O_WRONLY); /* reopen stderr */ + if (close(0) || close(1) || close(2)) { + LOG_ERROR("Failed to close terminal FDs"); + exit(EXIT_FAILURE); + } + + if ((dup2(devnull, 0) < 0) || /* reopen stdin */ + (dup2(devnull, 1) < 0) || /* reopen stdout */ + (dup2(devnull, 2) < 0)) /* reopen stderr */ + exit(EXIT_FAILURE); LOG_OPEN("cmirrord", LOG_PID, LOG_DAEMON); diff --git a/daemons/cmirrord/cluster.c b/daemons/cmirrord/cluster.c index b6c925a..70c76c3 100644 --- a/daemons/cmirrord/cluster.c +++ b/daemons/cmirrord/cluster.c @@ -20,10 +20,12 @@ #include <corosync/cpg.h> #include <errno.h> -#include <openais/saAis.h> -#include <openais/saCkpt.h> #include <signal.h> #include <unistd.h> +#if CMIRROR_HAS_CHECKPOINT +#include <openais/saAis.h> +#include <openais/saCkpt.h> +#endif /* Open AIS error codes */ #define str_ais_error(x) \ @@ -62,13 +64,13 @@ RQ_TYPE((x) & ~DM_ULOG_RESPONSE) static uint32_t my_cluster_id = 0xDEAD; +#if CMIRROR_HAS_CHECKPOINT static SaCkptHandleT ckpt_handle = 0; static SaCkptCallbacksT callbacks = { 0, 0 }; static SaVersionT version = { 'B', 1, 1 }; +#endif #define DEBUGGING_HISTORY 100 -//static char debugging[DEBUGGING_HISTORY][128]; -//static int idx = 0; #define LOG_SPRINT(cc, f, arg...) do { \ cc->idx++; \ cc->idx = cc->idx % DEBUGGING_HISTORY; \ @@ -77,6 +79,7 @@ static SaVersionT version = { 'B', 1, 1 }; static int log_resp_rec = 0; +#define RECOVERING_REGION_SECTION_SIZE 64 struct checkpoint_data { uint32_t requester; char uuid[CPG_MAX_NAME_LENGTH]; @@ -86,7 +89,7 @@ struct checkpoint_data { char *clean_bits; char *recovering_region; struct checkpoint_data *next; -}; +}; #define INVALID 0 #define VALID 1 @@ -128,7 +131,6 @@ static struct dm_list clog_cpg_list; int cluster_send(struct clog_request *rq) { int r; - int count=0; int found = 0; struct iovec iov; struct clog_cpg *entry; @@ -165,7 +167,10 @@ int cluster_send(struct clog_request *rq) if (entry->cpg_state != VALID) return -EINVAL; +#if CMIRROR_HAS_CHECKPOINT do { + int count = 0; + r = cpg_mcast_joined(entry->handle, CPG_TYPE_AGREED, &iov, 1); if (r != SA_AIS_ERR_TRY_AGAIN) break; @@ -189,12 +194,14 @@ int cluster_send(struct clog_request *rq) str_ais_error(r)); usleep(1000); } while (1); - - if (r == CPG_OK) +#else + r = cpg_mcast_joined(entry->handle, CPG_TYPE_AGREED, &iov, 1); +#endif + if (r == CS_OK) return 0; /* error codes found in openais/cpg.h */ - LOG_ERROR("cpg_mcast_joined error: %s", str_ais_error(r)); + LOG_ERROR("cpg_mcast_joined error: %d", r); rq->u_rq.error = -EBADE; return -EBADE; @@ -419,6 +426,7 @@ static void free_checkpoint(struct checkpoint_data *cp) free(cp); } +#if CMIRROR_HAS_CHECKPOINT static int export_checkpoint(struct checkpoint_data *cp) { SaCkptCheckpointCreationAttributesT attr; @@ -587,7 +595,54 @@ rr_create_retry: return 0; } -static int import_checkpoint(struct clog_cpg *entry, int no_read) +#else +static int export_checkpoint(struct checkpoint_data *cp) +{ + int r, rq_size; + struct clog_request *rq; + + rq_size = sizeof(*rq); + rq_size += RECOVERING_REGION_SECTION_SIZE; + rq_size += cp->bitmap_size * 2; /* clean|sync_bits */ + + rq = malloc(rq_size); + if (!rq) { + LOG_ERROR("export_checkpoint: " + "Unable to allocate transfer structs"); + return -ENOMEM; + } + memset(rq, 0, rq_size); + + dm_list_init(&rq->u.list); + rq->u_rq.request_type = DM_ULOG_CHECKPOINT_READY; + rq->originator = cp->requester; + strncpy(rq->u_rq.uuid, cp->uuid, CPG_MAX_NAME_LENGTH); + rq->u_rq.seq = my_cluster_id; + rq->u_rq.data_size = rq_size - sizeof(*rq); + + /* Sync bits */ + memcpy(rq->u_rq.data, cp->sync_bits, cp->bitmap_size); + + /* Clean bits */ + memcpy(rq->u_rq.data + cp->bitmap_size, cp->clean_bits, cp->bitmap_size); + + /* Recovering region */ + memcpy(rq->u_rq.data + (cp->bitmap_size * 2), cp->recovering_region, + strlen(cp->recovering_region)); + + r = cluster_send(rq); + if (r) + LOG_ERROR("Failed to send checkpoint ready notice: %s", + strerror(-r)); + + free(rq); + return 0; +} +#endif /* CMIRROR_HAS_CHECKPOINT */ + +#if CMIRROR_HAS_CHECKPOINT +static int import_checkpoint(struct clog_cpg *entry, int no_read, + struct clog_request *rq __attribute__((unused))) { int rtn = 0; SaCkptCheckpointHandleT h; @@ -619,6 +674,7 @@ open_retry: if (rv != SA_AIS_OK) { LOG_ERROR("[%s] Failed to open checkpoint: %s", SHORT_UUID(entry->name.value), str_ais_error(rv)); + free(bitmap); return -EIO; /* FIXME: better error */ } @@ -647,6 +703,7 @@ init_retry: if (rv != SA_AIS_OK) { LOG_ERROR("[%s] Sync checkpoint section creation failed: %s", SHORT_UUID(entry->name.value), str_ais_error(rv)); + free(bitmap); return -EIO; /* FIXME: better error */ } @@ -740,6 +797,32 @@ no_read: return rtn; } +#else +static int import_checkpoint(struct clog_cpg *entry, int no_read, + struct clog_request *rq) +{ + int bitmap_size; + + bitmap_size = (rq->u_rq.data_size - RECOVERING_REGION_SECTION_SIZE) / 2; + if (bitmap_size < 0) { + LOG_ERROR("Checkpoint has invalid payload size."); + return -EINVAL; + } + + if (pull_state(entry->name.value, entry->luid, "sync_bits", + rq->u_rq.data, bitmap_size) || + pull_state(entry->name.value, entry->luid, "clean_bits", + rq->u_rq.data + bitmap_size, bitmap_size) || + pull_state(entry->name.value, entry->luid, "recovering_region", + rq->u_rq.data + (bitmap_size * 2), + RECOVERING_REGION_SECTION_SIZE)) { + LOG_ERROR("Error loading bitmap state from checkpoint."); + return -EIO; + } + return 0; +} +#endif /* CMIRROR_HAS_CHECKPOINT */ + static void do_checkpoints(struct clog_cpg *entry, int leaving) { struct checkpoint_data *cp; @@ -827,10 +910,11 @@ static int resend_requests(struct clog_cpg *entry) rq->u_rq.seq); rq->u_rq.data_size = 0; - kernel_send(&rq->u_rq); - + if (kernel_send(&rq->u_rq)) + LOG_ERROR("Failed to respond to kernel [%s]", + RQ_TYPE(rq->u_rq.request_type)); break; - + default: /* * If an action or a response is required, then @@ -857,13 +941,13 @@ static int resend_requests(struct clog_cpg *entry) static int do_cluster_work(void *data __attribute__((unused))) { - int r = SA_AIS_OK; + int r = CS_OK; struct clog_cpg *entry, *tmp; dm_list_iterate_items_safe(entry, tmp, &clog_cpg_list) { - r = cpg_dispatch(entry->handle, CPG_DISPATCH_ALL); - if (r != SA_AIS_OK) - LOG_ERROR("cpg_dispatch failed: %s", str_ais_error(r)); + r = cpg_dispatch(entry->handle, CS_DISPATCH_ALL); + if (r != CS_OK) + LOG_ERROR("cpg_dispatch failed: %d", r); if (entry->free_me) { free(entry); @@ -874,7 +958,7 @@ static int do_cluster_work(void *data __attribute__((unused))) resend_requests(entry); } - return (r == SA_AIS_OK) ? 0 : -1; /* FIXME: good error number? */ + return (r == CS_OK) ? 0 : -1; /* FIXME: good error number? */ } static int flush_startup_list(struct clog_cpg *entry) @@ -939,16 +1023,19 @@ static void cpg_message_callback(cpg_handle_t handle, const struct cpg_name *gna struct clog_request *tmp_rq; struct clog_cpg *match; - if (clog_request_from_network(rq, msg_len) < 0) - /* Error message comes from 'clog_request_from_network' */ - return; - match = find_clog_cpg(handle); if (!match) { LOG_ERROR("Unable to find clog_cpg for cluster message"); return; } + /* + * Perform necessary endian and version compatibility conversions + */ + if (clog_request_from_network(rq, msg_len) < 0) + /* Any error messages come from 'clog_request_from_network' */ + return; + if ((nodeid == my_cluster_id) && !(rq->u_rq.request_type & DM_ULOG_RESPONSE) && (rq->u_rq.request_type != DM_ULOG_RESUME) && @@ -967,7 +1054,7 @@ static void cpg_message_callback(cpg_handle_t handle, const struct cpg_name *gna } memcpy(tmp_rq, rq, sizeof(*rq) + rq->u_rq.data_size); dm_list_init(&tmp_rq->u.list); - dm_list_add( &match->working_list, &tmp_rq->u.list); + dm_list_add(&match->working_list, &tmp_rq->u.list); } if (rq->u_rq.request_type == DM_ULOG_POSTSUSPEND) { @@ -1020,7 +1107,8 @@ static void cpg_message_callback(cpg_handle_t handle, const struct cpg_name *gna /* Redundant checkpoints ignored if match->valid */ LOG_SPRINT(match, "[%s] CHECKPOINT_READY notification from %u", SHORT_UUID(rq->u_rq.uuid), nodeid); - if (import_checkpoint(match, (match->state != INVALID))) { + if (import_checkpoint(match, + (match->state != INVALID), rq)) { LOG_SPRINT(match, "[%s] Failed to import checkpoint from %u", SHORT_UUID(rq->u_rq.uuid), nodeid); @@ -1144,11 +1232,11 @@ out: _RQ_TYPE(rq->u_rq.request_type), rq->originator, (response) ? "YES" : "NO"); else - LOG_SPRINT(match, "SEQ#=%u, UUID=%s, TYPE=%s, ORIG=%u, RESP=%s, RSPR=%u", + LOG_SPRINT(match, "SEQ#=%u, UUID=%s, TYPE=%s, ORIG=%u, RESP=%s, RSPR=%u, error=%d", rq->u_rq.seq, SHORT_UUID(rq->u_rq.uuid), _RQ_TYPE(rq->u_rq.request_type), rq->originator, (response) ? "YES" : "NO", - nodeid); + nodeid, rq->u_rq.error); } } @@ -1161,7 +1249,7 @@ static void cpg_join_callback(struct clog_cpg *match, uint32_t my_pid = (uint32_t)getpid(); uint32_t lowest = match->lowest_id; struct clog_request *rq; - char dbuf[32]; + char dbuf[32] = { 0 }; /* Assign my_cluster_id */ if ((my_cluster_id == 0xDEAD) && (joined->pid == my_pid)) @@ -1177,7 +1265,6 @@ static void cpg_join_callback(struct clog_cpg *match, if (joined->nodeid == my_cluster_id) goto out; - memset(dbuf, 0, sizeof(dbuf)); for (i = 0; i < member_list_entries - 1; i++) sprintf(dbuf+strlen(dbuf), "%u-", member_list[i].nodeid); sprintf(dbuf+strlen(dbuf), "(%u)", joined->nodeid); @@ -1259,7 +1346,9 @@ static void cpg_leave_callback(struct clog_cpg *match, dm_list_del(&rq->u.list); if (rq->u_rq.request_type == DM_ULOG_POSTSUSPEND) - kernel_send(&rq->u_rq); + if (kernel_send(&rq->u_rq)) + LOG_ERROR("Failed to respond to kernel [%s]", + RQ_TYPE(rq->u_rq.request_type)); free(rq); } @@ -1268,7 +1357,7 @@ static void cpg_leave_callback(struct clog_cpg *match, match->free_me = 1; match->lowest_id = 0xDEAD; match->state = INVALID; - } + } /* Remove any pending checkpoints for the leaving node. */ for (p_cp = NULL, c_cp = match->checkpoint_list; @@ -1324,7 +1413,7 @@ static void cpg_leave_callback(struct clog_cpg *match, left->nodeid); return; } - + match->lowest_id = member_list[0].nodeid; for (i = 0; i < member_list_entries; i++) if (match->lowest_id > member_list[i].nodeid) @@ -1413,6 +1502,7 @@ cpg_callbacks_t cpg_callbacks = { */ static int remove_checkpoint(struct clog_cpg *entry) { +#if CMIRROR_HAS_CHECKPOINT int len; SaNameT name; SaAisErrorT rv; @@ -1442,7 +1532,7 @@ unlink_retry: usleep(1000); goto unlink_retry; } - + if (rv != SA_AIS_OK) { LOG_ERROR("[%s] Failed to unlink checkpoint: %s", SHORT_UUID(entry->name.value), str_ais_error(rv)); @@ -1452,6 +1542,10 @@ unlink_retry: saCkptCheckpointClose(h); return 1; +#else + /* No checkpoint to remove, so 'success' */ + return 1; +#endif } int create_cluster_cpg(char *uuid, uint64_t luid) @@ -1493,14 +1587,14 @@ int create_cluster_cpg(char *uuid, uint64_t luid) SHORT_UUID(new->name.value)); r = cpg_initialize(&new->handle, &cpg_callbacks); - if (r != SA_AIS_OK) { + if (r != CS_OK) { LOG_ERROR("cpg_initialize failed: Cannot join cluster"); free(new); return -EPERM; } r = cpg_join(new->handle, &new->name); - if (r != SA_AIS_OK) { + if (r != CS_OK) { LOG_ERROR("cpg_join failed: Cannot join cluster"); free(new); return -EPERM; @@ -1541,7 +1635,7 @@ static int _destroy_cluster_cpg(struct clog_cpg *del) { int r; int state; - + LOG_COND(log_resend_requests, "[%s] I am leaving.2.....", SHORT_UUID(del->name.value)); @@ -1573,7 +1667,7 @@ static int _destroy_cluster_cpg(struct clog_cpg *del) abort_startup(del); r = cpg_leave(del->handle, &del->name); - if (r != CPG_OK) + if (r != CS_OK) LOG_ERROR("Error leaving CPG!"); return 0; } @@ -1591,24 +1685,27 @@ int destroy_cluster_cpg(char *uuid) int init_cluster(void) { +#if CMIRROR_HAS_CHECKPOINT SaAisErrorT rv; - dm_list_init(&clog_cpg_list); rv = saCkptInitialize(&ckpt_handle, &callbacks, &version); if (rv != SA_AIS_OK) return EXIT_CLUSTER_CKPT_INIT; - +#endif + dm_list_init(&clog_cpg_list); return 0; } void cleanup_cluster(void) { +#if CMIRROR_HAS_CHECKPOINT SaAisErrorT err; err = saCkptFinalize(ckpt_handle); if (err != SA_AIS_OK) LOG_ERROR("Failed to finalize checkpoint handle"); +#endif } void cluster_debug(void) diff --git a/daemons/cmirrord/functions.c b/daemons/cmirrord/functions.c index de80793..f6e0918 100644 --- a/daemons/cmirrord/functions.c +++ b/daemons/cmirrord/functions.c @@ -235,11 +235,9 @@ static int rw_log(struct log_c *lc, int do_write) */ static int read_log(struct log_c *lc) { - struct log_header lh; + struct log_header lh = { 0 }; size_t bitset_size; - memset(&lh, 0, sizeof(struct log_header)); - if (rw_log(lc, 0)) return -EIO; /* Failed disk read */ @@ -329,19 +327,26 @@ static int find_disk_path(char *major_minor_str, char *path_rtn, int *unlink_pat */ sprintf(path_rtn, "/dev/mapper/%s", dep->d_name); - stat(path_rtn, &statbuf); + if (stat(path_rtn, &statbuf) < 0) { + LOG_DBG("Unable to stat %s", path_rtn); + continue; + } if (S_ISBLK(statbuf.st_mode) && (major(statbuf.st_rdev) == major) && (minor(statbuf.st_rdev) == minor)) { LOG_DBG(" %s: YES", dep->d_name); - closedir(dp); + if (closedir(dp)) + LOG_DBG("Unable to closedir /dev/mapper %s", + strerror(errno)); return 0; } else { LOG_DBG(" %s: NO", dep->d_name); } } - closedir(dp); + if (closedir(dp)) + LOG_DBG("Unable to closedir /dev/mapper %s", + strerror(errno)); /* FIXME Find out why this was here and deal with underlying problem. */ LOG_DBG("Path not found for %d/%d", major, minor); @@ -369,10 +374,10 @@ static int _clog_ctr(char *uuid, uint64_t luid, enum sync log_sync = DEFAULTSYNC; uint32_t block_on_error = 0; - int disk_log = 0; + int disk_log; char disk_path[128]; int unlink_path = 0; - size_t page_size; + long page_size; int pages; /* If core log request, then argv[0] will be region_size */ @@ -476,7 +481,12 @@ static int _clog_ctr(char *uuid, uint64_t luid, lc->sync_count = (log_sync == NOSYNC) ? region_count : 0; if (disk_log) { - page_size = sysconf(_SC_PAGESIZE); + if ((page_size = sysconf(_SC_PAGESIZE)) < 0) { + LOG_ERROR("Unable to read pagesize: %s", + strerror(errno)); + r = errno; + goto fail; + } pages = *(lc->clean_bits) / page_size; pages += *(lc->clean_bits) % page_size ? 1 : 0; pages += 1; /* for header */ @@ -489,7 +499,10 @@ static int _clog_ctr(char *uuid, uint64_t luid, goto fail; } if (unlink_path) - unlink(disk_path); + if (unlink(disk_path) < 0) { + LOG_DBG("Warning: Unable to unlink log device, %s: %s", + disk_path, strerror(errno)); + } lc->disk_fd = r; lc->disk_size = pages * page_size; @@ -586,7 +599,10 @@ static int clog_ctr(struct dm_ulog_request *rq) /* We join the CPG when we resume */ /* No returning data */ - rq->data_size = 0; + if ((rq->version > 1) && !strcmp(argv[0], "clustered-disk")) + rq->data_size = sprintf(rq->data, "%s", argv[1]) + 1; + else + rq->data_size = 0; if (r) { LOG_ERROR("Failed to create cluster log (%s)", rq->uuid); @@ -626,8 +642,9 @@ static int clog_dtr(struct dm_ulog_request *rq) LOG_DBG("[%s] Cluster log removed", SHORT_UUID(lc->uuid)); dm_list_del(&lc->list); - if (lc->disk_fd != -1) - close(lc->disk_fd); + if (lc->disk_fd != -1 && close(lc->disk_fd)) + LOG_ERROR("Failed to close disk log: %s", + strerror(errno)); if (lc->disk_buffer) free(lc->disk_buffer); dm_free(lc->clean_bits); @@ -770,7 +787,7 @@ static int clog_resume(struct dm_ulog_request *rq) else if (lc->disk_nr_regions > lc->region_count) LOG_DBG("[%s] Mirror has shrunk, updating log bits", SHORT_UUID(lc->uuid)); - break; + break; case -EINVAL: LOG_DBG("[%s] (Re)initializing mirror log - resync issued.", SHORT_UUID(lc->uuid)); @@ -823,7 +840,7 @@ out: lc->sync_search = 0; lc->state = LOG_RESUMED; lc->recovery_halted = 0; - + return rq->error; } @@ -1018,7 +1035,7 @@ static int clog_flush(struct dm_ulog_request *rq, int server) { int r = 0; struct log_c *lc = get_log(rq->uuid, rq->luid); - + if (!lc) return -EINVAL; @@ -1600,7 +1617,7 @@ out: rq->data_size = sizeof(*pkg); - return 0; + return 0; } @@ -1705,14 +1722,12 @@ int do_request(struct clog_request *rq, int server) static void print_bits(dm_bitset_t bs, int print) { int i, size; - char outbuf[128]; + char outbuf[128] = { 0 }; unsigned char *buf = (unsigned char *)(bs + 1); size = (*bs % 8) ? 1 : 0; size += (*bs / 8); - memset(outbuf, 0, sizeof(outbuf)); - for (i = 0; i < size; i++) { if (!(i % 16)) { if (outbuf[0] != '\0') { @@ -1817,8 +1832,11 @@ int pull_state(const char *uuid, uint64_t luid, } if (!strncmp(which, "recovering_region", 17)) { - sscanf(buf, "%llu %u", (unsigned long long *)&lc->recovering_region, - &lc->recoverer); + if (sscanf(buf, "%llu %u", (unsigned long long *)&lc->recovering_region, + &lc->recoverer) != 2) { + LOG_ERROR("cannot parse recovering region from: %s", buf); + return -EINVAL; + } LOG_SPRINT(lc, "CKPT INIT - SEQ#=X, UUID=%s, nodeid = X:: " "recovering_region=%llu, recoverer=%u", SHORT_UUID(lc->uuid), @@ -1853,7 +1871,7 @@ int pull_state(const char *uuid, uint64_t luid, LOG_DBG("[%s] loading clean_bits:", SHORT_UUID(lc->uuid)); - print_bits(lc->sync_bits, 0); + print_bits(lc->clean_bits, 0); } return 0; diff --git a/daemons/cmirrord/link_mon.c b/daemons/cmirrord/link_mon.c index 80a98d0..74058f9 100644 --- a/daemons/cmirrord/link_mon.c +++ b/daemons/cmirrord/link_mon.c @@ -58,7 +58,7 @@ int links_register(int fd, const char *name, int (*callback)(void *data), void * free(lc); return -ENOMEM; } - + pfds = tmp; free_pfds = used_pfds + 1; } @@ -125,7 +125,7 @@ int links_monitor(void) for (i = 0; i < used_pfds; i++) if (pfds[i].revents & POLLIN) { LOG_DBG("Data ready on %d", pfds[i].fd); - + /* FIXME: Add this back return 1;*/ r++; } diff --git a/daemons/cmirrord/local.c b/daemons/cmirrord/local.c index 9e076c4..500f6dc 100644 --- a/daemons/cmirrord/local.c +++ b/daemons/cmirrord/local.c @@ -27,7 +27,7 @@ #define CN_VAL_DM_USERSPACE_LOG 0x1 #endif -static int cn_fd; /* Connector (netlink) socket fd */ +static int cn_fd = -1; /* Connector (netlink) socket fd */ static char recv_buf[2048]; static char send_buf[2048]; @@ -237,7 +237,6 @@ static int do_local_work(void *data __attribute__((unused))) case DM_ULOG_GET_REGION_SIZE: case DM_ULOG_IN_SYNC: case DM_ULOG_GET_SYNC_COUNT: - case DM_ULOG_STATUS_INFO: case DM_ULOG_STATUS_TABLE: case DM_ULOG_PRESUSPEND: /* We do not specify ourselves as server here */ @@ -249,7 +248,7 @@ static int do_local_work(void *data __attribute__((unused))) if (r) LOG_ERROR("Failed to respond to kernel [%s]", RQ_TYPE(u_rq->request_type)); - + break; case DM_ULOG_RESUME: /* @@ -273,13 +272,16 @@ static int do_local_work(void *data __attribute__((unused))) case DM_ULOG_MARK_REGION: case DM_ULOG_GET_RESYNC_WORK: case DM_ULOG_SET_REGION_SYNC: + case DM_ULOG_STATUS_INFO: case DM_ULOG_IS_REMOTE_RECOVERING: case DM_ULOG_POSTSUSPEND: r = cluster_send(rq); if (r) { u_rq->data_size = 0; u_rq->error = r; - kernel_send(u_rq); + if (kernel_send(u_rq)) + LOG_ERROR("Failed to respond to kernel [%s]", + RQ_TYPE(u_rq->request_type)); } break; @@ -384,14 +386,18 @@ int init_local(void) r = bind(cn_fd, (struct sockaddr *) &addr, sizeof(addr)); if (r < 0) { - close(cn_fd); + if (close(cn_fd)) + LOG_ERROR("Failed to close socket: %s", + strerror(errno)); return EXIT_KERNEL_BIND; } opt = addr.nl_groups; r = setsockopt(cn_fd, 270, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt)); if (r) { - close(cn_fd); + if (close(cn_fd)) + LOG_ERROR("Failed to close socket: %s", + strerror(errno)); return EXIT_KERNEL_SETSOCKOPT; } @@ -412,5 +418,7 @@ int init_local(void) void cleanup_local(void) { links_unregister(cn_fd); - close(cn_fd); + if (cn_fd >= 0 && close(cn_fd)) + LOG_ERROR("Failed to close socket: %s", + strerror(errno)); } diff --git a/daemons/dmeventd/.exported_symbols b/daemons/dmeventd/.exported_symbols index 25690c8..fab74dc 100644 --- a/daemons/dmeventd/.exported_symbols +++ b/daemons/dmeventd/.exported_symbols @@ -1,3 +1,4 @@ init_fifos fini_fifos daemon_talk +dm_event_get_version diff --git a/daemons/dmeventd/Makefile.in b/daemons/dmeventd/Makefile.in index e99e089..1302a44 100644 --- a/daemons/dmeventd/Makefile.in +++ b/daemons/dmeventd/Makefile.in @@ -1,5 +1,5 @@ # -# Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved. +# Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. # # This file is part of the device-mapper userspace tools. # @@ -60,11 +60,11 @@ LIBS += -ldevmapper LVMLIBS += -ldevmapper-event $(PTHREAD_LIBS) dmeventd: $(LIB_SHARED) dmeventd.o - $(CC) $(CFLAGS) $(LDFLAGS) -L. -o $@ dmeventd.o \ + $(CC) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) -L. -o $@ dmeventd.o \ $(DL_LIBS) $(LVMLIBS) $(LIBS) -rdynamic dmeventd.static: $(LIB_STATIC) dmeventd.o $(interfacebuilddir)/libdevmapper.a - $(CC) $(CFLAGS) $(LDFLAGS) -static -L. -L$(interfacebuilddir) -o $@ \ + $(CC) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) -static -L. -L$(interfacebuilddir) -o $@ \ dmeventd.o $(DL_LIBS) $(LVMLIBS) $(LIBS) $(STATIC_LIBS) ifeq ("@PKGCONFIG@", "yes") @@ -105,4 +105,4 @@ install: install_include install_lib install_dmeventd install_device-mapper: install_include install_lib install_dmeventd -DISTCLEAN_TARGETS += libdevmapper-event.pc .exported_symbols_generated +DISTCLEAN_TARGETS += libdevmapper-event.pc diff --git a/daemons/dmeventd/dmeventd.c b/daemons/dmeventd/dmeventd.c index 2b454f9..13148c3 100644 --- a/daemons/dmeventd/dmeventd.c +++ b/daemons/dmeventd/dmeventd.c @@ -39,13 +39,27 @@ #include <arpa/inet.h> /* for htonl, ntohl */ #ifdef linux -# include <malloc.h> - -# define OOM_ADJ_FILE "/proc/self/oom_adj" +/* + * Kernel version 2.6.36 and higher has + * new OOM killer adjustment interface. + */ +# define OOM_ADJ_FILE_OLD "/proc/self/oom_adj" +# define OOM_ADJ_FILE "/proc/self/oom_score_adj" /* From linux/oom.h */ +/* Old interface */ # define OOM_DISABLE (-17) # define OOM_ADJUST_MIN (-16) +/* New interface */ +# define OOM_SCORE_ADJ_MIN (-1000) + +/* Systemd on-demand activation support */ +# define SD_ACTIVATION_ENV_VAR_NAME "SD_ACTIVATION" +# define SD_LISTEN_PID_ENV_VAR_NAME "LISTEN_PID" +# define SD_LISTEN_FDS_ENV_VAR_NAME "LISTEN_FDS" +# define SD_LISTEN_FDS_START 3 +# define SD_FD_FIFO_SERVER SD_LISTEN_FDS_START +# define SD_FD_FIFO_CLIENT (SD_LISTEN_FDS_START + 1) #endif @@ -95,9 +109,8 @@ static pthread_mutex_t _global_mutex; #define THREAD_STACK_SIZE (300*1024) -#define DEBUGLOG(fmt, args...) _debuglog(fmt, ## args) - int dmeventd_debug = 0; +static int _systemd_activation = 0; static int _foreground = 0; static int _restart = 0; static char **_initial_registrations = 0; @@ -203,24 +216,6 @@ static DM_LIST_INIT(_timeout_registry); static pthread_mutex_t _timeout_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t _timeout_cond = PTHREAD_COND_INITIALIZER; -static void _debuglog(const char *fmt, ...) -{ - time_t P; - va_list ap; - - if (!_foreground) - return; - - va_start(ap,fmt); - - time(&P); - fprintf(stderr, "dmeventd[%p]: %.15s ", (void *) pthread_self(), ctime(&P)+4 ); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - - va_end(ap); -} - /* Allocate/free the status structure for a monitoring thread. */ static struct thread_status *_alloc_thread_status(struct message_data *data, struct dso_data *dso_data) @@ -407,7 +402,9 @@ static int _fill_device_data(struct thread_status *ts) if (!dmt) return 0; - dm_task_set_uuid(dmt, ts->device.uuid); + if (!dm_task_set_uuid(dmt, ts->device.uuid)) + goto fail; + if (!dm_task_run(dmt)) goto fail; @@ -450,7 +447,7 @@ static int _get_status(struct message_data *message_data) { struct dm_event_daemon_message *msg = message_data->msg; struct thread_status *thread; - int i = 0, j = 0; + int i, j; int ret = -1; int count = dm_list_size(&_thread_registry); int size = 0, current = 0; @@ -585,6 +582,7 @@ static void _unregister_for_timeout(struct thread_status *thread) pthread_mutex_unlock(&_timeout_mutex); } +__attribute__((format(printf, 4, 5))) static void _no_intr_log(int level, const char *file, int line, const char *f, ...) { @@ -740,8 +738,10 @@ static void _monitor_unregister(void *arg) return; } thread->status = DM_THREAD_DONE; + pthread_mutex_lock(&_timeout_mutex); UNLINK_THREAD(thread); LINK(thread, &_thread_registry_unused); + pthread_mutex_unlock(&_timeout_mutex); _unlock_mutex(); } @@ -752,7 +752,10 @@ static struct dm_task *_get_device_status(struct thread_status *ts) if (!dmt) return NULL; - dm_task_set_uuid(dmt, ts->device.uuid); + if (!dm_task_set_uuid(dmt, ts->device.uuid)) { + dm_task_destroy(dmt); + return NULL; + } if (!dm_task_run(dmt)) { dm_task_destroy(dmt); @@ -996,10 +999,8 @@ static int _register_for_event(struct message_data *message_data) almost as good as dead already... */ if (thread_new->events & DM_EVENT_TIMEOUT) { ret = -_register_for_timeout(thread_new); - if (ret) { - _unlock_mutex(); - goto out; - } + if (ret) + goto outth; } if (!(thread = _lookup_thread_status(message_data))) { @@ -1025,6 +1026,7 @@ static int _register_for_event(struct message_data *message_data) /* Or event # into events bitfield. */ thread->events |= message_data->events.field; + outth: _unlock_mutex(); out: @@ -1076,8 +1078,10 @@ static int _unregister_for_event(struct message_data *message_data) * unlink and terminate its monitoring thread. */ if (!thread->events) { + pthread_mutex_lock(&_timeout_mutex); UNLINK_THREAD(thread); LINK(thread, &_thread_registry_unused); + pthread_mutex_unlock(&_timeout_mutex); } _unlock_mutex(); @@ -1099,15 +1103,19 @@ static int _registered_device(struct message_data *message_data, const char *id = message_data->id; const char *dso = thread->dso_data->dso_name; const char *dev = thread->device.uuid; + int r; unsigned events = ((thread->status == DM_THREAD_RUNNING) && (thread->events)) ? thread->events : thread-> events | DM_EVENT_REGISTRATION_PENDING; dm_free(msg->data); - msg->size = dm_asprintf(&(msg->data), fmt, id, dso, dev, events); + if ((r = dm_asprintf(&(msg->data), fmt, id, dso, dev, events)) < 0) { + msg->size = 0; + return -ENOMEM; + } - _unlock_mutex(); + msg->size = (uint32_t) r; return 0; } @@ -1140,6 +1148,7 @@ static int _want_registered_device(char *dso_name, char *device_uuid, static int _get_registered_dev(struct message_data *message_data, int next) { struct thread_status *thread, *hit = NULL; + int ret = -ENOENT; _lock_mutex(); @@ -1156,16 +1165,12 @@ static int _get_registered_dev(struct message_data *message_data, int next) * If we got a registered device and want the next one -> * fetch next conforming element off the list. */ - if (hit && !next) { - _unlock_mutex(); - return _registered_device(message_data, hit); - } + if (hit && !next) + goto reg; if (!hit) goto out; - thread = hit; - while (1) { if (dm_list_end(&_thread_registry, &thread->list)) goto out; @@ -1177,13 +1182,13 @@ static int _get_registered_dev(struct message_data *message_data, int next) } } - _unlock_mutex(); - return _registered_device(message_data, hit); + reg: + ret = _registered_device(message_data, hit); out: _unlock_mutex(); - - return -ENOENT; + + return ret; } static int _get_registered_device(struct message_data *message_data) @@ -1241,68 +1246,66 @@ static void _init_fifos(struct dm_event_fifos *fifos) /* Open fifos used for client communication. */ static int _open_fifos(struct dm_event_fifos *fifos) { - int orig_errno; + struct stat st; /* Create client fifo. */ (void) dm_prepare_selinux_context(fifos->client_path, S_IFIFO); if ((mkfifo(fifos->client_path, 0600) == -1) && errno != EEXIST) { - syslog(LOG_ERR, "%s: Failed to create client fifo.\n", __func__); - orig_errno = errno; + syslog(LOG_ERR, "%s: Failed to create client fifo %s: %m.\n", + __func__, fifos->client_path); (void) dm_prepare_selinux_context(NULL, 0); - stack; - return -orig_errno; + return 0; } /* Create server fifo. */ (void) dm_prepare_selinux_context(fifos->server_path, S_IFIFO); if ((mkfifo(fifos->server_path, 0600) == -1) && errno != EEXIST) { - syslog(LOG_ERR, "%s: Failed to create server fifo.\n", __func__); - orig_errno = errno; + syslog(LOG_ERR, "%s: Failed to create server fifo %s: %m.\n", + __func__, fifos->server_path); (void) dm_prepare_selinux_context(NULL, 0); - stack; - return -orig_errno; + return 0; } (void) dm_prepare_selinux_context(NULL, 0); - struct stat st; - /* Warn about wrong permissions if applicable */ if ((!stat(fifos->client_path, &st)) && (st.st_mode & 0777) != 0600) - syslog(LOG_WARNING, "Fixing wrong permissions on %s", + syslog(LOG_WARNING, "Fixing wrong permissions on %s: %m.\n", fifos->client_path); if ((!stat(fifos->server_path, &st)) && (st.st_mode & 0777) != 0600) - syslog(LOG_WARNING, "Fixing wrong permissions on %s", + syslog(LOG_WARNING, "Fixing wrong permissions on %s: %m.\n", fifos->server_path); /* If they were already there, make sure permissions are ok. */ if (chmod(fifos->client_path, 0600)) { - syslog(LOG_ERR, "Unable to set correct file permissions on %s", + syslog(LOG_ERR, "Unable to set correct file permissions on %s: %m.\n", fifos->client_path); - return -errno; + return 0; } if (chmod(fifos->server_path, 0600)) { - syslog(LOG_ERR, "Unable to set correct file permissions on %s", + syslog(LOG_ERR, "Unable to set correct file permissions on %s: %m.\n", fifos->server_path); - return -errno; + return 0; } /* Need to open read+write or we will block or fail */ if ((fifos->server = open(fifos->server_path, O_RDWR)) < 0) { - stack; - return -errno; + syslog(LOG_ERR, "Failed to open fifo server %s: %m.\n", + fifos->server_path); + return 0; } /* Need to open read+write for select() to work. */ if ((fifos->client = open(fifos->client_path, O_RDWR)) < 0) { - stack; - close(fifos->server); - return -errno; + syslog(LOG_ERR, "Failed to open fifo client %s: %m", fifos->client_path); + if (close(fifos->server)) + syslog(LOG_ERR, "Failed to close fifo server %s: %m", fifos->server_path); + return 0; } - return 0; + return 1; } /* @@ -1405,7 +1408,7 @@ static int _client_write(struct dm_event_fifos *fifos, static int _handle_request(struct dm_event_daemon_message *msg, struct message_data *message_data) { - static struct { + static struct request { unsigned int cmd; int (*f)(struct message_data *); } requests[] = { @@ -1420,7 +1423,7 @@ static int _handle_request(struct dm_event_daemon_message *msg, { DM_EVENT_CMD_GET_STATUS, _get_status}, }, *req; - for (req = requests; req < requests + sizeof(requests); req++) + for (req = requests; req < requests + sizeof(requests) / sizeof(struct request); req++) if (req->cmd == msg->cmd) return req->f(message_data); @@ -1432,17 +1435,16 @@ static int _do_process_request(struct dm_event_daemon_message *msg) { int ret; char *answer; - static struct message_data message_data; + struct message_data message_data = { .msg = msg }; /* Parse the message. */ - memset(&message_data, 0, sizeof(message_data)); - message_data.msg = msg; if (msg->cmd == DM_EVENT_CMD_HELLO || msg->cmd == DM_EVENT_CMD_DIE) { ret = 0; answer = msg->data; if (answer) { - msg->size = dm_asprintf(&(msg->data), "%s %s", answer, - msg->cmd == DM_EVENT_CMD_DIE ? "DYING" : "HELLO"); + msg->size = dm_asprintf(&(msg->data), "%s %s %d", answer, + msg->cmd == DM_EVENT_CMD_DIE ? "DYING" : "HELLO", + DM_EVENT_PROTOCOL_VERSION); dm_free(answer); } else { msg->size = 0; @@ -1467,9 +1469,7 @@ static int _do_process_request(struct dm_event_daemon_message *msg) static void _process_request(struct dm_event_fifos *fifos) { int die = 0; - struct dm_event_daemon_message msg; - - memset(&msg, 0, sizeof(msg)); + struct dm_event_daemon_message msg = { 0 }; /* * Read the request from the client (client_read, client_write @@ -1488,9 +1488,9 @@ static void _process_request(struct dm_event_fifos *fifos) if (!_client_write(fifos, &msg)) stack; - if (die) raise(9); - dm_free(msg.data); + + if (die) raise(9); } static void _process_initial_registrations(void) @@ -1501,9 +1501,10 @@ static void _process_initial_registrations(void) while ((reg = _initial_registrations[i])) { msg.cmd = DM_EVENT_CMD_REGISTER_FOR_EVENT; - msg.size = strlen(reg); - msg.data = reg; - _do_process_request(&msg); + if ((msg.size = strlen(reg))) { + msg.data = reg; + _do_process_request(&msg); + } ++ i; } } @@ -1513,6 +1514,7 @@ static void _cleanup_unused_threads(void) int ret; struct dm_list *l; struct thread_status *thread; + int join_ret = 0; _lock_mutex(); while ((l = dm_list_first(&_thread_registry_unused))) { @@ -1552,12 +1554,15 @@ static void _cleanup_unused_threads(void) if (thread->status == DM_THREAD_DONE) { dm_list_del(l); - pthread_join(thread->thread, NULL); + join_ret = pthread_join(thread->thread, NULL); _free_thread_status(thread); } } _unlock_mutex(); + + if (join_ret) + syslog(LOG_ERR, "Failed pthread_join: %s\n", strerror(join_ret)); } static void _sig_alarm(int signum __attribute__((unused))) @@ -1569,10 +1574,8 @@ static void _sig_alarm(int signum __attribute__((unused))) static void _init_thread_signals(void) { sigset_t my_sigset; - struct sigaction act; + struct sigaction act = { .sa_handler = _sig_alarm }; - memset(&act, 0, sizeof(act)); - act.sa_handler = _sig_alarm; sigaction(SIGALRM, &act, NULL); sigfillset(&my_sigset); @@ -1610,40 +1613,138 @@ static void _exit_handler(int sig __attribute__((unused))) } #ifdef linux +static int _set_oom_adj(const char *oom_adj_path, int val) +{ + FILE *fp; + + if (!(fp = fopen(oom_adj_path, "w"))) { + perror("oom_adj: fopen failed"); + return 0; + } + + fprintf(fp, "%i", val); + + if (dm_fclose(fp)) + perror("oom_adj: fclose failed"); + + return 1; +} + /* * Protection against OOM killer if kernel supports it */ -static int _set_oom_adj(int val) +static int _protect_against_oom_killer(void) { - FILE *fp; - struct stat st; if (stat(OOM_ADJ_FILE, &st) == -1) { - if (errno == ENOENT) - DEBUGLOG(OOM_ADJ_FILE " not found"); - else + if (errno != ENOENT) perror(OOM_ADJ_FILE ": stat failed"); - return 1; + + /* Try old oom_adj interface as a fallback */ + if (stat(OOM_ADJ_FILE_OLD, &st) == -1) { + if (errno == ENOENT) + perror(OOM_ADJ_FILE_OLD " not found"); + else + perror(OOM_ADJ_FILE_OLD ": stat failed"); + return 1; + } + + return _set_oom_adj(OOM_ADJ_FILE_OLD, OOM_DISABLE) || + _set_oom_adj(OOM_ADJ_FILE_OLD, OOM_ADJUST_MIN); } - if (!(fp = fopen(OOM_ADJ_FILE, "w"))) { - perror(OOM_ADJ_FILE ": fopen failed"); + return _set_oom_adj(OOM_ADJ_FILE, OOM_SCORE_ADJ_MIN); +} + +static int _handle_preloaded_fifo(int fd, const char *path) +{ + struct stat st_fd, st_path; + int flags; + + if ((flags = fcntl(fd, F_GETFD)) < 0) return 0; - } - fprintf(fp, "%i", val); - if (dm_fclose(fp)) - perror(OOM_ADJ_FILE ": fclose failed"); + if (flags & FD_CLOEXEC) + return 0; + + if (fstat(fd, &st_fd) < 0 || !S_ISFIFO(st_fd.st_mode)) + return 0; + + if (stat(path, &st_path) < 0 || + st_path.st_dev != st_fd.st_dev || + st_path.st_ino != st_fd.st_ino) + return 0; + + if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) < 0) + return 0; return 1; } + +static int _systemd_handover(struct dm_event_fifos *fifos) +{ + const char *e; + char *p; + unsigned long env_pid, env_listen_fds; + int r = 0; + + memset(fifos, 0, sizeof(*fifos)); + + /* SD_ACTIVATION must be set! */ + if (!(e = getenv(SD_ACTIVATION_ENV_VAR_NAME)) || strcmp(e, "1")) + goto out; + + /* LISTEN_PID must be equal to our PID! */ + if (!(e = getenv(SD_LISTEN_PID_ENV_VAR_NAME))) + goto out; + + errno = 0; + env_pid = strtoul(e, &p, 10); + if (errno || !p || *p || env_pid <= 0 || + getpid() != (pid_t) env_pid) + goto out; + + /* LISTEN_FDS must be 2 and the fds must be FIFOSs! */ + if (!(e = getenv(SD_LISTEN_FDS_ENV_VAR_NAME))) + goto out; + + errno = 0; + env_listen_fds = strtoul(e, &p, 10); + if (errno || !p || *p || env_listen_fds != 2) + goto out; + + /* Check and handle the FIFOs passed in */ + r = (_handle_preloaded_fifo(SD_FD_FIFO_SERVER, DM_EVENT_FIFO_SERVER) && + _handle_preloaded_fifo(SD_FD_FIFO_CLIENT, DM_EVENT_FIFO_CLIENT)); + + if (r) { + fifos->server = SD_FD_FIFO_SERVER; + fifos->server_path = DM_EVENT_FIFO_SERVER; + fifos->client = SD_FD_FIFO_CLIENT; + fifos->client_path = DM_EVENT_FIFO_CLIENT; + } + +out: + unsetenv(SD_ACTIVATION_ENV_VAR_NAME); + unsetenv(SD_LISTEN_PID_ENV_VAR_NAME); + unsetenv(SD_LISTEN_FDS_ENV_VAR_NAME); + return r; +} #endif -static void remove_lockfile(void) +static void _remove_files_on_exit(void) { if (unlink(DMEVENTD_PIDFILE)) perror(DMEVENTD_PIDFILE ": unlink failed"); + + if (!_systemd_activation) { + if (unlink(DM_EVENT_FIFO_CLIENT)) + perror(DM_EVENT_FIFO_CLIENT " : unlink failed"); + + if (unlink(DM_EVENT_FIFO_SERVER)) + perror(DM_EVENT_FIFO_SERVER " : unlink failed"); + } } static void _daemonize(void) @@ -1703,8 +1804,15 @@ static void _daemonize(void) else fd = rlim.rlim_cur; - for (--fd; fd >= 0; fd--) - close(fd); + for (--fd; fd >= 0; fd--) { +#ifdef linux + /* Do not close fds preloaded by systemd! */ + if (_systemd_activation && + (fd == SD_FD_FIFO_SERVER || fd == SD_FD_FIFO_CLIENT)) + continue; +#endif + (void) close(fd); + } if ((open("/dev/null", O_RDONLY) < 0) || (open("/dev/null", O_WRONLY) < 0) || @@ -1721,16 +1829,25 @@ static void restart(void) int i, count = 0; char *message; int length; + int version; /* Get the list of registrations from the running daemon. */ if (!init_fifos(&fifos)) { - fprintf(stderr, "Could not initiate communication with existing dmeventd.\n"); + fprintf(stderr, "WARNING: Could not initiate communication with existing dmeventd.\n"); + exit(EXIT_FAILURE); + } + + if (!dm_event_get_version(&fifos, &version)) { + fprintf(stderr, "WARNING: Could not communicate with existing dmeventd.\n"); + fini_fifos(&fifos); exit(EXIT_FAILURE); } - if (daemon_talk(&fifos, &msg, DM_EVENT_CMD_HELLO, NULL, NULL, 0, 0)) { - fprintf(stderr, "Could not communicate with existing dmeventd.\n"); + if (version < 1) { + fprintf(stderr, "WARNING: The running dmeventd instance is too old.\n" + "Protocol version %d (required: 1). Action cancelled.\n", + version); exit(EXIT_FAILURE); } @@ -1749,9 +1866,16 @@ static void restart(void) } } - _initial_registrations = dm_malloc(sizeof(char*) * (count + 1)); + if (!(_initial_registrations = dm_malloc(sizeof(char*) * (count + 1)))) { + fprintf(stderr, "Memory allocation registration failed.\n"); + exit(EXIT_FAILURE); + } + for (i = 0; i < count; ++i) { - _initial_registrations[i] = dm_strdup(message); + if (!(_initial_registrations[i] = dm_strdup(message))) { + fprintf(stderr, "Memory allocation for message failed.\n"); + exit(EXIT_FAILURE); + } message += strlen(message) + 1; } _initial_registrations[count] = 0; @@ -1761,17 +1885,28 @@ static void restart(void) exit(EXIT_FAILURE); } + /* + * Wait for daemon to die, detected by sending further DIE messages + * until one fails. + */ + for (i = 0; i < 10; ++i) { + if (daemon_talk(&fifos, &msg, DM_EVENT_CMD_DIE, "-", "-", 0, 0)) + break; /* yep, it's dead probably */ + usleep(10); + } + fini_fifos(&fifos); } static void usage(char *prog, FILE *file) { fprintf(file, "Usage:\n" - "%s [-V] [-h] [-d] [-d] [-d] [-f]\n\n" - " -V Show version of dmeventd\n" - " -h Show this help information\n" + "%s [-d [-d [-d]]] [-f] [-h] [-R] [-V] [-?]\n\n" " -d Log debug messages to syslog (-d, -dd, -ddd)\n" - " -f Don't fork, run in the foreground\n\n", prog); + " -f Don't fork, run in the foreground\n" + " -h -? Show this help information\n" + " -R Restart dmeventd\n" + " -V Show version of dmeventd\n\n", prog); } int main(int argc, char *argv[]) @@ -1803,7 +1938,6 @@ int main(int argc, char *argv[]) case 'V': printf("dmeventd version: %s\n", DM_LIB_VERSION); exit(1); - break; } } @@ -1818,6 +1952,10 @@ int main(int argc, char *argv[]) if (_restart) restart(); +#ifdef linux + _systemd_activation = _systemd_handover(&fifos); +#endif + if (!_foreground) _daemonize(); @@ -1827,17 +1965,19 @@ int main(int argc, char *argv[]) if (dm_create_lockfile(DMEVENTD_PIDFILE) == 0) exit(EXIT_FAILURE); - atexit(remove_lockfile); + atexit(_remove_files_on_exit); (void) dm_prepare_selinux_context(NULL, 0); /* Set the rest of the signals to cause '_exit_now' to be set */ + signal(SIGTERM, &_exit_handler); signal(SIGINT, &_exit_handler); signal(SIGHUP, &_exit_handler); signal(SIGQUIT, &_exit_handler); #ifdef linux - if (!_set_oom_adj(OOM_DISABLE) && !_set_oom_adj(OOM_ADJUST_MIN)) - syslog(LOG_ERR, "Failed to set oom_adj to protect against OOM killer"); + /* Systemd has adjusted oom killer for us already */ + if (!_systemd_activation && !_protect_against_oom_killer()) + syslog(LOG_ERR, "Failed to protect against OOM killer"); #endif _init_thread_signals(); @@ -1847,11 +1987,12 @@ int main(int argc, char *argv[]) //multilog_init_verbose(std_syslog, _LOG_DEBUG); //multilog_async(1); - _init_fifos(&fifos); + if (!_systemd_activation) + _init_fifos(&fifos); pthread_mutex_init(&_global_mutex, NULL); - if (_open_fifos(&fifos)) + if (!_systemd_activation && !_open_fifos(&fifos)) exit(EXIT_FIFO_FAILURE); /* Signal parent, letting them know we are ready to go. */ @@ -1865,11 +2006,13 @@ int main(int argc, char *argv[]) while (!_exit_now) { _process_request(&fifos); _cleanup_unused_threads(); + _lock_mutex(); if (!dm_list_empty(&_thread_registry) || !dm_list_empty(&_thread_registry_unused)) _thread_registries_empty = 0; else _thread_registries_empty = 1; + _unlock_mutex(); } _exit_dm_lib(); diff --git a/daemons/dmeventd/dmeventd.h b/daemons/dmeventd/dmeventd.h index 254758e..e21cf45 100644 --- a/daemons/dmeventd/dmeventd.h +++ b/daemons/dmeventd/dmeventd.h @@ -17,11 +17,8 @@ /* FIXME This stuff must be configurable. */ -#define DM_EVENT_DAEMON "/sbin/dmeventd" -#define DM_EVENT_LOCKFILE "/var/lock/dmeventd" -#define DM_EVENT_FIFO_CLIENT "/var/run/dmeventd-client" -#define DM_EVENT_FIFO_SERVER "/var/run/dmeventd-server" -#define DM_EVENT_PIDFILE "/var/run/dmeventd.pid" +#define DM_EVENT_FIFO_CLIENT DEFAULT_DM_RUN_DIR "/dmeventd-client" +#define DM_EVENT_FIFO_SERVER DEFAULT_DM_RUN_DIR "/dmeventd-server" #define DM_EVENT_DEFAULT_TIMEOUT 10 @@ -66,11 +63,13 @@ struct dm_event_fifos { #define EXIT_CHDIR_FAILURE 7 /* Implemented in libdevmapper-event.c, but not part of public API. */ +// FIXME misuse of bitmask as enum int daemon_talk(struct dm_event_fifos *fifos, struct dm_event_daemon_message *msg, int cmd, const char *dso_name, const char *dev_name, enum dm_event_mask evmask, uint32_t timeout); int init_fifos(struct dm_event_fifos *fifos); void fini_fifos(struct dm_event_fifos *fifos); +int dm_event_get_version(struct dm_event_fifos *fifos, int *version); #endif /* __DMEVENTD_DOT_H__ */ diff --git a/daemons/dmeventd/libdevmapper-event.c b/daemons/dmeventd/libdevmapper-event.c index bc8ad99..1f8fbef 100644 --- a/daemons/dmeventd/libdevmapper-event.c +++ b/daemons/dmeventd/libdevmapper-event.c @@ -59,14 +59,10 @@ struct dm_event_handler *dm_event_handler_create(void) { struct dm_event_handler *dmevh = NULL; - if (!(dmevh = dm_malloc(sizeof(*dmevh)))) + if (!(dmevh = dm_zalloc(sizeof(*dmevh)))) { + log_error("Failed to allocate event handler."); return NULL; - - dmevh->dmeventd_path = NULL; - dmevh->dso = dmevh->dev_name = dmevh->uuid = NULL; - dmevh->major = dmevh->minor = 0; - dmevh->mask = 0; - dmevh->timeout = 0; + } return dmevh; } @@ -245,6 +241,10 @@ static int _daemon_read(struct dm_event_fifos *fifos, log_error("Unable to read from event server"); return 0; } + if ((ret == 0) && (i > 4) && !bytes) { + log_error("No input from event server."); + return 0; + } } if (ret < 1) { log_error("Unable to read from event server."); @@ -309,7 +309,7 @@ static int _daemon_write(struct dm_event_fifos *fifos, } if (ret == 0) break; - read(fifos->server, drainbuf, 127); + ret = read(fifos->server, drainbuf, 127); } while (bytes < size) { @@ -424,30 +424,27 @@ static int _start_daemon(char *dmeventd_path, struct dm_event_fifos *fifos) fifos->client = open(fifos->client_path, O_WRONLY | O_NONBLOCK); if (fifos->client >= 0) { /* server is running and listening */ - - close(fifos->client); + if (close(fifos->client)) + log_sys_error("close", fifos->client_path); return 1; } else if (errno != ENXIO) { /* problem */ - - log_error("%s: Can't open client fifo %s: %s", - __func__, fifos->client_path, strerror(errno)); - stack; + log_sys_error("open", fifos->client_path); return 0; } start_server: /* server is not running */ - if (!strncmp(DMEVENTD_PATH, "/", 1) && stat(DMEVENTD_PATH, &statbuf)) { - log_error("Unable to find dmeventd."); - return_0; + if ((args[0][0] == '/') && stat(args[0], &statbuf)) { + log_sys_error("stat", args[0]); + return 0; } pid = fork(); if (pid < 0) - log_error("Unable to fork."); + log_sys_error("fork", ""); else if (!pid) { execvp(args[0], args); @@ -477,25 +474,23 @@ int init_fifos(struct dm_event_fifos *fifos) /* Open the fifo used to read from the daemon. */ if ((fifos->server = open(fifos->server_path, O_RDWR)) < 0) { - log_error("%s: open server fifo %s", - __func__, fifos->server_path); - stack; + log_sys_error("open", fifos->server_path); return 0; } /* Lock out anyone else trying to do communication with the daemon. */ if (flock(fifos->server, LOCK_EX) < 0) { - log_error("%s: flock %s", __func__, fifos->server_path); - close(fifos->server); + log_sys_error("flock", fifos->server_path); + if (close(fifos->server)) + log_sys_error("close", fifos->server_path); return 0; } /* if ((fifos->client = open(fifos->client_path, O_WRONLY | O_NONBLOCK)) < 0) {*/ if ((fifos->client = open(fifos->client_path, O_RDWR | O_NONBLOCK)) < 0) { - log_error("%s: Can't open client fifo %s: %s", - __func__, fifos->client_path, strerror(errno)); - close(fifos->server); - stack; + log_sys_error("open", fifos->client_path); + if (close(fifos->server)) + log_sys_error("close", fifos->server_path); return 0; } @@ -523,8 +518,10 @@ void fini_fifos(struct dm_event_fifos *fifos) if (flock(fifos->server, LOCK_UN)) log_error("flock unlock %s", fifos->server_path); - close(fifos->client); - close(fifos->server); + if (close(fifos->client)) + log_sys_error("close", fifos->client_path); + if (close(fifos->server)) + log_sys_error("close", fifos->server_path); } /* Get uuid of a device */ @@ -538,34 +535,46 @@ static struct dm_task *_get_device_info(const struct dm_event_handler *dmevh) return NULL; } - if (dmevh->uuid) - dm_task_set_uuid(dmt, dmevh->uuid); - else if (dmevh->dev_name) - dm_task_set_name(dmt, dmevh->dev_name); - else if (dmevh->major && dmevh->minor) { - dm_task_set_major(dmt, dmevh->major); - dm_task_set_minor(dmt, dmevh->minor); - } + if (dmevh->uuid) { + if (!dm_task_set_uuid(dmt, dmevh->uuid)) + goto_bad; + } else if (dmevh->dev_name) { + if (!dm_task_set_name(dmt, dmevh->dev_name)) + goto_bad; + } else if (dmevh->major && dmevh->minor) { + if (!dm_task_set_major(dmt, dmevh->major) || + !dm_task_set_minor(dmt, dmevh->minor)) + goto_bad; + } /* FIXME Add name or uuid or devno to messages */ if (!dm_task_run(dmt)) { log_error("_get_device_info: dm_task_run() failed"); - goto failed; + goto bad; } if (!dm_task_get_info(dmt, &info)) { log_error("_get_device_info: failed to get info for device"); - goto failed; + goto bad; } if (!info.exists) { - log_error("_get_device_info: device not found"); - goto failed; + log_error("_get_device_info: %s%s%s%.0d%s%.0d%s%s: device not found", + dmevh->uuid ? : "", + (!dmevh->uuid && dmevh->dev_name) ? dmevh->dev_name : "", + (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? "(" : "", + (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? dmevh->major : 0, + (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? ":" : "", + (!dmevh->uuid && !dmevh->dev_name && dmevh->minor > 0) ? dmevh->minor : 0, + (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) && dmevh->minor == 0 ? "0" : "", + (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? ") " : ""); + goto bad; } + return dmt; -failed: + bad: dm_task_destroy(dmt); return NULL; } @@ -715,17 +724,18 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next) uuid = dm_task_get_uuid(dmt); - if (!(ret = _do_event(next ? DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE : - DM_EVENT_CMD_GET_REGISTERED_DEVICE, dmevh->dmeventd_path, - &msg, dmevh->dso, uuid, dmevh->mask, 0))) { - /* FIXME this will probably horribly break if we get - ill-formatted reply */ - ret = _parse_message(&msg, &reply_dso, &reply_uuid, &reply_mask); - } else { + if (_do_event(next ? DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE : + DM_EVENT_CMD_GET_REGISTERED_DEVICE, dmevh->dmeventd_path, + &msg, dmevh->dso, uuid, dmevh->mask, 0)) { + log_debug("%s: device not registered.", dm_task_get_name(dmt)); ret = -ENOENT; goto fail; } + /* FIXME this will probably horribly break if we get + ill-formatted reply */ + ret = _parse_message(&msg, &reply_dso, &reply_uuid, &reply_mask); + dm_task_destroy(dmt); dmt = NULL; @@ -733,6 +743,10 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next) msg.data = NULL; _dm_event_handler_clear_dev_info(dmevh); + if (!reply_uuid) { + ret = -ENXIO; /* dmeventd probably gave us bogus uuid back */ + goto fail; + } dmevh->uuid = dm_strdup(reply_uuid); if (!dmevh->uuid) { ret = -ENOMEM; @@ -781,6 +795,36 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next) return ret; } +/* + * You can (and have to) call this at the stage of the protocol where + * daemon_talk(fifos, &msg, DM_EVENT_CMD_HELLO, NULL, NULL, 0, 0) + * + * would be normally sent. This call will parse the version reply from + * dmeventd, in addition to above call. It is not safe to call this at any + * other place in the protocol. + * + * This is an internal function, not exposed in the public API. + */ + +int dm_event_get_version(struct dm_event_fifos *fifos, int *version) { + char *p; + struct dm_event_daemon_message msg = { 0, 0, NULL }; + + if (daemon_talk(fifos, &msg, DM_EVENT_CMD_HELLO, NULL, NULL, 0, 0)) + return 0; + p = msg.data; + *version = 0; + + p = strchr(p, ' '); /* Message ID */ + if (!p) return 0; + p = strchr(p + 1, ' '); /* HELLO */ + if (!p) return 0; + p = strchr(p + 1, ' '); /* HELLO, once more */ + if (p) + *version = atoi(p); + return 1; +} + #if 0 /* left out for now */ static char *_skip_string(char *src, const int delimiter) diff --git a/daemons/dmeventd/libdevmapper-event.h b/daemons/dmeventd/libdevmapper-event.h index 0de20c1..7ce3f39 100644 --- a/daemons/dmeventd/libdevmapper-event.h +++ b/daemons/dmeventd/libdevmapper-event.h @@ -46,6 +46,7 @@ enum dm_event_mask { }; #define DM_EVENT_ALL_ERRORS DM_EVENT_ERROR_MASK +#define DM_EVENT_PROTOCOL_VERSION 1 struct dm_event_handler; @@ -81,6 +82,7 @@ void dm_event_handler_set_timeout(struct dm_event_handler *dmevh, int timeout); /* * Specify mask for events to monitor. */ +// FIXME misuse of bitmask as enum void dm_event_handler_set_event_mask(struct dm_event_handler *dmevh, enum dm_event_mask evmask); @@ -90,6 +92,7 @@ const char *dm_event_handler_get_uuid(const struct dm_event_handler *dmevh); int dm_event_handler_get_major(const struct dm_event_handler *dmevh); int dm_event_handler_get_minor(const struct dm_event_handler *dmevh); int dm_event_handler_get_timeout(const struct dm_event_handler *dmevh); +// FIXME misuse of bitmask as enum enum dm_event_mask dm_event_handler_get_event_mask(const struct dm_event_handler *dmevh); /* FIXME Review interface (what about this next thing?) */ @@ -103,6 +106,7 @@ int dm_event_unregister_handler(const struct dm_event_handler *dmevh); /* Prototypes for DSO interface, see dmeventd.c, struct dso_data for detailed descriptions. */ +// FIXME misuse of bitmask as enum void process_event(struct dm_task *dmt, enum dm_event_mask evmask, void **user); int register_device(const char *device_name, const char *uuid, int major, int minor, void **user); int unregister_device(const char *device_name, const char *uuid, int major, diff --git a/daemons/dmeventd/plugins/Makefile.in b/daemons/dmeventd/plugins/Makefile.in index 45176ad..b26e6d8 100644 --- a/daemons/dmeventd/plugins/Makefile.in +++ b/daemons/dmeventd/plugins/Makefile.in @@ -1,6 +1,6 @@ # # Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. -# Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +# Copyright (C) 2004-2005, 2011 Red Hat, Inc. All rights reserved. # # This file is part of LVM2. # @@ -16,10 +16,31 @@ srcdir = @srcdir@ top_srcdir = @top_srcdir@ top_builddir = @top_builddir@ -SUBDIRS += lvm2 mirror snapshot +SUBDIRS += lvm2 + +ifneq ("@MIRRORS@", "none") + SUBDIRS += mirror +endif + +ifneq ("@SNAPSHOTS@", "none") + SUBDIRS += snapshot +endif + +ifneq ("@RAID@", "none") + SUBDIRS += raid +endif + +ifneq ("@THIN@", "none") + SUBDIRS += thin +endif + +ifeq ($(MAKECMDGOALS),distclean) + SUBDIRS = lvm2 mirror snapshot raid thin +endif include $(top_builddir)/make.tmpl -mirror: lvm2 snapshot: lvm2 - +mirror: lvm2 +raid: lvm2 +thin: lvm2 diff --git a/daemons/dmeventd/plugins/lvm2/.exported_symbols b/daemons/dmeventd/plugins/lvm2/.exported_symbols index ebe3d05..646e4cf 100644 --- a/daemons/dmeventd/plugins/lvm2/.exported_symbols +++ b/daemons/dmeventd/plugins/lvm2/.exported_symbols @@ -4,3 +4,4 @@ dmeventd_lvm2_lock dmeventd_lvm2_unlock dmeventd_lvm2_pool dmeventd_lvm2_run +dmeventd_lvm2_command diff --git a/daemons/dmeventd/plugins/lvm2/Makefile.in b/daemons/dmeventd/plugins/lvm2/Makefile.in index 46247aa..fcb2a0a 100644 --- a/daemons/dmeventd/plugins/lvm2/Makefile.in +++ b/daemons/dmeventd/plugins/lvm2/Makefile.in @@ -1,5 +1,5 @@ # -# Copyright (C) 2010 Red Hat, Inc. All rights reserved. +# Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved. # # This file is part of LVM2. # @@ -24,10 +24,8 @@ LIB_VERSION = $(LIB_VERSION_LVM) include $(top_builddir)/make.tmpl -LIBS += @LVM2CMD_LIB@ -ldevmapper $(PTHREAD_LIBS) +LIBS += @LVM2CMD_LIB@ -ldevmapper $(PTHREAD_LIBS) $(DAEMON_LIBS) install_lvm2: install_lib_shared install: install_lvm2 - -DISTCLEAN_TARGETS += .exported_symbols_generated diff --git a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c index 937d81d..5d5a46b 100644 --- a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c +++ b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c @@ -16,8 +16,6 @@ #include "log.h" #include "lvm2cmd.h" -#include "errors.h" -#include "libdevmapper-event.h" #include "dmeventd_lvm.h" #include <pthread.h> @@ -82,10 +80,7 @@ static void _temporary_log_fn(int level, void dmeventd_lvm2_lock(void) { - if (pthread_mutex_trylock(&_event_mutex)) { - syslog(LOG_NOTICE, "Another thread is handling an event. Waiting..."); - pthread_mutex_lock(&_event_mutex); - } + pthread_mutex_lock(&_event_mutex); } void dmeventd_lvm2_unlock(void) @@ -113,6 +108,7 @@ int dmeventd_lvm2_init(void) _mem_pool = NULL; goto out; } + lvm2_disable_dmeventd_monitoring(_lvm_handle); /* FIXME Temporary: move to dmeventd core */ lvm2_run(_lvm_handle, "_memlock_inc"); } @@ -150,3 +146,31 @@ int dmeventd_lvm2_run(const char *cmdline) return lvm2_run(_lvm_handle, cmdline); } +int dmeventd_lvm2_command(struct dm_pool *mem, char *buffer, size_t size, + const char *cmd, const char *device) +{ + char *vg = NULL, *lv = NULL, *layer; + int r; + + if (!dm_split_lvm_name(mem, device, &vg, &lv, &layer)) { + syslog(LOG_ERR, "Unable to determine VG name from %s.\n", + device); + return 0; + } + + /* strip off the mirror component designations */ + layer = strstr(lv, "_mlog"); + if (layer) + *layer = '\0'; + + r = dm_snprintf(buffer, size, "%s %s/%s", cmd, vg, lv); + + dm_pool_free(mem, vg); + + if (r < 0) { + syslog(LOG_ERR, "Unable to form LVM command. (too long).\n"); + return 0; + } + + return 1; +} diff --git a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h index 8efcb9b..1960c71 100644 --- a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h +++ b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h @@ -36,4 +36,7 @@ void dmeventd_lvm2_unlock(void); struct dm_pool *dmeventd_lvm2_pool(void); +int dmeventd_lvm2_command(struct dm_pool *mem, char *buffer, size_t size, + const char *cmd, const char *device); + #endif /* _DMEVENTD_LVMWRAP_H */ diff --git a/daemons/dmeventd/plugins/mirror/Makefile.in b/daemons/dmeventd/plugins/mirror/Makefile.in index 7f80629..85b33c9 100644 --- a/daemons/dmeventd/plugins/mirror/Makefile.in +++ b/daemons/dmeventd/plugins/mirror/Makefile.in @@ -1,6 +1,6 @@ # # Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. -# Copyright (C) 2004-2005, 2008-2010 Red Hat, Inc. All rights reserved. +# Copyright (C) 2004-2005, 2008-2011 Red Hat, Inc. All rights reserved. # # This file is part of LVM2. # @@ -30,10 +30,8 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow include $(top_builddir)/make.tmpl -LIBS += -ldevmapper-event-lvm2 -ldevmapper +LIBS += -ldevmapper-event-lvm2 -ldevmapper $(DAEMON_LIBS) install_lvm2: install_dm_plugin install: install_lvm2 - -DISTCLEAN_TARGETS += .exported_symbols_generated diff --git a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c index 3e97d87..e59feb4 100644 --- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c +++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved. + * Copyright (C) 2005-2012 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -18,6 +18,7 @@ #include "errors.h" #include "libdevmapper-event.h" #include "dmeventd_lvm.h" +#include "defaults.h" #include <syslog.h> /* FIXME Replace syslog with multilog */ /* FIXME Missing openlog? */ @@ -81,7 +82,8 @@ static int _get_mirror_event(char *params) if (!dm_split_words(params, 1, 0, &p)) goto out_parse; - if (!(num_devs = atoi(p))) + if (!(num_devs = atoi(p)) || + (num_devs > DEFAULT_MIRROR_MAX_IMAGES) || (num_devs < 0)) goto out_parse; p += strlen(p) + 1; @@ -90,6 +92,7 @@ static int _get_mirror_event(char *params) if (!args || dm_split_words(p, num_devs + 7, 0, args) < num_devs + 5) goto out_parse; + /* FIXME: Code differs from lib/mirror/mirrored.c */ dev_status_str = args[2 + num_devs]; log_argc = atoi(args[3 + num_devs]); log_status_str = args[3 + num_devs + log_argc]; @@ -121,7 +124,7 @@ static int _get_mirror_event(char *params) out: dm_free(args); return r; - + out_parse: dm_free(args); syslog(LOG_ERR, "Unable to parse mirror status string."); @@ -133,32 +136,15 @@ static int _remove_failed_devices(const char *device) int r; #define CMD_SIZE 256 /* FIXME Use system restriction */ char cmd_str[CMD_SIZE]; - char *vg = NULL, *lv = NULL, *layer = NULL; - - if (strlen(device) > 200) /* FIXME Use real restriction */ - return -ENAMETOOLONG; /* FIXME These return code distinctions are not used so remove them! */ - - if (!dm_split_lvm_name(dmeventd_lvm2_pool(), device, &vg, &lv, &layer)) { - syslog(LOG_ERR, "Unable to determine VG name from %s.", - device); - return -ENOMEM; /* FIXME Replace with generic error return - reason for failure has already got logged */ - } - - /* strip off the mirror component designations */ - layer = strstr(lv, "_mlog"); - if (layer) - *layer = '\0'; - /* FIXME Is any sanity-checking required on %s? */ - if (CMD_SIZE <= snprintf(cmd_str, CMD_SIZE, "lvconvert --config devices{ignore_suspended_devices=1} --repair --use-policies %s/%s", vg, lv)) { - /* this error should be caught above, but doesn't hurt to check again */ - syslog(LOG_ERR, "Unable to form LVM command: Device name too long."); + if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), cmd_str, sizeof(cmd_str), + "lvconvert --config devices{ignore_suspended_devices=1} " + "--repair --use-policies", device)) return -ENAMETOOLONG; /* FIXME Replace with generic error return - reason for failure has already got logged */ - } r = dmeventd_lvm2_run(cmd_str); - syslog(LOG_INFO, "Repair of mirrored LV %s/%s %s.", vg, lv, + syslog(LOG_INFO, "Repair of mirrored device %s %s.", device, (r == ECMD_PROCESSED) ? "finished successfully" : "failed"); return (r == ECMD_PROCESSED) ? 0 : -1; @@ -227,9 +213,12 @@ int register_device(const char *device, int minor __attribute__((unused)), void **unused __attribute__((unused))) { - int r = dmeventd_lvm2_init(); + if (!dmeventd_lvm2_init()) + return 0; + syslog(LOG_INFO, "Monitoring mirror device %s for events.", device); - return r; + + return 1; } int unregister_device(const char *device, @@ -241,5 +230,6 @@ int unregister_device(const char *device, syslog(LOG_INFO, "No longer monitoring mirror device %s for events.", device); dmeventd_lvm2_exit(); + return 1; } diff --git a/daemons/dmeventd/plugins/raid/.exported_symbols b/daemons/dmeventd/plugins/raid/.exported_symbols new file mode 100644 index 0000000..b88c705 --- /dev/null +++ b/daemons/dmeventd/plugins/raid/.exported_symbols @@ -0,0 +1,3 @@ +process_event +register_device +unregister_device diff --git a/daemons/dmeventd/plugins/raid/Makefile.in b/daemons/dmeventd/plugins/raid/Makefile.in new file mode 100644 index 0000000..a6b7788 --- /dev/null +++ b/daemons/dmeventd/plugins/raid/Makefile.in @@ -0,0 +1,36 @@ +# +# Copyright (C) 2011 Red Hat, Inc. All rights reserved. +# +# This file is part of LVM2. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +top_builddir = @top_builddir@ + +INCLUDES += -I$(top_srcdir)/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2 +CLDFLAGS += -L$(top_builddir)/tools -L$(top_builddir)/daemons/dmeventd/plugins/lvm2 + +SOURCES = dmeventd_raid.c + +LIB_NAME = libdevmapper-event-lvm2raid +LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX) +LIB_VERSION = $(LIB_VERSION_LVM) + +CFLOW_LIST = $(SOURCES) +CFLOW_LIST_TARGET = $(LIB_NAME).cflow + +include $(top_builddir)/make.tmpl + +LIBS += -ldevmapper-event-lvm2 -ldevmapper + +install_lvm2: install_dm_plugin + +install: install_lvm2 diff --git a/daemons/dmeventd/plugins/raid/dmeventd_raid.c b/daemons/dmeventd/plugins/raid/dmeventd_raid.c new file mode 100644 index 0000000..a3ecdc1 --- /dev/null +++ b/daemons/dmeventd/plugins/raid/dmeventd_raid.c @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "lib.h" + +#include "lvm2cmd.h" +#include "errors.h" +#include "libdevmapper-event.h" +#include "dmeventd_lvm.h" + +#include <syslog.h> /* FIXME Replace syslog with multilog */ +/* FIXME Missing openlog? */ +/* FIXME Replace most syslogs with log_error() style messages and add complete context. */ +/* FIXME Reformat to 80 char lines. */ + +/* + * run_repair is a close copy to + * plugins/mirror/dmeventd_mirror.c:_remove_failed_devices() + */ +static int run_repair(const char *device) +{ + int r; +#define CMD_SIZE 256 /* FIXME Use system restriction */ + char cmd_str[CMD_SIZE]; + + if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), cmd_str, sizeof(cmd_str), + "lvconvert --config devices{ignore_suspended_devices=1} " + "--repair --use-policies", device)) + return -1; + + r = dmeventd_lvm2_run(cmd_str); + + if (r != ECMD_PROCESSED) + syslog(LOG_INFO, "Repair of RAID device %s failed.", device); + + return (r == ECMD_PROCESSED) ? 0 : -1; +} + +static int _process_raid_event(char *params, const char *device) +{ + int i, n, failure = 0; + char *p, *a[4]; + char *raid_type; + char *num_devices; + char *health_chars; + char *resync_ratio; + + /* + * RAID parms: <raid_type> <#raid_disks> \ + * <health chars> <resync ratio> + */ + if (!dm_split_words(params, 4, 0, a)) { + syslog(LOG_ERR, "Failed to process status line for %s\n", + device); + return -EINVAL; + } + raid_type = a[0]; + num_devices = a[1]; + health_chars = a[2]; + resync_ratio = a[3]; + + if (!(n = atoi(num_devices))) { + syslog(LOG_ERR, "Failed to parse number of devices for %s: %s", + device, num_devices); + return -EINVAL; + } + + for (i = 0; i < n; i++) { + switch (health_chars[i]) { + case 'A': + /* Device is 'A'live and well */ + case 'a': + /* Device is 'a'live, but not yet in-sync */ + break; + case 'D': + syslog(LOG_ERR, + "Device #%d of %s array, %s, has failed.", + i, raid_type, device); + failure++; + break; + default: + /* Unhandled character returned from kernel */ + break; + } + if (failure) + return run_repair(device); + } + + p = strstr(resync_ratio, "/"); + if (!p) { + syslog(LOG_ERR, "Failed to parse resync_ratio for %s: %s", + device, resync_ratio); + return -EINVAL; + } + p[0] = '\0'; + syslog(LOG_INFO, "%s array, %s, is %s in-sync.", + raid_type, device, strcmp(resync_ratio, p+1) ? "not" : "now"); + + return 0; +} + +void process_event(struct dm_task *dmt, + enum dm_event_mask event __attribute__((unused)), + void **unused __attribute__((unused))) +{ + void *next = NULL; + uint64_t start, length; + char *target_type = NULL; + char *params; + const char *device = dm_task_get_name(dmt); + + dmeventd_lvm2_lock(); + + do { + next = dm_get_next_target(dmt, next, &start, &length, + &target_type, ¶ms); + + if (!target_type) { + syslog(LOG_INFO, "%s mapping lost.", device); + continue; + } + + if (strcmp(target_type, "raid")) { + syslog(LOG_INFO, "%s has non-raid portion.", device); + continue; + } + + if (_process_raid_event(params, device)) + syslog(LOG_ERR, "Failed to process event for %s", + device); + } while (next); + + dmeventd_lvm2_unlock(); +} + +int register_device(const char *device, + const char *uuid __attribute__((unused)), + int major __attribute__((unused)), + int minor __attribute__((unused)), + void **unused __attribute__((unused))) +{ + if (!dmeventd_lvm2_init()) + return 0; + + syslog(LOG_INFO, "Monitoring RAID device %s for events.", device); + + return 1; +} + +int unregister_device(const char *device, + const char *uuid __attribute__((unused)), + int major __attribute__((unused)), + int minor __attribute__((unused)), + void **unused __attribute__((unused))) +{ + syslog(LOG_INFO, "No longer monitoring RAID device %s for events.", + device); + dmeventd_lvm2_exit(); + + return 1; +} diff --git a/daemons/dmeventd/plugins/snapshot/Makefile.in b/daemons/dmeventd/plugins/snapshot/Makefile.in index b8414b2..a4cff15 100644 --- a/daemons/dmeventd/plugins/snapshot/Makefile.in +++ b/daemons/dmeventd/plugins/snapshot/Makefile.in @@ -1,6 +1,6 @@ # # Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. -# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. +# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. # # This file is part of the LVM2. # @@ -26,10 +26,8 @@ LIB_VERSION = $(LIB_VERSION_LVM) include $(top_builddir)/make.tmpl -LIBS += -ldevmapper-event-lvm2 -ldevmapper +LIBS += -ldevmapper-event-lvm2 -ldevmapper $(DAEMON_LIBS) install_lvm2: install_dm_plugin install: install_lvm2 - -DISTCLEAN_TARGETS += .exported_symbols_generated diff --git a/daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c b/daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c index 6fc9f56..205218a 100644 --- a/daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c +++ b/daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 Red Hat, Inc. All rights reserved. + * Copyright (C) 2007-2011 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -19,8 +19,6 @@ #include "libdevmapper-event.h" #include "dmeventd_lvm.h" -#include "lvm-string.h" - #include <sys/wait.h> #include <syslog.h> /* FIXME Replace syslog with multilog */ /* FIXME Missing openlog? */ @@ -40,6 +38,12 @@ struct snap_status { int max; }; +struct dso_state { + int percent_check; + int known_size; + char cmd_str[1024]; +}; + /* FIXME possibly reconcile this with target_percent when we gain access to regular LVM library here. */ static void _parse_snapshot_params(char *params, struct snap_status *status) @@ -115,26 +119,9 @@ static int _run(const char *cmd, ...) return 1; /* all good */ } -static int _extend(const char *device) +static int _extend(const char *cmd) { - char *vg = NULL, *lv = NULL, *layer = NULL; - char cmd_str[1024]; - int r = 0; - - if (!dm_split_lvm_name(dmeventd_lvm2_pool(), device, &vg, &lv, &layer)) { - syslog(LOG_ERR, "Unable to determine VG name from %s.", device); - return 0; - } - if (sizeof(cmd_str) <= snprintf(cmd_str, sizeof(cmd_str), - "lvextend --use-policies %s/%s", vg, lv)) { - syslog(LOG_ERR, "Unable to form LVM command: Device name too long."); - return 0; - } - - r = dmeventd_lvm2_run(cmd_str); - syslog(LOG_INFO, "Extension of snapshot %s/%s %s.", vg, lv, - (r == ECMD_PROCESSED) ? "finished successfully" : "failed"); - return r == ECMD_PROCESSED; + return dmeventd_lvm2_run(cmd) == ECMD_PROCESSED; } static void _umount(const char *device, int major, int minor) @@ -155,7 +142,8 @@ static void _umount(const char *device, int major, int minor) break; /* eof, likely */ /* words[0] is the mount point and words[1] is the device path */ - dm_split_words(buffer, 3, 0, words); + if (dm_split_words(buffer, 3, 0, words) < 2) + continue; /* find the major/minor of the device */ if (stat(words[0], &st)) @@ -164,9 +152,9 @@ static void _umount(const char *device, int major, int minor) if (S_ISBLK(st.st_mode) && major(st.st_rdev) == major && minor(st.st_rdev) == minor) { - syslog(LOG_ERR, "Unmounting invalid snapshot %s from %s.", device, words[1]); + syslog(LOG_ERR, "Unmounting invalid snapshot %s from %s.\n", device, words[1]); if (!_run(UMOUNT_COMMAND, "-fl", words[1], NULL)) - syslog(LOG_ERR, "Failed to umount snapshot %s from %s: %s.", + syslog(LOG_ERR, "Failed to umount snapshot %s from %s: %s.\n", device, words[1], strerror(errno)); } } @@ -185,10 +173,11 @@ void process_event(struct dm_task *dmt, char *params; struct snap_status status = { 0 }; const char *device = dm_task_get_name(dmt); - int percent, *percent_check = (int*)private; + int percent; + struct dso_state *state = *private; /* No longer monitoring, waiting for remove */ - if (!*percent_check) + if (!state->percent_check) return; dmeventd_lvm2_lock(); @@ -200,7 +189,6 @@ void process_event(struct dm_task *dmt, _parse_snapshot_params(params, &status); if (status.invalid) { - syslog(LOG_ERR, "Trying to umount invalid snapshot %s...\n", device); struct dm_info info; if (dm_task_get_info(dmt, &info)) { dmeventd_lvm2_unlock(); @@ -209,27 +197,35 @@ void process_event(struct dm_task *dmt, } /* else; too bad, but this is best-effort thing... */ } + /* Snapshot size had changed. Clear the threshold. */ + if (state->known_size != status.max) { + state->percent_check = CHECK_MINIMUM; + state->known_size = status.max; + } + /* * If the snapshot has been invalidated or we failed to parse * the status string. Report the full status string to syslog. */ if (status.invalid || !status.max) { syslog(LOG_ERR, "Snapshot %s changed state to: %s\n", device, params); - *percent_check = 0; + state->percent_check = 0; goto out; } percent = 100 * status.used / status.max; - if (percent >= *percent_check) { + if (percent >= state->percent_check) { /* Usage has raised more than CHECK_STEP since the last time. Run actions. */ - *percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP; + state->percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP; + if (percent >= WARNING_THRESH) /* Print a warning to syslog. */ syslog(LOG_WARNING, "Snapshot %s is now %i%% full.\n", device, percent); /* Try to extend the snapshot, in accord with user-set policies */ - if (!_extend(device)) - syslog(LOG_ERR, "Failed to extend snapshot %s.", device); + if (!_extend(state->cmd_str)) + syslog(LOG_ERR, "Failed to extend snapshot %s.\n", device); } + out: dmeventd_lvm2_unlock(); } @@ -240,23 +236,46 @@ int register_device(const char *device, int minor __attribute__((unused)), void **private) { - int *percent_check = (int*)private; - int r = dmeventd_lvm2_init(); + struct dso_state *state; + + if (!dmeventd_lvm2_init()) + goto out; - *percent_check = CHECK_MINIMUM; + if (!(state = dm_zalloc(sizeof(*state)))) + goto bad; + + if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), + state->cmd_str, sizeof(state->cmd_str), + "lvextend --use-policies", device)) + goto bad; + + state->percent_check = CHECK_MINIMUM; + state->known_size = 0; + *private = state; syslog(LOG_INFO, "Monitoring snapshot %s\n", device); - return r; + + return 1; +bad: + dm_free(state); + dmeventd_lvm2_exit(); +out: + syslog(LOG_ERR, "Failed to monitor snapshot %s.\n", device); + + return 0; } int unregister_device(const char *device, const char *uuid __attribute__((unused)), int major __attribute__((unused)), int minor __attribute__((unused)), - void **unused __attribute__((unused))) + void **private) { - syslog(LOG_INFO, "No longer monitoring snapshot %s\n", - device); + struct dso_state *state = *private; + + syslog(LOG_INFO, "No longer monitoring snapshot %s\n", device); + dm_free(state); dmeventd_lvm2_exit(); + return 1; } diff --git a/daemons/dmeventd/plugins/thin/.exported_symbols b/daemons/dmeventd/plugins/thin/.exported_symbols new file mode 100644 index 0000000..b88c705 --- /dev/null +++ b/daemons/dmeventd/plugins/thin/.exported_symbols @@ -0,0 +1,3 @@ +process_event +register_device +unregister_device diff --git a/daemons/dmeventd/plugins/thin/Makefile.in b/daemons/dmeventd/plugins/thin/Makefile.in new file mode 100644 index 0000000..e964ab5 --- /dev/null +++ b/daemons/dmeventd/plugins/thin/Makefile.in @@ -0,0 +1,36 @@ +# +# Copyright (C) 2011 Red Hat, Inc. All rights reserved. +# +# This file is part of LVM2. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +top_builddir = @top_builddir@ + +INCLUDES += -I$(top_srcdir)/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2 +CLDFLAGS += -L$(top_builddir)/tools -L$(top_builddir)/daemons/dmeventd/plugins/lvm2 + +SOURCES = dmeventd_thin.c + +LIB_NAME = libdevmapper-event-lvm2thin +LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX) +LIB_VERSION = $(LIB_VERSION_LVM) + +CFLOW_LIST = $(SOURCES) +CFLOW_LIST_TARGET = $(LIB_NAME).cflow + +include $(top_builddir)/make.tmpl + +LIBS += -ldevmapper-event-lvm2 -ldevmapper + +install_lvm2: install_dm_plugin + +install: install_lvm2 diff --git a/daemons/dmeventd/plugins/thin/dmeventd_thin.c b/daemons/dmeventd/plugins/thin/dmeventd_thin.c new file mode 100644 index 0000000..a1af4c0 --- /dev/null +++ b/daemons/dmeventd/plugins/thin/dmeventd_thin.c @@ -0,0 +1,447 @@ +/* + * Copyright (C) 2011-2012 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "lib.h" + +#include "lvm2cmd.h" +#include "errors.h" +#include "libdevmapper-event.h" +#include "dmeventd_lvm.h" + +#include <sys/wait.h> +#include <syslog.h> /* FIXME Replace syslog with multilog */ +/* FIXME Missing openlog? */ + +/* First warning when thin is 80% full. */ +#define WARNING_THRESH 80 +/* Run a check every 5%. */ +#define CHECK_STEP 5 +/* Do not bother checking thins less than 50% full. */ +#define CHECK_MINIMUM 50 + +#define UMOUNT_COMMAND "/bin/umount" + +#define THIN_DEBUG 0 + +struct dso_state { + struct dm_pool *mem; + int metadata_percent_check; + int data_percent_check; + uint64_t known_metadata_size; + uint64_t known_data_size; + char cmd_str[1024]; +}; + + +/* TODO - move this mountinfo code into library to be reusable */ +#ifdef linux +# include "kdev_t.h" +#else +# define MAJOR(x) major((x)) +# define MINOR(x) minor((x)) +# define MKDEV(x,y) makedev((x),(y)) +#endif + +/* Macros to make string defines */ +#define TO_STRING_EXP(A) #A +#define TO_STRING(A) TO_STRING_EXP(A) + +static int _is_octal(int a) +{ + return (((a) & ~7) == '0'); +} + +/* Convert mangled mountinfo into normal ASCII string */ +static void _unmangle_mountinfo_string(const char *src, char *buf) +{ + if (!src) + return; + + while (*src) { + if ((*src == '\\') && + _is_octal(src[1]) && _is_octal(src[2]) && _is_octal(src[3])) { + *buf++ = 64 * (src[1] & 7) + 8 * (src[2] & 7) + (src[3] & 7); + src += 4; + } else + *buf++ = *src++; + } + *buf = '\0'; +} + +/* Parse one line of mountinfo */ +static int _parse_mountinfo_line(const char *line, unsigned *maj, unsigned *min, char *buf) +{ + char root[PATH_MAX + 1]; + char target[PATH_MAX + 1]; + + /* TODO: maybe detect availability of %ms glib support ? */ + if (sscanf(line, "%*u %*u %u:%u %" TO_STRING(PATH_MAX) + "s %" TO_STRING(PATH_MAX) "s", + maj, min, root, target) < 4) + return 0; + + _unmangle_mountinfo_string(target, buf); + +#if THIN_DEBUG + syslog(LOG_DEBUG, "Mounted %u:%u %s", *maj, *min, buf); +#endif + + return 1; +} + +/* Get dependencies for device, and try to find matching device */ +static int _has_deps(const char *name, int tp_major, int tp_minor, int *dev_minor) +{ + struct dm_task *dmt; + const struct dm_deps *deps; + struct dm_info info; + int major, minor; + int r = 0; + + if (!(dmt = dm_task_create(DM_DEVICE_DEPS))) + return 0; + + if (!dm_task_set_name(dmt, name)) + goto out; + + if (!dm_task_no_open_count(dmt)) + goto out; + + if (!dm_task_run(dmt)) + goto out; + + if (!dm_task_get_info(dmt, &info)) + goto out; + + if (!(deps = dm_task_get_deps(dmt))) + goto out; + + if (!info.exists || deps->count != 1) + goto out; + + major = (int) MAJOR(deps->device[0]); + minor = (int) MINOR(deps->device[0]); + if ((major != tp_major) || (minor != tp_minor)) + goto out; + + *dev_minor = info.minor; + +#if THIN_DEBUG + { + char dev_name[PATH_MAX]; + if (dm_device_get_name(major, minor, 0, dev_name, sizeof(dev_name))) + syslog(LOG_DEBUG, "Found %s (%u:%u) depends on %s", + name, major, *dev_minor, dev_name); + } +#endif + r = 1; +out: + dm_task_destroy(dmt); + + return r; +} + +/* Get all active devices */ +static int _find_all_devs(dm_bitset_t bs, int tp_major, int tp_minor) +{ + struct dm_task *dmt; + struct dm_names *names; + unsigned next = 0; + int minor, r = 1; + + if (!(dmt = dm_task_create(DM_DEVICE_LIST))) + return 0; + + if (!dm_task_run(dmt)) { + r = 0; + goto out; + } + + if (!(names = dm_task_get_names(dmt))) { + r = 0; + goto out; + } + + if (!names->dev) + goto out; + + do { + names = (struct dm_names *)((char *) names + next); + if (_has_deps(names->name, tp_major, tp_minor, &minor)) + dm_bit_set(bs, minor); + next = names->next; + } while (next); + +out: + dm_task_destroy(dmt); + + return r; +} + +static int _extend(struct dso_state *state) +{ +#if THIN_DEBUG + syslog(LOG_INFO, "dmeventd executes: %s.\n", state->cmd_str); +#endif + return (dmeventd_lvm2_run(state->cmd_str) == ECMD_PROCESSED); +} + +static int _run(const char *cmd, ...) +{ + va_list ap; + int argc = 1; /* for argv[0], i.e. cmd */ + int i = 0; + const char **argv; + pid_t pid = fork(); + int status; + + if (pid == 0) { /* child */ + va_start(ap, cmd); + while (va_arg(ap, const char *)) + ++argc; + va_end(ap); + + /* + 1 for the terminating NULL */ + argv = alloca(sizeof(const char *) * (argc + 1)); + + argv[0] = cmd; + va_start(ap, cmd); + while ((argv[++i] = va_arg(ap, const char *))); + va_end(ap); + + execvp(cmd, (char **)argv); + syslog(LOG_ERR, "Failed to execute %s: %s.\n", cmd, strerror(errno)); + exit(127); + } + + if (pid > 0) { /* parent */ + if (waitpid(pid, &status, 0) != pid) + return 0; /* waitpid failed */ + if (!WIFEXITED(status) || WEXITSTATUS(status)) + return 0; /* the child failed */ + } + + if (pid < 0) + return 0; /* fork failed */ + + return 1; /* all good */ +} + +/* + * Find all thin pool users and try to umount them. + * TODO: work with read-only thin pool support + */ +static void _umount(struct dm_task *dmt, const char *device) +{ + static const char mountinfo[] = "/proc/self/mountinfo"; + static const size_t MINORS = 4096; + FILE *minfo; + char buffer[4096]; + char target[PATH_MAX]; + struct dm_info info; + unsigned maj, min; + dm_bitset_t minors; /* Bitset for active thin pool minors */ + + if (!dm_task_get_info(dmt, &info)) + return; + + dmeventd_lvm2_unlock(); + + if (!(minors = dm_bitset_create(NULL, MINORS))) { + syslog(LOG_ERR, "Failed to allocate bitset. Not unmounting %s.\n", device); + goto out; + } + + if (!(minfo = fopen(mountinfo, "r"))) { + syslog(LOG_ERR, "Could not read %s. Not umounting %s.\n", mountinfo, device); + goto out; + } + + if (!_find_all_devs(minors, info.major, info.minor)) { + syslog(LOG_ERR, "Failed to detect mounted volumes for %s.\n", device); + goto out; + } + + while (!feof(minfo)) { + /* read mountinfo line */ + if (!fgets(buffer, sizeof(buffer), minfo)) + break; /* eof, likely */ + + if (_parse_mountinfo_line(buffer, &maj, &min, target) && + (maj == info.major) && dm_bit(minors, min)) { + syslog(LOG_INFO, "Unmounting thin volume %s from %s.\n", + device, target); + if (!_run(UMOUNT_COMMAND, "-fl", target, NULL)) + syslog(LOG_ERR, "Failed to umount thin %s from %s: %s.\n", + device, target, strerror(errno)); + } + } + + if (fclose(minfo)) + syslog(LOG_ERR, "Failed to close %s\n", mountinfo); + + dm_bitset_destroy(minors); +out: + dmeventd_lvm2_lock(); +} + +void process_event(struct dm_task *dmt, + enum dm_event_mask event __attribute__((unused)), + void **private) +{ + const char *device = dm_task_get_name(dmt); + int percent; + struct dso_state *state = *private; + struct dm_status_thin_pool *tps = NULL; + void *next = NULL; + uint64_t start, length; + char *target_type = NULL; + char *params; + +#if 0 + /* No longer monitoring, waiting for remove */ + if (!state->meta_percent_check && !state->data_percent_check) + return; +#endif + dmeventd_lvm2_lock(); + + dm_get_next_target(dmt, next, &start, &length, &target_type, ¶ms); + + if (!target_type || (strcmp(target_type, "thin-pool") != 0)) { + syslog(LOG_ERR, "Invalid target type.\n"); + goto out; + } + + if (!dm_get_status_thin_pool(state->mem, params, &tps)) { + syslog(LOG_ERR, "Failed to parse status.\n"); + _umount(dmt, device); + goto out; + } + +#if THIN_DEBUG + syslog(LOG_INFO, "%p: Got status %" PRIu64 " / %" PRIu64 + " %" PRIu64 " / %" PRIu64 ".\n", state, + tps->used_metadata_blocks, tps->total_metadata_blocks, + tps->used_data_blocks, tps->total_data_blocks); +#endif + + /* Thin pool size had changed. Clear the threshold. */ + if (state->known_metadata_size != tps->total_metadata_blocks) { + state->metadata_percent_check = CHECK_MINIMUM; + state->known_metadata_size = tps->total_metadata_blocks; + } + + if (state->known_data_size != tps->total_data_blocks) { + state->data_percent_check = CHECK_MINIMUM; + state->known_data_size = tps->total_data_blocks; + } + + percent = 100 * tps->used_metadata_blocks / tps->total_metadata_blocks; + if (percent >= state->metadata_percent_check) { + /* + * Usage has raised more than CHECK_STEP since the last + * time. Run actions. + */ + state->metadata_percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP; + + /* FIXME: extension of metadata needs to be written! */ + if (percent >= WARNING_THRESH) /* Print a warning to syslog. */ + syslog(LOG_WARNING, "Thin metadata %s is now %i%% full.\n", + device, percent); + /* Try to extend the metadata, in accord with user-set policies */ + if (!_extend(state)) { + syslog(LOG_ERR, "Failed to extend thin metadata %s.\n", + device); + _umount(dmt, device); + } + /* FIXME: hmm READ-ONLY switch should happen in error path */ + } + + percent = 100 * tps->used_data_blocks / tps->total_data_blocks; + if (percent >= state->data_percent_check) { + /* + * Usage has raised more than CHECK_STEP since + * the last time. Run actions. + */ + state->data_percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP; + + if (percent >= WARNING_THRESH) /* Print a warning to syslog. */ + syslog(LOG_WARNING, "Thin %s is now %i%% full.\n", device, percent); + /* Try to extend the thin data, in accord with user-set policies */ + if (!_extend(state)) { + syslog(LOG_ERR, "Failed to extend thin %s.\n", device); + state->data_percent_check = 0; + _umount(dmt, device); + } + /* FIXME: hmm READ-ONLY switch should happen in error path */ + } +out: + if (tps) + dm_pool_free(state->mem, tps); + + dmeventd_lvm2_unlock(); +} + +int register_device(const char *device, + const char *uuid __attribute__((unused)), + int major __attribute__((unused)), + int minor __attribute__((unused)), + void **private) +{ + struct dm_pool *statemem = NULL; + struct dso_state *state; + + if (!dmeventd_lvm2_init()) + goto bad; + + if (!(statemem = dm_pool_create("thin_pool_state", 2048)) || + !(state = dm_pool_zalloc(statemem, sizeof(*state))) || + !dmeventd_lvm2_command(statemem, state->cmd_str, + sizeof(state->cmd_str), + "lvextend --use-policies", + device)) { + if (statemem) + dm_pool_destroy(statemem); + dmeventd_lvm2_exit(); + goto bad; + } + + state->mem = statemem; + state->metadata_percent_check = CHECK_MINIMUM; + state->data_percent_check = CHECK_MINIMUM; + *private = state; + + syslog(LOG_INFO, "Monitoring thin %s.\n", device); + + return 1; +bad: + syslog(LOG_ERR, "Failed to monitor thin %s.\n", device); + + return 0; +} + +int unregister_device(const char *device, + const char *uuid __attribute__((unused)), + int major __attribute__((unused)), + int minor __attribute__((unused)), + void **private) +{ + struct dso_state *state = *private; + + syslog(LOG_INFO, "No longer monitoring thin %s.\n", device); + dm_pool_destroy(state->mem); + dmeventd_lvm2_exit(); + + return 1; +} diff --git a/daemons/lvmetad/Makefile.in b/daemons/lvmetad/Makefile.in new file mode 100644 index 0000000..35aa4ab --- /dev/null +++ b/daemons/lvmetad/Makefile.in @@ -0,0 +1,59 @@ +# +# Copyright (C) 2011-2012 Red Hat, Inc. +# +# This file is part of LVM2. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU Lesser General Public License v.2.1. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +top_builddir = @top_builddir@ + +SOURCES = lvmetad-core.c +SOURCES2 = testclient.c + +TARGETS = lvmetad lvmetad-testclient + +.PHONY: install_lvmetad + +CFLOW_LIST = $(SOURCES) +CFLOW_LIST_TARGET = $(LIB_NAME).cflow +CFLOW_TARGET = lvmetad + +include $(top_builddir)/make.tmpl + +INCLUDES += -I$(top_srcdir)/libdaemon/server +LVMLIBS = -ldaemonserver $(LVMINTERNAL_LIBS) -ldevmapper + +LIBS += $(PTHREAD_LIBS) + +LDFLAGS += -L$(top_builddir)/libdaemon/server +CLDFLAGS += -L$(top_builddir)/libdaemon/server + +lvmetad: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \ + $(top_builddir)/libdaemon/server/libdaemonserver.a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) \ + $(DL_LIBS) $(LVMLIBS) $(LIBS) -rdynamic + +# TODO: No idea. No idea how to test either. +#ifneq ("$(CFLOW_CMD)", "") +#CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES)) +#-include $(top_builddir)/libdm/libdevmapper.cflow +#-include $(top_builddir)/lib/liblvm-internal.cflow +#-include $(top_builddir)/lib/liblvm2cmd.cflow +#-include $(top_builddir)/daemons/dmeventd/$(LIB_NAME).cflow +#-include $(top_builddir)/daemons/dmeventd/plugins/mirror/$(LIB_NAME)-lvm2mirror.cflow +#endif + +install_lvmetad: lvmetad + $(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F) + +install_lvm2: install_lvmetad + +install: install_lvm2 diff --git a/daemons/lvmetad/lvmetad-client.h b/daemons/lvmetad/lvmetad-client.h new file mode 100644 index 0000000..fe8eedc --- /dev/null +++ b/daemons/lvmetad/lvmetad-client.h @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2011-2012 Red Hat, Inc. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LVM_LVMETAD_CLIENT_H +#define _LVM_LVMETAD_CLIENT_H + +#include "daemon-client.h" + +struct volume_group; + +/* Different types of replies we may get from lvmetad. */ + +typedef struct { + daemon_reply r; + const char **uuids; /* NULL terminated array */ +} lvmetad_uuidlist; + +typedef struct { + daemon_reply r; + struct dm_config_tree *cft; +} lvmetad_vg; + +/* Get a list of VG UUIDs that match a given VG name. */ +lvmetad_uuidlist lvmetad_lookup_vgname(daemon_handle h, const char *name); + +/* Get the metadata of a single VG, identified by UUID. */ +lvmetad_vg lvmetad_get_vg(daemon_handle h, const char *uuid); + +/* + * Add and remove PVs on demand. Udev-driven systems will use this interface + * instead of scanning. + */ +daemon_reply lvmetad_add_pv(daemon_handle h, const char *pv_uuid, const char *mda_content); +daemon_reply lvmetad_remove_pv(daemon_handle h, const char *pv_uuid); + +/* Trigger a full disk scan, throwing away all caches. XXX do we eventually want + * this? Probably not yet, anyway. + * daemon_reply lvmetad_rescan(daemon_handle h); + */ + +/* + * Update the version of metadata of a volume group. The VG has to be locked for + * writing for this, and the VG metadata here has to match whatever has been + * written to the disk (under this lock). This initially avoids the requirement + * for lvmetad to write to disk (in later revisions, lvmetad_supersede_vg may + * also do the writing, or we probably add another function to do that). + */ +daemon_reply lvmetad_supersede_vg(daemon_handle h, struct volume_group *vg); + +/* Wrappers to open/close connection */ + +static inline daemon_handle lvmetad_open(const char *socket) +{ + daemon_info lvmetad_info = { + .path = "lvmetad", + .socket = socket ?: DEFAULT_RUN_DIR "/lvmetad.socket", + .protocol = "lvmetad", + .protocol_version = 1, + .autostart = 0 + }; + + return daemon_open(lvmetad_info); +} + +static inline void lvmetad_close(daemon_handle h) +{ + return daemon_close(h); +} + +#endif diff --git a/daemons/lvmetad/lvmetad-core.c b/daemons/lvmetad/lvmetad-core.c new file mode 100644 index 0000000..0a1c884 --- /dev/null +++ b/daemons/lvmetad/lvmetad-core.c @@ -0,0 +1,1204 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define _XOPEN_SOURCE 500 /* pthread */ + +#include "configure.h" +#include "daemon-io.h" +#include "config-util.h" +#include "daemon-server.h" +#include "daemon-log.h" +#include "lvm-version.h" + +#include <assert.h> +#include <pthread.h> +#include <stdint.h> +#include <unistd.h> + +typedef struct { + log_state *log; /* convenience */ + const char *log_config; + + struct dm_hash_table *pvid_to_pvmeta; + struct dm_hash_table *device_to_pvid; /* shares locks with above */ + + struct dm_hash_table *vgid_to_metadata; + struct dm_hash_table *vgid_to_vgname; + struct dm_hash_table *vgname_to_vgid; + struct dm_hash_table *pvid_to_vgid; + struct { + struct dm_hash_table *vg; + pthread_mutex_t vg_lock_map; + pthread_mutex_t pvid_to_pvmeta; + pthread_mutex_t vgid_to_metadata; + pthread_mutex_t pvid_to_vgid; + } lock; + char token[128]; + pthread_mutex_t token_lock; +} lvmetad_state; + +static void destroy_metadata_hashes(lvmetad_state *s) +{ + struct dm_hash_node *n = NULL; + + n = dm_hash_get_first(s->vgid_to_metadata); + while (n) { + dm_config_destroy(dm_hash_get_data(s->vgid_to_metadata, n)); + n = dm_hash_get_next(s->vgid_to_metadata, n); + } + + n = dm_hash_get_first(s->pvid_to_pvmeta); + while (n) { + dm_config_destroy(dm_hash_get_data(s->pvid_to_pvmeta, n)); + n = dm_hash_get_next(s->pvid_to_pvmeta, n); + } + dm_hash_destroy(s->pvid_to_pvmeta); + dm_hash_destroy(s->vgid_to_metadata); + dm_hash_destroy(s->vgid_to_vgname); + dm_hash_destroy(s->vgname_to_vgid); + + n = dm_hash_get_first(s->device_to_pvid); + while (n) { + dm_free(dm_hash_get_data(s->device_to_pvid, n)); + n = dm_hash_get_next(s->device_to_pvid, n); + } + + dm_hash_destroy(s->device_to_pvid); + dm_hash_destroy(s->pvid_to_vgid); +} + +static void create_metadata_hashes(lvmetad_state *s) +{ + s->pvid_to_pvmeta = dm_hash_create(32); + s->device_to_pvid = dm_hash_create(32); + s->vgid_to_metadata = dm_hash_create(32); + s->vgid_to_vgname = dm_hash_create(32); + s->pvid_to_vgid = dm_hash_create(32); + s->vgname_to_vgid = dm_hash_create(32); +} + +static void lock_pvid_to_pvmeta(lvmetad_state *s) { + pthread_mutex_lock(&s->lock.pvid_to_pvmeta); } +static void unlock_pvid_to_pvmeta(lvmetad_state *s) { + pthread_mutex_unlock(&s->lock.pvid_to_pvmeta); } + +static void lock_vgid_to_metadata(lvmetad_state *s) { + pthread_mutex_lock(&s->lock.vgid_to_metadata); } +static void unlock_vgid_to_metadata(lvmetad_state *s) { + pthread_mutex_unlock(&s->lock.vgid_to_metadata); } + +static void lock_pvid_to_vgid(lvmetad_state *s) { + pthread_mutex_lock(&s->lock.pvid_to_vgid); } +static void unlock_pvid_to_vgid(lvmetad_state *s) { + pthread_mutex_unlock(&s->lock.pvid_to_vgid); } + +static response reply_fail(const char *reason) +{ + return daemon_reply_simple("failed", "reason = %s", reason, NULL); +} + +static response reply_unknown(const char *reason) +{ + return daemon_reply_simple("unknown", "reason = %s", reason, NULL); +} + +/* + * TODO: It may be beneficial to clean up the vg lock hash from time to time, + * since if we have many "rogue" requests for nonexistent things, we will keep + * allocating memory that we never release. Not good. + */ +static struct dm_config_tree *lock_vg(lvmetad_state *s, const char *id) { + pthread_mutex_t *vg; + struct dm_config_tree *cft; + + pthread_mutex_lock(&s->lock.vg_lock_map); + vg = dm_hash_lookup(s->lock.vg, id); + if (!vg) { + pthread_mutexattr_t rec; + pthread_mutexattr_init(&rec); + pthread_mutexattr_settype(&rec, PTHREAD_MUTEX_RECURSIVE_NP); + if (!(vg = malloc(sizeof(pthread_mutex_t)))) + return NULL; + pthread_mutex_init(vg, &rec); + if (!dm_hash_insert(s->lock.vg, id, vg)) { + free(vg); + return NULL; + } + } + /* We never remove items from s->lock.vg => the pointer remains valid. */ + pthread_mutex_unlock(&s->lock.vg_lock_map); + + DEBUGLOG(s, "locking VG %s", id); + pthread_mutex_lock(vg); + + /* Protect against structure changes of the vgid_to_metadata hash. */ + lock_vgid_to_metadata(s); + cft = dm_hash_lookup(s->vgid_to_metadata, id); + unlock_vgid_to_metadata(s); + return cft; +} + +static void unlock_vg(lvmetad_state *s, const char *id) { + pthread_mutex_t *vg; + + DEBUGLOG(s, "unlocking VG %s", id); + /* Protect the s->lock.vg structure from concurrent access. */ + pthread_mutex_lock(&s->lock.vg_lock_map); + if ((vg = dm_hash_lookup(s->lock.vg, id))) + pthread_mutex_unlock(vg); + pthread_mutex_unlock(&s->lock.vg_lock_map); +} + +static struct dm_config_node *pvs(struct dm_config_node *vg) +{ + struct dm_config_node *pv = dm_config_find_node(vg, "metadata/physical_volumes"); + if (pv) + pv = pv->child; + return pv; +} + +static void filter_metadata(struct dm_config_node *vg) { + struct dm_config_node *pv = pvs(vg); + while (pv) { + struct dm_config_node *item = pv->child; + while (item) { + /* Remove the advisory device nodes. */ + if (item->sib && !strcmp(item->sib->key, "device")) + item->sib = item->sib->sib; + item = item->sib; + } + pv = pv->sib; + } + vg->sib = NULL; /* Drop any trailing garbage. */ +} + +static void merge_pvmeta(struct dm_config_node *pv, struct dm_config_node *pvmeta) +{ + struct dm_config_node *tmp; + + if (!pvmeta) + return; + + tmp = pvmeta; + while (tmp->sib) { + /* drop the redundant ID and dev_size nodes */ + if (!strcmp(tmp->sib->key, "id") || !strcmp(tmp->sib->key, "dev_size")) + tmp->sib = tmp->sib->sib; + if (!tmp->sib) break; + tmp = tmp->sib; + tmp->parent = pv; + } + tmp->sib = pv->child; + pv->child = pvmeta; + pvmeta->parent = pv; +} + +/* Either the "big" vgs lock, or a per-vg lock needs to be held before entering + * this function. */ +static int update_pv_status(lvmetad_state *s, + struct dm_config_tree *cft, + struct dm_config_node *vg, int act) +{ + struct dm_config_node *pv; + int complete = 1; + const char *uuid; + struct dm_config_tree *pvmeta; + + lock_pvid_to_pvmeta(s); + + for (pv = pvs(vg); pv; pv = pv->sib) { + if (!(uuid = dm_config_find_str(pv->child, "id", NULL))) + continue; + + pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, uuid); + if (act) { + set_flag(cft, pv, "status", "MISSING", !pvmeta); + if (pvmeta) { + struct dm_config_node *pvmeta_cn = + dm_config_clone_node(cft, pvmeta->root->child, 1); + merge_pvmeta(pv, pvmeta_cn); + } + } + if (!pvmeta) { + complete = 0; + if (!act) { /* optimisation */ + unlock_pvid_to_pvmeta(s); + return complete; + } + } + } + unlock_pvid_to_pvmeta(s); + + return complete; +} + +static struct dm_config_node *make_pv_node(lvmetad_state *s, const char *pvid, + struct dm_config_tree *cft, + struct dm_config_node *parent, + struct dm_config_node *pre_sib) +{ + struct dm_config_tree *pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid); + const char *vgid = dm_hash_lookup(s->pvid_to_vgid, pvid), *vgname = NULL; + struct dm_config_node *pv; + struct dm_config_node *cn = NULL; + + if (!pvmeta) + return NULL; + + if (vgid) { + lock_vgid_to_metadata(s); // XXX + vgname = dm_hash_lookup(s->vgid_to_vgname, vgid); + unlock_vgid_to_metadata(s); + } + + /* Nick the pvmeta config tree. */ + if (!(pv = dm_config_clone_node(cft, pvmeta->root, 0))) + return 0; + + if (pre_sib) + pre_sib->sib = pv; + if (parent && !parent->child) + parent->child = pv; + pv->parent = parent; + pv->key = pvid; + + /* Add the "variable" bits to it. */ + + if (vgid && strcmp(vgid, "#orphan")) + cn = make_text_node(cft, "vgid", vgid, pv, cn); + if (vgname) + cn = make_text_node(cft, "vgname", vgname, pv, cn); + + return pv; +} + +static response pv_list(lvmetad_state *s, request r) +{ + struct dm_config_node *cn = NULL, *cn_pvs; + struct dm_hash_node *n; + const char *id; + response res; + + buffer_init( &res.buffer ); + + if (!(res.cft = dm_config_create())) + return res; /* FIXME error reporting */ + + /* The response field */ + res.cft->root = make_text_node(res.cft, "response", "OK", NULL, NULL); + cn_pvs = make_config_node(res.cft, "physical_volumes", NULL, res.cft->root); + + lock_pvid_to_pvmeta(s); + + for (n = dm_hash_get_first(s->pvid_to_pvmeta); n; + n = dm_hash_get_next(s->pvid_to_pvmeta, n)) { + id = dm_hash_get_key(s->pvid_to_pvmeta, n); + cn = make_pv_node(s, id, res.cft, cn_pvs, cn); + } + + unlock_pvid_to_pvmeta(s); + + return res; +} + +static response pv_lookup(lvmetad_state *s, request r) +{ + const char *pvid = daemon_request_str(r, "uuid", NULL); + int64_t devt = daemon_request_int(r, "device", 0); + response res; + struct dm_config_node *pv; + + buffer_init( &res.buffer ); + + if (!pvid && !devt) + return reply_fail("need PVID or device"); + + if (!(res.cft = dm_config_create())) + return reply_fail("out of memory"); + + if (!(res.cft->root = make_text_node(res.cft, "response", "OK", NULL, NULL))) + return reply_fail("out of memory"); + + lock_pvid_to_pvmeta(s); + if (!pvid && devt) + pvid = dm_hash_lookup_binary(s->device_to_pvid, &devt, sizeof(devt)); + + if (!pvid) { + WARN(s, "pv_lookup: could not find device %" PRIu64, devt); + unlock_pvid_to_pvmeta(s); + dm_config_destroy(res.cft); + return reply_unknown("device not found"); + } + + pv = make_pv_node(s, pvid, res.cft, NULL, res.cft->root); + if (!pv) { + unlock_pvid_to_pvmeta(s); + dm_config_destroy(res.cft); + return reply_unknown("PV not found"); + } + + pv->key = "physical_volume"; + unlock_pvid_to_pvmeta(s); + + return res; +} + +static response vg_list(lvmetad_state *s, request r) +{ + struct dm_config_node *cn, *cn_vgs, *cn_last = NULL; + struct dm_hash_node *n; + const char *id; + const char *name; + response res; + + buffer_init( &res.buffer ); + + if (!(res.cft = dm_config_create())) + goto bad; /* FIXME: better error reporting */ + + /* The response field */ + res.cft->root = cn = dm_config_create_node(res.cft, "response"); + if (!cn) + goto bad; /* FIXME */ + cn->parent = res.cft->root; + if (!(cn->v = dm_config_create_value(res.cft))) + goto bad; /* FIXME */ + + cn->v->type = DM_CFG_STRING; + cn->v->v.str = "OK"; + + cn_vgs = cn = cn->sib = dm_config_create_node(res.cft, "volume_groups"); + if (!cn_vgs) + goto bad; /* FIXME */ + + cn->parent = res.cft->root; + cn->v = NULL; + cn->child = NULL; + + lock_vgid_to_metadata(s); + + n = dm_hash_get_first(s->vgid_to_vgname); + while (n) { + id = dm_hash_get_key(s->vgid_to_vgname, n), + name = dm_hash_get_data(s->vgid_to_vgname, n); + + if (!(cn = dm_config_create_node(res.cft, id))) + goto bad; /* FIXME */ + + if (cn_last) + cn_last->sib = cn; + + cn->parent = cn_vgs; + cn->sib = NULL; + cn->v = NULL; + + if (!(cn->child = dm_config_create_node(res.cft, "name"))) + goto bad; /* FIXME */ + + cn->child->parent = cn; + cn->child->sib = 0; + if (!(cn->child->v = dm_config_create_value(res.cft))) + goto bad; /* FIXME */ + + cn->child->v->type = DM_CFG_STRING; + cn->child->v->v.str = name; + + if (!cn_vgs->child) + cn_vgs->child = cn; + cn_last = cn; + + n = dm_hash_get_next(s->vgid_to_vgname, n); + } + + unlock_vgid_to_metadata(s); +bad: + return res; +} + +static response vg_lookup(lvmetad_state *s, request r) +{ + struct dm_config_tree *cft; + struct dm_config_node *metadata, *n; + response res; + + const char *uuid = daemon_request_str(r, "uuid", NULL); + const char *name = daemon_request_str(r, "name", NULL); + + buffer_init( &res.buffer ); + + DEBUGLOG(s, "vg_lookup: uuid = %s, name = %s", uuid, name); + + if (!uuid || !name) { + lock_vgid_to_metadata(s); + if (name && !uuid) + uuid = dm_hash_lookup(s->vgname_to_vgid, name); + if (uuid && !name) + name = dm_hash_lookup(s->vgid_to_vgname, uuid); + unlock_vgid_to_metadata(s); + } + + DEBUGLOG(s, "vg_lookup: updated uuid = %s, name = %s", uuid, name); + + if (!uuid) + return reply_unknown("VG not found"); + + cft = lock_vg(s, uuid); + if (!cft || !cft->root) { + unlock_vg(s, uuid); + return reply_unknown("UUID not found"); + } + + metadata = cft->root; + if (!(res.cft = dm_config_create())) + goto bad; + + /* The response field */ + if (!(res.cft->root = n = dm_config_create_node(res.cft, "response"))) + goto bad; + + if (!(n->v = dm_config_create_value(cft))) + goto bad; + + n->parent = res.cft->root; + n->v->type = DM_CFG_STRING; + n->v->v.str = "OK"; + + if (!(n = n->sib = dm_config_create_node(res.cft, "name"))) + goto bad; + + if (!(n->v = dm_config_create_value(res.cft))) + goto bad; + + n->parent = res.cft->root; + n->v->type = DM_CFG_STRING; + n->v->v.str = name; + + /* The metadata section */ + if (!(n = n->sib = dm_config_clone_node(res.cft, metadata, 1))) + goto bad; + n->parent = res.cft->root; + res.error = 0; + unlock_vg(s, uuid); + + update_pv_status(s, res.cft, n, 1); /* FIXME report errors */ + + return res; +bad: + unlock_vg(s, uuid); + return reply_fail("out of memory"); +} + +static int compare_value(struct dm_config_value *a, struct dm_config_value *b) +{ + int r = 0; + + if (a->type > b->type) + return 1; + if (a->type < b->type) + return -1; + + switch (a->type) { + case DM_CFG_STRING: r = strcmp(a->v.str, b->v.str); break; + case DM_CFG_FLOAT: r = (a->v.f == b->v.f) ? 0 : (a->v.f > b->v.f) ? 1 : -1; break; + case DM_CFG_INT: r = (a->v.i == b->v.i) ? 0 : (a->v.i > b->v.i) ? 1 : -1; break; + case DM_CFG_EMPTY_ARRAY: return 0; + } + + if (r == 0 && a->next && b->next) + r = compare_value(a->next, b->next); + return r; +} + +static int compare_config(struct dm_config_node *a, struct dm_config_node *b) +{ + int result = 0; + if (a->v && b->v) + result = compare_value(a->v, b->v); + if (a->v && !b->v) + result = 1; + if (!a->v && b->v) + result = -1; + if (a->child && b->child) + result = compare_config(a->child, b->child); + + if (result) { + // DEBUGLOG("config inequality at %s / %s", a->key, b->key); + return result; + } + + if (a->sib && b->sib) + result = compare_config(a->sib, b->sib); + if (a->sib && !b->sib) + result = 1; + if (!a->sib && b->sib) + result = -1; + + return result; +} + +static int vg_remove_if_missing(lvmetad_state *s, const char *vgid); + +/* You need to be holding the pvid_to_vgid lock already to call this. */ +static int update_pvid_to_vgid(lvmetad_state *s, struct dm_config_tree *vg, + const char *vgid, int nuke_empty) +{ + struct dm_config_node *pv; + struct dm_hash_table *to_check; + struct dm_hash_node *n; + const char *pvid; + const char *vgid_old; + const char *check_vgid; + int r = 0; + + if (!vgid) + return 0; + + if (!(to_check = dm_hash_create(32))) + return 0; + + for (pv = pvs(vg->root); pv; pv = pv->sib) { + if (!(pvid = dm_config_find_str(pv->child, "id", NULL))) + continue; + + if (nuke_empty && + (vgid_old = dm_hash_lookup(s->pvid_to_vgid, pvid)) && + !dm_hash_insert(to_check, vgid_old, (void*) 1)) + goto out; + + if (!dm_hash_insert(s->pvid_to_vgid, pvid, (void*) vgid)) + goto out; + + DEBUGLOG(s, "moving PV %s to VG %s", pvid, vgid); + } + + for (n = dm_hash_get_first(to_check); n; + n = dm_hash_get_next(to_check, n)) { + check_vgid = dm_hash_get_key(to_check, n); + lock_vg(s, check_vgid); + vg_remove_if_missing(s, check_vgid); + unlock_vg(s, check_vgid); + } + + r = 1; + out: + dm_hash_destroy(to_check); + + return r; +} + +/* A pvid map lock needs to be held if update_pvids = 1. */ +static int remove_metadata(lvmetad_state *s, const char *vgid, int update_pvids) +{ + struct dm_config_tree *old; + const char *oldname; + lock_vgid_to_metadata(s); + old = dm_hash_lookup(s->vgid_to_metadata, vgid); + oldname = dm_hash_lookup(s->vgid_to_vgname, vgid); + unlock_vgid_to_metadata(s); + + if (!old) + return 0; + assert(oldname); + + if (update_pvids) + /* FIXME: What should happen when update fails */ + update_pvid_to_vgid(s, old, "#orphan", 0); + /* need to update what we have since we found a newer version */ + dm_hash_remove(s->vgid_to_metadata, vgid); + dm_hash_remove(s->vgid_to_vgname, vgid); + dm_hash_remove(s->vgname_to_vgid, oldname); + dm_config_destroy(old); + return 1; +} + +/* The VG must be locked. */ +static int vg_remove_if_missing(lvmetad_state *s, const char *vgid) +{ + struct dm_config_tree *vg; + struct dm_config_node *pv; + const char *vgid_check; + const char *pvid; + int missing = 1; + + if (!vgid) + return 0; + + if (!(vg = dm_hash_lookup(s->vgid_to_metadata, vgid))) + return 1; + + lock_pvid_to_pvmeta(s); + for (pv = pvs(vg->root); pv; pv = pv->sib) { + if (!(pvid = dm_config_find_str(pv->child, "id", NULL))) + continue; + + if ((vgid_check = dm_hash_lookup(s->pvid_to_vgid, pvid)) && + dm_hash_lookup(s->pvid_to_pvmeta, pvid) && + !strcmp(vgid, vgid_check)) + missing = 0; /* at least one PV is around */ + } + + if (missing) { + DEBUGLOG(s, "removing empty VG %s", vgid); + remove_metadata(s, vgid, 0); + } + + unlock_pvid_to_pvmeta(s); + + return 1; +} + +/* No locks need to be held. The pointers are never used outside of the scope of + * this function, so they can be safely destroyed after update_metadata returns + * (anything that might have been retained is copied). */ +static int update_metadata(lvmetad_state *s, const char *name, const char *_vgid, + struct dm_config_node *metadata, int64_t *oldseq) +{ + struct dm_config_tree *cft = NULL; + struct dm_config_tree *old; + int retval = 0; + int seq; + int haveseq = -1; + const char *oldname = NULL; + const char *vgid; + char *cfgname; + + lock_vgid_to_metadata(s); + old = dm_hash_lookup(s->vgid_to_metadata, _vgid); + lock_vg(s, _vgid); + unlock_vgid_to_metadata(s); + + seq = dm_config_find_int(metadata, "metadata/seqno", -1); + + if (old) { + haveseq = dm_config_find_int(old->root, "metadata/seqno", -1); + oldname = dm_hash_lookup(s->vgid_to_vgname, _vgid); + assert(oldname); + } + + if (seq < 0) + goto out; + + filter_metadata(metadata); /* sanitize */ + + if (oldseq) { + if (old) + *oldseq = haveseq; + else + *oldseq = seq; + } + + if (seq == haveseq) { + retval = 1; + if (compare_config(metadata, old->root)) + retval = 0; + DEBUGLOG(s, "Not updating metadata for %s at %d (%s)", _vgid, haveseq, + retval ? "ok" : "MISMATCH"); + if (!retval) { + DEBUGLOG_cft(s, "OLD: ", old->root); + DEBUGLOG_cft(s, "NEW: ", metadata); + } + goto out; + } + + if (seq < haveseq) { + DEBUGLOG(s, "Refusing to update metadata for %s (at %d) to %d", _vgid, haveseq, seq); + /* TODO: notify the client that their metadata is out of date? */ + retval = 1; + goto out; + } + + if (!(cft = dm_config_create()) || + !(cft->root = dm_config_clone_node(cft, metadata, 0))) { + ERROR(s, "Out of memory"); + goto out; + } + + vgid = dm_config_find_str(cft->root, "metadata/id", NULL); + + if (!vgid || !name) { + DEBUGLOG(s, "Name '%s' or uuid '%s' missing!", name, vgid); + goto out; + } + + lock_pvid_to_vgid(s); + + if (haveseq >= 0 && haveseq < seq) { + INFO(s, "Updating metadata for %s at %d to %d", _vgid, haveseq, seq); + /* temporarily orphan all of our PVs */ + remove_metadata(s, vgid, 1); + } + + lock_vgid_to_metadata(s); + DEBUGLOG(s, "Mapping %s to %s", vgid, name); + + retval = ((cfgname = dm_pool_strdup(dm_config_memory(cft), name)) && + dm_hash_insert(s->vgid_to_metadata, vgid, cft) && + dm_hash_insert(s->vgid_to_vgname, vgid, cfgname) && + dm_hash_insert(s->vgname_to_vgid, name, (void*) vgid)) ? 1 : 0; + unlock_vgid_to_metadata(s); + + if (retval) + /* FIXME: What should happen when update fails */ + retval = update_pvid_to_vgid(s, cft, vgid, 1); + + unlock_pvid_to_vgid(s); +out: + if (!retval && cft) + dm_config_destroy(cft); + unlock_vg(s, _vgid); + return retval; +} + +static response pv_gone(lvmetad_state *s, request r) +{ + const char *pvid = daemon_request_str(r, "uuid", NULL); + int64_t device = daemon_request_int(r, "device", 0); + struct dm_config_tree *pvmeta; + char *pvid_old; + + DEBUGLOG(s, "pv_gone: %s / %" PRIu64, pvid, device); + + lock_pvid_to_pvmeta(s); + if (!pvid && device > 0) + pvid = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device)); + if (!pvid) { + unlock_pvid_to_pvmeta(s); + return reply_unknown("device not in cache"); + } + + DEBUGLOG(s, "pv_gone (updated): %s / %" PRIu64, pvid, device); + + pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid); + pvid_old = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device)); + dm_hash_remove_binary(s->device_to_pvid, &device, sizeof(device)); + dm_hash_remove(s->pvid_to_pvmeta, pvid); + vg_remove_if_missing(s, dm_hash_lookup(s->pvid_to_vgid, pvid)); + unlock_pvid_to_pvmeta(s); + + if (pvid_old) + dm_free(pvid_old); + + if (pvmeta) { + dm_config_destroy(pvmeta); + return daemon_reply_simple("OK", NULL); + } else + return reply_unknown("PVID does not exist"); +} + +static response pv_clear_all(lvmetad_state *s, request r) +{ + DEBUGLOG(s, "pv_clear_all"); + + lock_pvid_to_pvmeta(s); + lock_vgid_to_metadata(s); + lock_pvid_to_vgid(s); + + destroy_metadata_hashes(s); + create_metadata_hashes(s); + + unlock_pvid_to_vgid(s); + unlock_vgid_to_metadata(s); + unlock_pvid_to_pvmeta(s); + + return daemon_reply_simple("OK", NULL); +} + +static response pv_found(lvmetad_state *s, request r) +{ + struct dm_config_node *metadata = dm_config_find_node(r.cft->root, "metadata"); + const char *pvid = daemon_request_str(r, "pvmeta/id", NULL); + const char *vgname = daemon_request_str(r, "vgname", NULL); + const char *vgid = daemon_request_str(r, "metadata/id", NULL); + struct dm_config_node *pvmeta = dm_config_find_node(r.cft->root, "pvmeta"); + uint64_t device; + struct dm_config_tree *cft, *pvmeta_old_dev = NULL, *pvmeta_old_pvid = NULL; + char *old; + const char *pvid_dup; + int complete = 0, orphan = 0; + int64_t seqno = -1, seqno_old = -1; + + if (!pvid) + return reply_fail("need PV UUID"); + if (!pvmeta) + return reply_fail("need PV metadata"); + + if (!dm_config_get_uint64(pvmeta, "pvmeta/device", &device)) + return reply_fail("need PV device number"); + + lock_pvid_to_pvmeta(s); + + if ((old = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device)))) { + pvmeta_old_dev = dm_hash_lookup(s->pvid_to_pvmeta, old); + dm_hash_remove(s->pvid_to_pvmeta, old); + } + pvmeta_old_pvid = dm_hash_lookup(s->pvid_to_pvmeta, pvid); + + DEBUGLOG(s, "pv_found %s, vgid = %s, device = %" PRIu64 ", old = %s", pvid, vgid, device, old); + + dm_free(old); + + if (!(cft = dm_config_create()) || + !(cft->root = dm_config_clone_node(cft, pvmeta, 0))) { + unlock_pvid_to_pvmeta(s); + return reply_fail("out of memory"); + } + + pvid_dup = dm_strdup(pvid); + if (!dm_hash_insert(s->pvid_to_pvmeta, pvid, cft) || + !dm_hash_insert_binary(s->device_to_pvid, &device, sizeof(device), (void*)pvid_dup)) { + unlock_pvid_to_pvmeta(s); + return reply_fail("out of memory"); + } + if (pvmeta_old_pvid) + dm_config_destroy(pvmeta_old_pvid); + if (pvmeta_old_dev && pvmeta_old_dev != pvmeta_old_pvid) + dm_config_destroy(pvmeta_old_dev); + + unlock_pvid_to_pvmeta(s); + + if (metadata) { + if (!vgid) + return reply_fail("need VG UUID"); + DEBUGLOG(s, "obtained vgid = %s, vgname = %s", vgid, vgname); + if (!vgname) + return reply_fail("need VG name"); + if (daemon_request_int(r, "metadata/seqno", -1) < 0) + return reply_fail("need VG seqno"); + + if (!update_metadata(s, vgname, vgid, metadata, &seqno_old)) + return reply_fail("metadata update failed"); + } else { + lock_pvid_to_vgid(s); + vgid = dm_hash_lookup(s->pvid_to_vgid, pvid); + unlock_pvid_to_vgid(s); + } + + if (vgid) { + if ((cft = lock_vg(s, vgid))) { + complete = update_pv_status(s, cft, cft->root, 0); + seqno = dm_config_find_int(cft->root, "metadata/seqno", -1); + } else if (!strcmp(vgid, "#orphan")) + orphan = 1; + else { + unlock_vg(s, vgid); + return reply_fail("non-orphan VG without metadata encountered"); + } + unlock_vg(s, vgid); + } + + return daemon_reply_simple("OK", + "status = %s", orphan ? "orphan" : + (complete ? "complete" : "partial"), + "vgid = %s", vgid ? vgid : "#orphan", + "seqno_before = %"PRId64, seqno_old, + "seqno_after = %"PRId64, seqno, + NULL); +} + +static response vg_update(lvmetad_state *s, request r) +{ + struct dm_config_node *metadata = dm_config_find_node(r.cft->root, "metadata"); + const char *vgid = daemon_request_str(r, "metadata/id", NULL); + const char *vgname = daemon_request_str(r, "vgname", NULL); + if (metadata) { + if (!vgid) + return reply_fail("need VG UUID"); + if (!vgname) + return reply_fail("need VG name"); + if (daemon_request_int(r, "metadata/seqno", -1) < 0) + return reply_fail("need VG seqno"); + + /* TODO defer metadata update here; add a separate vg_commit + * call; if client does not commit, die */ + if (!update_metadata(s, vgname, vgid, metadata, NULL)) + return reply_fail("metadata update failed"); + } + return daemon_reply_simple("OK", NULL); +} + +static response vg_remove(lvmetad_state *s, request r) +{ + const char *vgid = daemon_request_str(r, "uuid", NULL); + + if (!vgid) + return reply_fail("need VG UUID"); + + DEBUGLOG(s, "vg_remove: %s", vgid); + + lock_pvid_to_vgid(s); + remove_metadata(s, vgid, 1); + unlock_pvid_to_vgid(s); + + return daemon_reply_simple("OK", NULL); +} + +static void _dump_cft(struct buffer *buf, struct dm_hash_table *ht, const char *key_addr) +{ + struct dm_hash_node *n = dm_hash_get_first(ht); + while (n) { + struct dm_config_tree *cft = dm_hash_get_data(ht, n); + const char *key_backup = cft->root->key; + cft->root->key = dm_config_find_str(cft->root, key_addr, "unknown"); + dm_config_write_node(cft->root, buffer_line, buf); + cft->root->key = key_backup; + n = dm_hash_get_next(ht, n); + } +} + +static void _dump_pairs(struct buffer *buf, struct dm_hash_table *ht, const char *name, int int_key) +{ + char *append; + struct dm_hash_node *n = dm_hash_get_first(ht); + + buffer_append(buf, name); + buffer_append(buf, " {\n"); + + while (n) { + const char *key = dm_hash_get_key(ht, n), + *val = dm_hash_get_data(ht, n); + buffer_append(buf, " "); + if (int_key) + dm_asprintf(&append, "%d = \"%s\"", *(int*)key, val); + else + dm_asprintf(&append, "%s = \"%s\"", key, val); + if (append) + buffer_append(buf, append); + buffer_append(buf, "\n"); + dm_free(append); + n = dm_hash_get_next(ht, n); + } + buffer_append(buf, "}\n"); +} + +static response dump(lvmetad_state *s) +{ + response res; + struct buffer *b = &res.buffer; + + buffer_init(b); + + /* Lock everything so that we get a consistent dump. */ + + lock_vgid_to_metadata(s); + lock_pvid_to_pvmeta(s); + lock_pvid_to_vgid(s); + + buffer_append(b, "# VG METADATA\n\n"); + _dump_cft(b, s->vgid_to_metadata, "metadata/id"); + + buffer_append(b, "\n# PV METADATA\n\n"); + _dump_cft(b, s->pvid_to_pvmeta, "pvmeta/id"); + + buffer_append(b, "\n# VGID to VGNAME mapping\n\n"); + _dump_pairs(b, s->vgid_to_vgname, "vgid_to_vgname", 0); + + buffer_append(b, "\n# VGNAME to VGID mapping\n\n"); + _dump_pairs(b, s->vgname_to_vgid, "vgname_to_vgid", 0); + + buffer_append(b, "\n# PVID to VGID mapping\n\n"); + _dump_pairs(b, s->pvid_to_vgid, "pvid_to_vgid", 0); + + buffer_append(b, "\n# DEVICE to PVID mapping\n\n"); + _dump_pairs(b, s->device_to_pvid, "device_to_pvid", 1); + + unlock_pvid_to_vgid(s); + unlock_pvid_to_pvmeta(s); + unlock_vgid_to_metadata(s); + + return res; +} + +static response handler(daemon_state s, client_handle h, request r) +{ + lvmetad_state *state = s.private; + const char *rq = daemon_request_str(r, "request", "NONE"); + const char *token = daemon_request_str(r, "token", "NONE"); + + pthread_mutex_lock(&state->token_lock); + if (!strcmp(rq, "token_update")) { + strncpy(state->token, token, 128); + state->token[127] = 0; + pthread_mutex_unlock(&state->token_lock); + return daemon_reply_simple("OK", NULL); + } + + if (strcmp(token, state->token) && strcmp(rq, "dump")) { + pthread_mutex_unlock(&state->token_lock); + return daemon_reply_simple("token_mismatch", + "expected = %s", state->token, + "received = %s", token, + "reason = %s", "token mismatch", NULL); + } + pthread_mutex_unlock(&state->token_lock); + + /* + * TODO Add a stats call, with transaction count/rate, time since last + * update &c. + */ + if (!strcmp(rq, "pv_found")) + return pv_found(state, r); + + if (!strcmp(rq, "pv_gone")) + return pv_gone(state, r); + + if (!strcmp(rq, "pv_clear_all")) + return pv_clear_all(state, r); + + if (!strcmp(rq, "pv_lookup")) + return pv_lookup(state, r); + + if (!strcmp(rq, "vg_update")) + return vg_update(state, r); + + if (!strcmp(rq, "vg_remove")) + return vg_remove(state, r); + + if (!strcmp(rq, "vg_lookup")) + return vg_lookup(state, r); + + if (!strcmp(rq, "pv_list")) + return pv_list(state, r); + + if (!strcmp(rq, "vg_list")) + return vg_list(state, r); + + if (!strcmp(rq, "dump")) + return dump(state); + + return reply_fail("request not implemented"); +} + +static int init(daemon_state *s) +{ + pthread_mutexattr_t rec; + lvmetad_state *ls = s->private; + ls->log = s->log; + + pthread_mutexattr_init(&rec); + pthread_mutexattr_settype(&rec, PTHREAD_MUTEX_RECURSIVE_NP); + pthread_mutex_init(&ls->lock.pvid_to_pvmeta, &rec); + pthread_mutex_init(&ls->lock.vgid_to_metadata, &rec); + pthread_mutex_init(&ls->lock.pvid_to_vgid, NULL); + pthread_mutex_init(&ls->lock.vg_lock_map, NULL); + pthread_mutex_init(&ls->token_lock, NULL); + create_metadata_hashes(ls); + + ls->lock.vg = dm_hash_create(32); + ls->token[0] = 0; + + /* Set up stderr logging depending on the -l option. */ + if (!daemon_log_parse(ls->log, DAEMON_LOG_OUTLET_STDERR, ls->log_config, 1)) + return 0; + + DEBUGLOG(s, "initialised state: vgid_to_metadata = %p", ls->vgid_to_metadata); + if (!ls->pvid_to_vgid || !ls->vgid_to_metadata) + return 0; + + /* if (ls->initial_registrations) + _process_initial_registrations(ds->initial_registrations); */ + + return 1; +} + +static int fini(daemon_state *s) +{ + lvmetad_state *ls = s->private; + struct dm_hash_node *n; + + DEBUGLOG(s, "fini"); + + destroy_metadata_hashes(ls); + + /* Destroy the lock hashes now. */ + n = dm_hash_get_first(ls->lock.vg); + while (n) { + pthread_mutex_destroy(dm_hash_get_data(ls->lock.vg, n)); + free(dm_hash_get_data(ls->lock.vg, n)); + n = dm_hash_get_next(ls->lock.vg, n); + } + + dm_hash_destroy(ls->lock.vg); + return 1; +} + +static void usage(char *prog, FILE *file) +{ + fprintf(file, "Usage:\n" + "%s [-V] [-h] [-f] [-l {all|wire|debug}] [-s path]\n\n" + " -V Show version of lvmetad\n" + " -h Show this help information\n" + " -f Don't fork, run in the foreground\n" + " -l Logging message level (-l {all|wire|debug})\n" + " -s Set path to the socket to listen on\n\n", prog); +} + +int main(int argc, char *argv[]) +{ + signed char opt; + daemon_state s = { .private = NULL }; + lvmetad_state ls; + int _socket_override = 1; + + s.name = "lvmetad"; + s.private = &ls; + s.daemon_init = init; + s.daemon_fini = fini; + s.handler = handler; + s.socket_path = getenv("LVM_LVMETAD_SOCKET"); + if (!s.socket_path) { + _socket_override = 0; + s.socket_path = DEFAULT_RUN_DIR "/lvmetad.socket"; + } + s.pidfile = LVMETAD_PIDFILE; + s.protocol = "lvmetad"; + s.protocol_version = 1; + ls.log_config = ""; + + // use getopt_long + while ((opt = getopt(argc, argv, "?fhVl:s:")) != EOF) { + switch (opt) { + case 'h': + usage(argv[0], stdout); + exit(0); + case '?': + usage(argv[0], stderr); + exit(0); + case 'f': + s.foreground = 1; + break; + case 'l': + ls.log_config = optarg; + break; + case 's': // --socket + s.socket_path = optarg; + _socket_override = 1; + break; + case 'V': + printf("lvmetad version: " LVM_VERSION "\n"); + exit(1); + } + } + + if (s.foreground) { + if (!_socket_override) { + fprintf(stderr, "A socket path (-s) is required in foreground mode."); + exit(2); + } else { + s.pidfile = NULL; + } + } + + daemon_start(s); + return 0; +} diff --git a/daemons/lvmetad/test.sh b/daemons/lvmetad/test.sh new file mode 100755 index 0000000..f937562 --- /dev/null +++ b/daemons/lvmetad/test.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +export LD_LIBRARY_PATH="$1" + +test -n "$2" && { + rm -f /var/run/lvmetad.{socket,pid} + chmod +rx lvmetad + valgrind ./lvmetad -f & + PID=$! + sleep 1 + ./testclient + kill $PID + exit 0 +} + +sudo ./test.sh "$1" . diff --git a/daemons/lvmetad/testclient.c b/daemons/lvmetad/testclient.c new file mode 100644 index 0000000..c4cf7c5 --- /dev/null +++ b/daemons/lvmetad/testclient.c @@ -0,0 +1,127 @@ +#include "lvmetad-client.h" +#include "label.h" +#include "lvmcache.h" +#include "metadata.h" + +const char *uuid1 = "abcd-efgh"; +const char *uuid2 = "bbcd-efgh"; +const char *vgid = "yada-yada"; +const char *uuid3 = "cbcd-efgh"; + +const char *metadata2 = "{\n" + "id = \"yada-yada\"\n" + "seqno = 15\n" + "status = [\"READ\", \"WRITE\"]\n" + "flags = []\n" + "extent_size = 8192\n" + "physical_volumes {\n" + " pv0 {\n" + " id = \"abcd-efgh\"\n" + " }\n" + " pv1 {\n" + " id = \"bbcd-efgh\"\n" + " }\n" + " pv2 {\n" + " id = \"cbcd-efgh\"\n" + " }\n" + "}\n" + "}\n"; + +void _handle_reply(daemon_reply reply) { + const char *repl = daemon_reply_str(reply, "response", NULL); + const char *status = daemon_reply_str(reply, "status", NULL); + const char *vgid = daemon_reply_str(reply, "vgid", NULL); + + fprintf(stderr, "[C] REPLY: %s\n", repl); + if (!strcmp(repl, "failed")) + fprintf(stderr, "[C] REASON: %s\n", daemon_reply_str(reply, "reason", "unknown")); + if (vgid) + fprintf(stderr, "[C] VGID: %s\n", vgid); + if (status) + fprintf(stderr, "[C] STATUS: %s\n", status); + daemon_reply_destroy(reply); +} + +void _pv_add(daemon_handle h, const char *uuid, const char *metadata) +{ + daemon_reply reply = daemon_send_simple(h, "pv_add", "uuid = %s", uuid, + "metadata = %b", metadata, + NULL); + _handle_reply(reply); +} + +int scan(daemon_handle h, char *fn) { + struct device *dev = dev_cache_get(fn, NULL); + + struct label *label; + if (!label_read(dev, &label, 0)) { + fprintf(stderr, "[C] no label found on %s\n", fn); + return; + } + + char uuid[64]; + id_write_format(dev->pvid, uuid, 64); + fprintf(stderr, "[C] found PV: %s\n", uuid); + struct lvmcache_info *info = (struct lvmcache_info *) label->info; + struct physical_volume pv = { 0, }; + + if (!(info->fmt->ops->pv_read(info->fmt, dev_name(dev), &pv, 0))) { + fprintf(stderr, "[C] Failed to read PV %s", dev_name(dev)); + return; + } + + struct format_instance_ctx fic; + struct format_instance *fid = info->fmt->ops->create_instance(info->fmt, &fic); + struct metadata_area *mda; + struct volume_group *vg = NULL; + dm_list_iterate_items(mda, &info->mdas) { + struct volume_group *this = mda->ops->vg_read(fid, "", mda); + if (this && !vg || this->seqno > vg->seqno) + vg = this; + } + if (vg) { + char *buf = NULL; + /* TODO. This is not entirely correct, since export_vg_to_buffer + * adds trailing garbage to the buffer. We may need to use + * export_vg_to_config_tree and format the buffer ourselves. It + * does, however, work for now, since the garbage is well + * formatted and has no conflicting keys with the rest of the + * request. */ + export_vg_to_buffer(vg, &buf); + daemon_reply reply = + daemon_send_simple(h, "pv_add", "uuid = %s", uuid, + "metadata = %b", strchr(buf, '{'), + NULL); + _handle_reply(reply); + } +} + +void _dump_vg(daemon_handle h, const char *uuid) +{ + daemon_reply reply = daemon_send_simple(h, "vg_by_uuid", "uuid = %s", uuid, NULL); + fprintf(stderr, "[C] reply buffer: %s\n", reply.buffer); + daemon_reply_destroy(reply); +} + +int main(int argc, char **argv) { + daemon_handle h = lvmetad_open(); + + if (argc > 1) { + int i; + struct cmd_context *cmd = create_toolcontext(0, NULL, 0, 0); + for (i = 1; i < argc; ++i) { + const char *uuid = NULL; + scan(h, argv[i]); + } + destroy_toolcontext(cmd); + return 0; + } + + _pv_add(h, uuid1, NULL); + _pv_add(h, uuid2, metadata2); + _dump_vg(h, vgid); + _pv_add(h, uuid3, NULL); + + daemon_close(h); + return 0; +} |