diff options
Diffstat (limited to 'daemons/dmeventd')
22 files changed, 2726 insertions, 1824 deletions
diff --git a/daemons/dmeventd/.gitignore b/daemons/dmeventd/.gitignore new file mode 100644 index 0000000..71cca4c --- /dev/null +++ b/daemons/dmeventd/.gitignore @@ -0,0 +1 @@ +dmeventd diff --git a/daemons/dmeventd/Makefile.in b/daemons/dmeventd/Makefile.in index 1302a44..f7896e5 100644 --- a/daemons/dmeventd/Makefile.in +++ b/daemons/dmeventd/Makefile.in @@ -9,16 +9,26 @@ # # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA srcdir = @srcdir@ top_srcdir = @top_srcdir@ top_builddir = @top_builddir@ +abs_srcdir = @abs_srcdir@ SOURCES = libdevmapper-event.c SOURCES2 = dmeventd.c TARGETS = dmeventd +CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES) $(SOURCES2) \ + plugins/lvm2/dmeventd_lvm.c \ + plugins/mirror/dmeventd_mirror.c \ + plugins/raid/dmeventd_raid.c \ + plugins/snapshot/dmeventd_snapshot.c \ + plugins/thin/dmeventd_thin.c \ + plugins/vdo/dmeventd_vdo.c \ + ) +CFLOW_TARGET := $(TARGETS) .PHONY: install_lib_dynamic install_lib_static install_include \ install_pkgconfig install_dmeventd_dynamic install_dmeventd_static \ @@ -37,6 +47,7 @@ endif LIB_VERSION = $(LIB_VERSION_DM) LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX) +LIBS = $(PTHREAD_LIBS) -L$(interfacebuilddir) -ldevmapper CLEAN_TARGETS = dmeventd.static $(LIB_NAME).a @@ -46,7 +57,6 @@ endif CFLOW_LIST = $(SOURCES) CFLOW_LIST_TARGET = $(LIB_NAME).cflow -CFLOW_TARGET = dmeventd EXPORTED_HEADER = $(srcdir)/libdevmapper-event.h EXPORTED_FN_PREFIX = dm_event @@ -55,49 +65,47 @@ include $(top_builddir)/make.tmpl all: device-mapper device-mapper: $(TARGETS) +plugins.device-mapper: $(LIB_SHARED) -LIBS += -ldevmapper -LVMLIBS += -ldevmapper-event $(PTHREAD_LIBS) +CFLAGS_dmeventd.o += $(EXTRA_EXEC_CFLAGS) dmeventd: $(LIB_SHARED) dmeventd.o - $(CC) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) -L. -o $@ dmeventd.o \ - $(DL_LIBS) $(LVMLIBS) $(LIBS) -rdynamic + @echo " [CC] $@" + $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) dmeventd.o \ + -o $@ $(DL_LIBS) $(DMEVENT_LIBS) $(LIBS) -dmeventd.static: $(LIB_STATIC) dmeventd.o $(interfacebuilddir)/libdevmapper.a - $(CC) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) -static -L. -L$(interfacebuilddir) -o $@ \ - dmeventd.o $(DL_LIBS) $(LVMLIBS) $(LIBS) $(STATIC_LIBS) +dmeventd.static: $(LIB_STATIC) dmeventd.o + @echo " [CC] $@" + $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(STATIC_LDFLAGS) -static dmeventd.o \ + -o $@ $(DL_LIBS) $(DMEVENT_LIBS) $(LIBS) $(STATIC_LIBS) ifeq ("@PKGCONFIG@", "yes") INSTALL_LIB_TARGETS += install_pkgconfig endif -ifneq ("$(CFLOW_CMD)", "") -CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES)) --include $(top_builddir)/libdm/libdevmapper.cflow --include $(top_builddir)/lib/liblvm-internal.cflow --include $(top_builddir)/lib/liblvm2cmd.cflow --include $(top_builddir)/daemons/dmeventd/$(LIB_NAME).cflow --include $(top_builddir)/daemons/dmeventd/plugins/mirror/$(LIB_NAME)-lvm2mirror.cflow -endif - install_include: $(srcdir)/libdevmapper-event.h - $(INSTALL_DATA) -D $< $(includedir)/$(<F) + @echo " [INSTALL] $(<F)" + $(Q) $(INSTALL_DATA) -D $< $(includedir)/$(<F) install_pkgconfig: libdevmapper-event.pc - $(INSTALL_DATA) -D $< $(pkgconfigdir)/devmapper-event.pc + @echo " [INSTALL] $<" + $(Q) $(INSTALL_DATA) -D $< $(pkgconfigdir)/devmapper-event.pc install_lib_dynamic: install_lib_shared install_lib_static: $(LIB_STATIC) - $(INSTALL_DATA) -D $< $(usrlibdir)/$(<F) + @echo " [INSTALL] $<" + $(Q) $(INSTALL_DATA) -D $< $(usrlibdir)/$(<F) install_lib: $(INSTALL_LIB_TARGETS) install_dmeventd_dynamic: dmeventd - $(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F) + @echo " [INSTALL] $<" + $(Q) $(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F) install_dmeventd_static: dmeventd.static - $(INSTALL_PROGRAM) -D $< $(staticdir)/$(<F) + @echo " [INSTALL] $<" + $(Q) $(INSTALL_PROGRAM) -D $< $(staticdir)/$(<F) install_dmeventd: $(INSTALL_DMEVENTD_TARGETS) diff --git a/daemons/dmeventd/dmeventd.c b/daemons/dmeventd/dmeventd.c index 13148c3..490768b 100644 --- a/daemons/dmeventd/dmeventd.c +++ b/daemons/dmeventd/dmeventd.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. + * Copyright (C) 2005-2015 Red Hat, Inc. All rights reserved. * * This file is part of the device-mapper userspace tools. * @@ -9,36 +9,34 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /* * dmeventd - dm event daemon to monitor active mapped devices */ -#define _GNU_SOURCE -#define _FILE_OFFSET_BITS 64 -#include "configure.h" -#include "libdevmapper.h" #include "libdevmapper-event.h" #include "dmeventd.h" -//#include "libmultilog.h" -#include "dm-logging.h" + +#include "libdm/misc/dm-logging.h" +#include "base/memory/zalloc.h" #include <dlfcn.h> -#include <errno.h> #include <pthread.h> #include <sys/file.h> #include <sys/stat.h> #include <sys/wait.h> #include <sys/time.h> #include <sys/resource.h> -#include <unistd.h> #include <signal.h> #include <arpa/inet.h> /* for htonl, ntohl */ +#include <fcntl.h> /* for musl libc */ +#include <unistd.h> +#include <syslog.h> -#ifdef linux +#ifdef __linux__ /* * Kernel version 2.6.36 and higher has * new OOM killer adjustment interface. @@ -54,6 +52,7 @@ # define OOM_SCORE_ADJ_MIN (-1000) /* Systemd on-demand activation support */ +# define SD_RUNTIME_UNIT_FILE_DIR DEFAULT_DM_RUN_DIR "/systemd/system/" # define SD_ACTIVATION_ENV_VAR_NAME "SD_ACTIVATION" # define SD_LISTEN_PID_ENV_VAR_NAME "LISTEN_PID" # define SD_LISTEN_FDS_ENV_VAR_NAME "LISTEN_FDS" @@ -63,11 +62,9 @@ #endif -/* FIXME We use syslog for now, because multilog is not yet implemented */ -#include <syslog.h> - +#define DM_SIGNALED_EXIT 1 +#define DM_SCHEDULED_EXIT 2 static volatile sig_atomic_t _exit_now = 0; /* set to '1' when signal is given to exit */ -static volatile sig_atomic_t _thread_registries_empty = 1; /* registries are empty initially */ /* List (un)link macros. */ #define LINK(x, head) dm_list_add(head, &(x)->list) @@ -90,31 +87,69 @@ static volatile sig_atomic_t _thread_registries_empty = 1; /* registries are emp */ static pthread_mutex_t _global_mutex; -/* - There are three states a thread can attain (see struct - thread_status, field int status): - - - DM_THREAD_RUNNING: thread has started up and is either working or - waiting for events... transitions to either SHUTDOWN or DONE - - DM_THREAD_SHUTDOWN: thread is still doing something, but it is - supposed to terminate (and transition to DONE) as soon as it - finishes whatever it was doing at the point of flipping state to - SHUTDOWN... the thread is still on the thread list - - DM_THREAD_DONE: thread has terminated and has been moved over to - unused thread list, cleanup pending - */ -#define DM_THREAD_RUNNING 0 -#define DM_THREAD_SHUTDOWN 1 -#define DM_THREAD_DONE 2 +static const size_t THREAD_STACK_SIZE = 300 * 1024; -#define THREAD_STACK_SIZE (300*1024) +/* Default idle exit timeout 1 hour (in seconds) */ +static const time_t DMEVENTD_IDLE_EXIT_TIMEOUT = 60 * 60; -int dmeventd_debug = 0; +static int _debug_level = 0; +static int _use_syslog = 1; static int _systemd_activation = 0; static int _foreground = 0; static int _restart = 0; +static time_t _idle_since = 0; static char **_initial_registrations = 0; +/* FIXME Make configurable at runtime */ + +/* All libdm messages */ +__attribute__((format(printf, 5, 6))) +static void _libdm_log(int level, const char *file, int line, + int dm_errno_or_class, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + dm_event_log("#dm", level, file, line, dm_errno_or_class, format, ap); + va_end(ap); +} + +/* All dmeventd messages */ +#undef LOG_MESG +#define LOG_MESG(l, f, ln, e, x...) _dmeventd_log(l, f, ln, e, ## x) +__attribute__((format(printf, 5, 6))) +static void _dmeventd_log(int level, const char *file, int line, + int dm_errno_or_class, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + dm_event_log("dmeventd", level, file, line, dm_errno_or_class, format, ap); + va_end(ap); +} + +#ifdef DEBUG +# define DEBUGLOG log_debug +static const char *decode_cmd(uint32_t cmd) +{ + switch (cmd) { + case DM_EVENT_CMD_ACTIVE: return "ACTIVE"; + case DM_EVENT_CMD_REGISTER_FOR_EVENT: return "REGISTER_FOR_EVENT"; + case DM_EVENT_CMD_UNREGISTER_FOR_EVENT: return "UNREGISTER_FOR_EVENT"; + case DM_EVENT_CMD_GET_REGISTERED_DEVICE: return "GET_REGISTERED_DEVICE"; + case DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE: return "GET_NEXT_REGISTERED_DEVICE"; + case DM_EVENT_CMD_SET_TIMEOUT: return "SET_TIMEOUT"; + case DM_EVENT_CMD_GET_TIMEOUT: return "GET_TIMEOUT"; + case DM_EVENT_CMD_HELLO: return "HELLO"; + case DM_EVENT_CMD_DIE: return "DIE"; + case DM_EVENT_CMD_GET_STATUS: return "GET_STATUS"; + case DM_EVENT_CMD_GET_PARAMETERS: return "GET_PARAMETERS"; + default: return "unknown"; + } +} + +#else +# define DEBUGLOG(fmt, args...) do { } while (0) +#endif + /* Data kept about a DSO. */ struct dso_data { struct dm_list list; @@ -166,17 +201,20 @@ struct message_data { char *id; char *dso_name; /* Name of DSO. */ char *device_uuid; /* Mapped device path. */ - union { - char *str; /* Events string as fetched from message. */ - enum dm_event_mask field; /* Events bitfield. */ - } events; - union { - char *str; - uint32_t secs; - } timeout; + char *events_str; /* Events string as fetched from message. */ + enum dm_event_mask events_field; /* Events bitfield. */ + char *timeout_str; + uint32_t timeout_secs; struct dm_event_daemon_message *msg; /* Pointer to message buffer. */ }; +/* There are three states a thread can attain. */ +enum { + DM_THREAD_REGISTERING, /* Registering, transitions to RUNNING */ + DM_THREAD_RUNNING, /* Working on events, transitions to DONE */ + DM_THREAD_DONE /* Terminated and cleanup is pending */ +}; + /* * Housekeeping of thread+device states. * @@ -195,19 +233,21 @@ struct thread_status { char *name; int major, minor; } device; - uint32_t event_nr; /* event number */ int processing; /* Set when event is being processed */ - int status; /* see DM_THREAD_{RUNNING,SHUTDOWN,DONE} - constants above */ - enum dm_event_mask events; /* bitfield for event filter. */ - enum dm_event_mask current_events; /* bitfield for occured events. */ - struct dm_task *current_task; + int status; /* See DM_THREAD_{REGISTERING,RUNNING,DONE} */ + + int events; /* bitfield for event filter. */ + int current_events; /* bitfield for occured events. */ + struct dm_task *wait_task; + int pending; /* Set when event filter change is pending */ time_t next_time; uint32_t timeout; struct dm_list timeout_list; void *dso_private; /* dso per-thread status variable */ + /* TODO per-thread mutex */ }; + static DM_LIST_INIT(_thread_registry); static DM_LIST_INIT(_thread_registry_unused); @@ -216,125 +256,301 @@ static DM_LIST_INIT(_timeout_registry); static pthread_mutex_t _timeout_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t _timeout_cond = PTHREAD_COND_INITIALIZER; -/* Allocate/free the status structure for a monitoring thread. */ -static struct thread_status *_alloc_thread_status(struct message_data *data, - struct dso_data *dso_data) + +/********** + * DSO + **********/ + +/* DSO data allocate/free. */ +static void _free_dso_data(struct dso_data *data) { - struct thread_status *ret = (typeof(ret)) dm_zalloc(sizeof(*ret)); + free(data->dso_name); + free(data); +} + +static struct dso_data *_alloc_dso_data(struct message_data *data) +{ + struct dso_data *ret = (typeof(ret)) zalloc(sizeof(*ret)); if (!ret) - return NULL; + return_NULL; - if (!(ret->device.uuid = dm_strdup(data->device_uuid))) { - dm_free(ret); - return NULL; + if (!(ret->dso_name = strdup(data->dso_name))) { + free(ret); + return_NULL; + } + + return ret; +} + +/* DSO reference counting. */ +static void _lib_get(struct dso_data *data) +{ + data->ref_count++; +} + +static void _lib_put(struct dso_data *data) +{ + if (!--data->ref_count) { + dlclose(data->dso_handle); + UNLINK_DSO(data); + _free_dso_data(data); + + /* Close control device if there is no plugin in-use */ + if (dm_list_empty(&_dso_registry)) { + DEBUGLOG("Unholding control device."); + dm_hold_control_dev(0); + dm_lib_release(); + _idle_since = time(NULL); + } + } +} + +/* Find DSO data. */ +static struct dso_data *_lookup_dso(struct message_data *data) +{ + struct dso_data *dso_data, *ret = NULL; + + dm_list_iterate_items(dso_data, &_dso_registry) + if (!strcmp(data->dso_name, dso_data->dso_name)) { + ret = dso_data; + break; + } + + return ret; +} + +/* Lookup DSO symbols we need. */ +static int _lookup_symbol(void *dl, void **symbol, const char *name) +{ + if (!(*symbol = dlsym(dl, name))) + return_0; + + return 1; +} + +static int _lookup_symbols(void *dl, struct dso_data *data) +{ + return _lookup_symbol(dl, (void *) &data->process_event, + "process_event") && + _lookup_symbol(dl, (void *) &data->register_device, + "register_device") && + _lookup_symbol(dl, (void *) &data->unregister_device, + "unregister_device"); +} + +/* Load an application specific DSO. */ +static struct dso_data *_load_dso(struct message_data *data) +{ + void *dl; + struct dso_data *ret; + const char *dlerr; + + if (!(dl = dlopen(data->dso_name, RTLD_NOW))) { + dlerr = dlerror(); + goto_bad; + } + + if (!(ret = _alloc_dso_data(data))) { + dlclose(dl); + dlerr = "no memory"; + goto_bad; + } + + if (!(_lookup_symbols(dl, ret))) { + _free_dso_data(ret); + dlclose(dl); + dlerr = "symbols missing"; + goto_bad; } - ret->current_task = NULL; - ret->device.name = NULL; - ret->device.major = ret->device.minor = 0; - ret->dso_data = dso_data; - ret->events = data->events.field; - ret->timeout = data->timeout.secs; - dm_list_init(&ret->timeout_list); + /* Keep control device open until last user closes */ + if (dm_list_empty(&_dso_registry)) { + DEBUGLOG("Holding control device open."); + dm_hold_control_dev(1); + _idle_since = 0; + } + + /* + * Keep handle to close the library once + * we've got no references to it any more. + */ + ret->dso_handle = dl; + LINK_DSO(ret); return ret; +bad: + log_error("dmeventd %s dlopen failed: %s.", data->dso_name, dlerr); + data->msg->size = dm_asprintf(&(data->msg->data), "%s %s dlopen failed: %s", + data->id, data->dso_name, dlerr); + return NULL; } -static void _lib_put(struct dso_data *data); +/************ + * THREAD + ************/ + +/* Allocate/free the thread status structure for a monitoring thread. */ static void _free_thread_status(struct thread_status *thread) { + _lib_put(thread->dso_data); - if (thread->current_task) - dm_task_destroy(thread->current_task); - dm_free(thread->device.uuid); - dm_free(thread->device.name); - dm_free(thread); + if (thread->wait_task) + dm_task_destroy(thread->wait_task); + free(thread->device.uuid); + free(thread->device.name); + free(thread); } -/* Allocate/free DSO data. */ -static struct dso_data *_alloc_dso_data(struct message_data *data) +/* Note: events_field must not be 0, ensured by caller */ +static struct thread_status *_alloc_thread_status(const struct message_data *data, + struct dso_data *dso_data) { - struct dso_data *ret = (typeof(ret)) dm_zalloc(sizeof(*ret)); - - if (!ret) - return NULL; + struct thread_status *thread; - if (!(ret->dso_name = dm_strdup(data->dso_name))) { - dm_free(ret); + if (!(thread = zalloc(sizeof(*thread)))) { + log_error("Cannot create new thread, out of memory."); return NULL; } - return ret; + _lib_get(dso_data); + thread->dso_data = dso_data; + + if (!(thread->wait_task = dm_task_create(DM_DEVICE_WAITEVENT))) + goto_out; + + if (!dm_task_set_uuid(thread->wait_task, data->device_uuid)) + goto_out; + + if (!(thread->device.uuid = strdup(data->device_uuid))) + goto_out; + + /* Until real name resolved, use UUID */ + if (!(thread->device.name = strdup(data->device_uuid))) + goto_out; + + /* runs ioctl and may register lvm2 pluging */ + thread->processing = 1; + thread->status = DM_THREAD_REGISTERING; + + thread->events = data->events_field; + thread->pending = DM_EVENT_REGISTRATION_PENDING; + thread->timeout = data->timeout_secs; + dm_list_init(&thread->timeout_list); + + return thread; + +out: + _free_thread_status(thread); + + return NULL; } -/* Create a device monitoring thread. */ +/* + * Create a device monitoring thread. + * N.B. Error codes returned are positive. + */ static int _pthread_create_smallstack(pthread_t *t, void *(*fun)(void *), void *arg) { + int r; + pthread_t tmp; pthread_attr_t attr; - pthread_attr_init(&attr); + + /* + * From pthread_attr_init man page: + * POSIX.1-2001 documents an ENOMEM error for pthread_attr_init(); on + * Linux these functions always succeed (but portable and future-proof + * applications should nevertheless handle a possible error return). + */ + if ((r = pthread_attr_init(&attr)) != 0) { + log_sys_error("pthread_attr_init", ""); + return r; + } + /* * We use a smaller stack since it gets preallocated in its entirety */ - pthread_attr_setstacksize(&attr, THREAD_STACK_SIZE); - return pthread_create(t, &attr, fun, arg); -} + pthread_attr_setstacksize(&attr, THREAD_STACK_SIZE + getpagesize()); -static void _free_dso_data(struct dso_data *data) -{ - dm_free(data->dso_name); - dm_free(data); + /* + * If no-one will be waiting, we need to detach. + */ + if (!t) { + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + t = &tmp; + } + + if ((r = pthread_create(t, &attr, fun, arg))) + log_sys_error("pthread_create", ""); + + pthread_attr_destroy(&attr); + + return r; } /* * Fetch a string off src and duplicate it into *ptr. - * Pay attention to zero-length strings. + * Pay attention to zero-length and 'empty' strings ('-'). */ /* FIXME? move to libdevmapper to share with the client lib (need to make delimiter a parameter then) */ static int _fetch_string(char **ptr, char **src, const int delimiter) { - int ret = 0; + int ret = 1; char *p; size_t len; + *ptr = NULL; /* Empty field returns NULL pointer */ - if ((p = strchr(*src, delimiter))) - *p = 0; - - if ((*ptr = dm_strdup(*src))) { - if ((len = strlen(*ptr))) - *src += len; - else { - dm_free(*ptr); - *ptr = NULL; + if ((*src)[0] == '-') { + /* Could be empty field '-', handle without allocation */ + if ((*src)[1] == '\0') { + (*src)++; + goto out; + } else if ((*src)[1] == delimiter) { + (*src) += 2; + goto out; } - - (*src)++; - ret = 1; } - if (p) - *p = delimiter; - + if ((p = strchr(*src, delimiter))) { + if (*src < p) { + *p = 0; /* Temporary exit with \0 */ + if (!(*ptr = strdup(*src))) { + log_error("Failed to fetch item %s.", *src); + ret = 0; /* Allocation fail */ + } + *p = delimiter; + *src = p; + } + (*src)++; /* Skip delmiter, next field */ + } else if ((len = strlen(*src))) { + /* No delimiter, item ends with '\0' */ + if (!(*ptr = strdup(*src))) { + log_error("Failed to fetch last item %s.", *src); + ret = 0; /* Fail */ + } + *src += len + 1; + } +out: return ret; } /* Free message memory. */ static void _free_message(struct message_data *message_data) { - dm_free(message_data->id); - dm_free(message_data->dso_name); - - dm_free(message_data->device_uuid); - + free(message_data->id); + free(message_data->dso_name); + free(message_data->device_uuid); + free(message_data->events_str); + free(message_data->timeout_str); } /* Parse a register message from the client. */ static int _parse_message(struct message_data *message_data) { int ret = 0; - char *p = message_data->msg->data; struct dm_event_daemon_message *msg = message_data->msg; + char *p = msg->data; if (!msg->data) return 0; @@ -346,33 +562,23 @@ static int _parse_message(struct message_data *message_data) if (_fetch_string(&message_data->id, &p, ' ') && _fetch_string(&message_data->dso_name, &p, ' ') && _fetch_string(&message_data->device_uuid, &p, ' ') && - _fetch_string(&message_data->events.str, &p, ' ') && - _fetch_string(&message_data->timeout.str, &p, ' ')) { - if (message_data->events.str) { - enum dm_event_mask i = atoi(message_data->events.str); - - /* - * Free string representaion of events. - * Not needed an more. - */ - dm_free(message_data->events.str); - message_data->events.field = i; - } - if (message_data->timeout.str) { - uint32_t secs = atoi(message_data->timeout.str); - dm_free(message_data->timeout.str); - message_data->timeout.secs = secs ? secs : - DM_EVENT_DEFAULT_TIMEOUT; - } - + _fetch_string(&message_data->events_str, &p, ' ') && + _fetch_string(&message_data->timeout_str, &p, ' ')) { + if (message_data->events_str) + message_data->events_field = + atoi(message_data->events_str); + if (message_data->timeout_str) + message_data->timeout_secs = + atoi(message_data->timeout_str) + ? : DM_EVENT_DEFAULT_TIMEOUT; ret = 1; } - dm_free(msg->data); + free(msg->data); msg->data = NULL; - msg->size = 0; + return ret; -}; +} /* Global mutex to lock access to lists et al. See _global_mutex above. */ @@ -391,15 +597,9 @@ static int _fill_device_data(struct thread_status *ts) { struct dm_task *dmt; struct dm_info dmi; + int ret = 0; - if (!ts->device.uuid) - return 0; - - ts->device.name = NULL; - ts->device.major = ts->device.minor = 0; - - dmt = dm_task_create(DM_DEVICE_INFO); - if (!dmt) + if (!(dmt = dm_task_create(DM_DEVICE_INFO))) return 0; if (!dm_task_set_uuid(dmt, ts->device.uuid)) @@ -408,8 +608,8 @@ static int _fill_device_data(struct thread_status *ts) if (!dm_task_run(dmt)) goto fail; - ts->device.name = dm_strdup(dm_task_get_name(dmt)); - if (!ts->device.name) + free(ts->device.name); + if (!(ts->device.name = strdup(dm_task_get_name(dmt)))) goto fail; if (!dm_task_get_info(dmt, &dmi)) @@ -417,14 +617,37 @@ static int _fill_device_data(struct thread_status *ts) ts->device.major = dmi.major; ts->device.minor = dmi.minor; + dm_task_set_event_nr(ts->wait_task, dmi.event_nr); + ret = 1; +fail: dm_task_destroy(dmt); - return 1; - fail: - dm_task_destroy(dmt); - dm_free(ts->device.name); - return 0; + return ret; +} + +static struct dm_task *_get_device_status(struct thread_status *ts) +{ + struct dm_task *dmt = dm_task_create(DM_DEVICE_STATUS); + + if (!dmt) + return_NULL; + + if (!dm_task_set_uuid(dmt, ts->device.uuid)) { + dm_task_destroy(dmt); + return_NULL; + } + + /* Non-blocking status read */ + if (!dm_task_no_flush(dmt)) + log_warn("WARNING: Can't set no_flush for dm status."); + + if (!dm_task_run(dmt)) { + dm_task_destroy(dmt); + return_NULL; + } + + return dmt; } /* @@ -437,8 +660,8 @@ static struct thread_status *_lookup_thread_status(struct message_data *data) struct thread_status *thread; dm_list_iterate_items(thread, &_thread_registry) - if (!strcmp(data->device_uuid, thread->device.uuid)) - return thread; + if (!strcmp(data->device_uuid, thread->device.uuid)) + return thread; return NULL; } @@ -447,20 +670,20 @@ static int _get_status(struct message_data *message_data) { struct dm_event_daemon_message *msg = message_data->msg; struct thread_status *thread; - int i, j; - int ret = -1; - int count = dm_list_size(&_thread_registry); - int size = 0, current = 0; - char *buffers[count]; + int i = 0, j; + int ret = -ENOMEM; + int count; + int size = 0, current; + size_t len; + char **buffers; char *message; - dm_free(msg->data); + if (!message_data->id) + return -EINVAL; - for (i = 0; i < count; ++i) - buffers[i] = NULL; - - i = 0; _lock_mutex(); + count = dm_list_size(&_thread_registry); + buffers = alloca(sizeof(char*) * count); dm_list_iterate_items(thread, &_thread_registry) { if ((current = dm_asprintf(buffers + i, "0:%d %s %s %u %" PRIu32 ";", i, thread->dso_data->dso_name, @@ -469,33 +692,50 @@ static int _get_status(struct message_data *message_data) _unlock_mutex(); goto out; } - ++ i; - size += current; + ++i; + size += current; /* count with trailing '\0' */ } _unlock_mutex(); - msg->size = size + strlen(message_data->id) + 1; - msg->data = dm_malloc(msg->size); - if (!msg->data) + len = strlen(message_data->id); + msg->size = size + len + 1; + free(msg->data); + if (!(msg->data = malloc(msg->size))) goto out; - *msg->data = 0; - message = msg->data; - strcpy(message, message_data->id); - message += strlen(message_data->id); - *message = ' '; - message ++; + memcpy(msg->data, message_data->id, len); + message = msg->data + len; + *message++ = ' '; for (j = 0; j < i; ++j) { - strcpy(message, buffers[j]); - message += strlen(buffers[j]); + len = strlen(buffers[j]); + memcpy(message, buffers[j], len); + message += len; } ret = 0; out: for (j = 0; j < i; ++j) - dm_free(buffers[j]); + free(buffers[j]); + return ret; +} + +static int _get_parameters(struct message_data *message_data) { + struct dm_event_daemon_message *msg = message_data->msg; + int size; + + free(msg->data); + if ((size = dm_asprintf(&msg->data, "%s pid=%d daemon=%s exec_method=%s", + message_data->id, getpid(), + _foreground ? "no" : "yes", + _systemd_activation ? "systemd" : "direct")) < 0) { + stack; + return -ENOMEM; + } + + msg->size = (uint32_t) size; + return 0; } /* Cleanup at exit. */ @@ -514,23 +754,46 @@ static void _exit_timeout(void *unused __attribute__((unused))) /* Wake up monitor threads every so often. */ static void *_timeout_thread(void *unused __attribute__((unused))) { - struct timespec timeout; + struct thread_status *thread; + struct timespec timeout, real_time; time_t curr_time; + int ret; - timeout.tv_nsec = 0; + DEBUGLOG("Timeout thread starting."); pthread_cleanup_push(_exit_timeout, NULL); pthread_mutex_lock(&_timeout_mutex); while (!dm_list_empty(&_timeout_registry)) { - struct thread_status *thread; - timeout.tv_sec = 0; + timeout.tv_nsec = 0; +#ifndef HAVE_REALTIME curr_time = time(NULL); +#else + if (clock_gettime(CLOCK_REALTIME, &real_time)) { + log_error("Failed to read clock_gettime()."); + break; + } + /* 10ms back to the future */ + curr_time = real_time.tv_sec + ((real_time.tv_nsec > (1000000000 - 10000000)) ? 1 : 0); +#endif dm_list_iterate_items_gen(thread, &_timeout_registry, timeout_list) { if (thread->next_time <= curr_time) { thread->next_time = curr_time + thread->timeout; - pthread_kill(thread->thread, SIGALRM); + _lock_mutex(); + if (thread->processing) { + /* Cannot signal processing monitoring thread */ + log_debug("Skipping SIGALRM to processing Thr %x for timeout.", + (int) thread->thread); + } else { + DEBUGLOG("Sending SIGALRM to Thr %x for timeout.", + (int) thread->thread); + ret = pthread_kill(thread->thread, SIGALRM); + if (ret && (ret != ESRCH)) + log_error("Unable to wakeup Thr %x for timeout: %s.", + (int) thread->thread, strerror(ret)); + } + _unlock_mutex(); } if (thread->next_time < timeout.tv_sec || !timeout.tv_sec) @@ -541,6 +804,7 @@ static void *_timeout_thread(void *unused __attribute__((unused))) &timeout); } + DEBUGLOG("Timeout thread finished."); pthread_cleanup_pop(1); return NULL; @@ -552,20 +816,16 @@ static int _register_for_timeout(struct thread_status *thread) pthread_mutex_lock(&_timeout_mutex); - thread->next_time = time(NULL) + thread->timeout; - if (dm_list_empty(&thread->timeout_list)) { + thread->next_time = time(NULL) + thread->timeout; dm_list_add(&_timeout_registry, &thread->timeout_list); if (_timeout_running) pthread_cond_signal(&_timeout_cond); } - if (!_timeout_running) { - pthread_t timeout_id; - - if (!(ret = -_pthread_create_smallstack(&timeout_id, _timeout_thread, NULL))) - _timeout_running = 1; - } + if (!_timeout_running && + !(ret = _pthread_create_smallstack(NULL, _timeout_thread, NULL))) + _timeout_running = 1; pthread_mutex_unlock(&_timeout_mutex); @@ -578,106 +838,86 @@ static void _unregister_for_timeout(struct thread_status *thread) if (!dm_list_empty(&thread->timeout_list)) { dm_list_del(&thread->timeout_list); dm_list_init(&thread->timeout_list); + if (dm_list_empty(&_timeout_registry)) + /* No more work -> wakeup to finish quickly */ + pthread_cond_signal(&_timeout_cond); } pthread_mutex_unlock(&_timeout_mutex); } -__attribute__((format(printf, 4, 5))) -static void _no_intr_log(int level, const char *file, int line, - const char *f, ...) +#ifdef DEBUG_SIGNALS +/* Print list of signals within a signal set */ +static void _print_sigset(const char *prefix, const sigset_t *sigset) { - va_list ap; - - if (errno == EINTR) - return; - if (level > _LOG_WARN) - return; - - va_start(ap, f); - - if (level < _LOG_WARN) - vfprintf(stderr, f, ap); - else - vprintf(f, ap); - - va_end(ap); - - if (level < _LOG_WARN) - fprintf(stderr, "\n"); - else - fprintf(stdout, "\n"); -} + int sig, cnt = 0; -static sigset_t _unblock_sigalrm(void) -{ - sigset_t set, old; + for (sig = 1; sig < NSIG; sig++) + if (!sigismember(sigset, sig)) { + cnt++; + log_debug("%s%d (%s)", prefix, sig, strsignal(sig)); + } - sigemptyset(&set); - sigaddset(&set, SIGALRM); - pthread_sigmask(SIG_UNBLOCK, &set, &old); - return old; + if (!cnt) + log_debug("%s<empty signal set>", prefix); } +#endif -#define DM_WAIT_RETRY 0 -#define DM_WAIT_INTR 1 -#define DM_WAIT_FATAL 2 +enum { + DM_WAIT_RETRY, + DM_WAIT_INTR, + DM_WAIT_FATAL +}; /* Wait on a device until an event occurs. */ -static int _event_wait(struct thread_status *thread, struct dm_task **task) +static int _event_wait(struct thread_status *thread) { - sigset_t set; + sigset_t set, old; int ret = DM_WAIT_RETRY; - struct dm_task *dmt; struct dm_info info; - *task = 0; - - if (!(dmt = dm_task_create(DM_DEVICE_WAITEVENT))) - return DM_WAIT_RETRY; - - thread->current_task = dmt; - - if (!dm_task_set_uuid(dmt, thread->device.uuid) || - !dm_task_set_event_nr(dmt, thread->event_nr)) - goto out; + /* TODO: audit libdm thread usage */ /* * This is so that you can break out of waiting on an event, * either for a timeout event, or to cancel the thread. */ - set = _unblock_sigalrm(); - dm_log_init(_no_intr_log); - errno = 0; - if (dm_task_run(dmt)) { - thread->current_events |= DM_EVENT_DEVICE_ERROR; - ret = DM_WAIT_INTR; + sigemptyset(&old); + sigemptyset(&set); + sigaddset(&set, SIGALRM); + if (pthread_sigmask(SIG_UNBLOCK, &set, &old) != 0) { + log_sys_error("pthread_sigmask", "unblock alarm"); + return ret; /* What better */ + } - if ((ret = dm_task_get_info(dmt, &info))) - thread->event_nr = info.event_nr; - } else if (thread->events & DM_EVENT_TIMEOUT && errno == EINTR) { - thread->current_events |= DM_EVENT_TIMEOUT; + if (dm_task_run(thread->wait_task)) { + thread->current_events |= DM_EVENT_DEVICE_ERROR; ret = DM_WAIT_INTR; - } else if (thread->status == DM_THREAD_SHUTDOWN && errno == EINTR) { - ret = DM_WAIT_FATAL; + /* Update event_nr */ + if (dm_task_get_info(thread->wait_task, &info)) + dm_task_set_event_nr(thread->wait_task, info.event_nr); } else { - syslog(LOG_NOTICE, "dm_task_run failed, errno = %d, %s", - errno, strerror(errno)); - if (errno == ENXIO) { - syslog(LOG_ERR, "%s disappeared, detaching", - thread->device.name); + switch (dm_task_get_errno(thread->wait_task)) { + case ENXIO: + log_error("%s disappeared, detaching.", + thread->device.name); ret = DM_WAIT_FATAL; + break; + case EINTR: + thread->current_events |= DM_EVENT_TIMEOUT; + ret = DM_WAIT_INTR; + break; + default: + log_sys_error("dm_task_run", "waitevent"); } } - pthread_sigmask(SIG_SETMASK, &set, NULL); - dm_log_init(NULL); + if (pthread_sigmask(SIG_SETMASK, &old, NULL) != 0) + log_sys_error("pthread_sigmask", "block alarm"); - out: - if (ret == DM_WAIT_FATAL || ret == DM_WAIT_RETRY) { - dm_task_destroy(dmt); - thread->current_task = NULL; - } else - *task = dmt; +#ifdef DEBUG_SIGNALS + _print_sigset("dmeventd blocking ", &old); +#endif + DEBUGLOG("Completed waitevent task for %s.", thread->device.name); return ret; } @@ -703,9 +943,27 @@ static int _do_unregister_device(struct thread_status *thread) } /* Process an event in the DSO. */ -static void _do_process_event(struct thread_status *thread, struct dm_task *task) +static void _do_process_event(struct thread_status *thread) { - thread->dso_data->process_event(task, thread->current_events, &(thread->dso_private)); + struct dm_task *task; + + /* NOTE: timeout event gets status */ + task = (thread->current_events & DM_EVENT_TIMEOUT) + ? _get_device_status(thread) : thread->wait_task; + + if (!task) + log_error("Lost event in Thr %x.", (int)thread->thread); + else { + thread->dso_data->process_event(task, thread->current_events, &(thread->dso_private)); + if (task != thread->wait_task) + dm_task_destroy(task); + } +} + +static void _thread_unused(struct thread_status *thread) +{ + UNLINK_THREAD(thread); + LINK(thread, &_thread_registry_unused); } /* Thread cleanup handler to unregister device. */ @@ -713,134 +971,112 @@ static void _monitor_unregister(void *arg) { struct thread_status *thread = arg, *thread_iter; - if (!_do_unregister_device(thread)) - syslog(LOG_ERR, "%s: %s unregister failed\n", __func__, - thread->device.name); - if (thread->current_task) - dm_task_destroy(thread->current_task); - thread->current_task = NULL; - - _lock_mutex(); - if (thread->events & DM_EVENT_TIMEOUT) { - /* _unregister_for_timeout locks another mutex, we - don't want to deadlock so we release our mutex for - a bit */ - _unlock_mutex(); - _unregister_for_timeout(thread); - _lock_mutex(); - } - /* we may have been relinked to unused registry since we were - called, so check that */ - dm_list_iterate_items(thread_iter, &_thread_registry_unused) + dm_list_iterate_items(thread_iter, &_thread_registry) if (thread_iter == thread) { - thread->status = DM_THREAD_DONE; - _unlock_mutex(); - return; + /* Relink to _unused */ + _thread_unused(thread); + break; } - thread->status = DM_THREAD_DONE; - pthread_mutex_lock(&_timeout_mutex); - UNLINK_THREAD(thread); - LINK(thread, &_thread_registry_unused); - pthread_mutex_unlock(&_timeout_mutex); - _unlock_mutex(); -} -static struct dm_task *_get_device_status(struct thread_status *ts) -{ - struct dm_task *dmt = dm_task_create(DM_DEVICE_STATUS); + thread->events = 0; /* Filter is now empty */ + thread->pending = 0; /* Event pending resolved */ + thread->processing = 1; /* Process unregistering */ - if (!dmt) - return NULL; + _unlock_mutex(); - if (!dm_task_set_uuid(dmt, ts->device.uuid)) { - dm_task_destroy(dmt); - return NULL; - } + DEBUGLOG("Unregistering monitor for %s.", thread->device.name); + _unregister_for_timeout(thread); - if (!dm_task_run(dmt)) { - dm_task_destroy(dmt); - return NULL; - } + /* coverity[missing_lock] no missing lock here */ + if ((thread->status != DM_THREAD_REGISTERING) && + !_do_unregister_device(thread)) + log_error("%s: %s unregister failed.", __func__, + thread->device.name); - return dmt; + DEBUGLOG("Marking Thr %x as DONE and unused.", (int)thread->thread); + + _lock_mutex(); + thread->status = DM_THREAD_DONE; /* Last access to thread memory! */ + _unlock_mutex(); } /* Device monitoring thread. */ static void *_monitor_thread(void *arg) { struct thread_status *thread = arg; - int wait_error = 0; - struct dm_task *task; + int ret; + sigset_t pendmask; pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL); pthread_cleanup_push(_monitor_unregister, thread); - /* Wait for do_process_request() to finish its task. */ - _lock_mutex(); - thread->status = DM_THREAD_RUNNING; - _unlock_mutex(); - - /* Loop forever awaiting/analyzing device events. */ - while (1) { - thread->current_events = 0; + if (!_fill_device_data(thread)) { + log_error("Failed to fill device data for %s.", thread->device.uuid); + _lock_mutex(); + goto out; + } - wait_error = _event_wait(thread, &task); - if (wait_error == DM_WAIT_RETRY) - continue; + if (!_do_register_device(thread)) { + log_error("Failed to register device %s.", thread->device.name); + _lock_mutex(); + goto out; + } - if (wait_error == DM_WAIT_FATAL) - break; + _lock_mutex(); + thread->status = DM_THREAD_RUNNING; + thread->processing = 0; - /* Timeout occurred, task is not filled properly. - * We get device status here for processing it in DSO. - */ - if (wait_error == DM_WAIT_INTR && - thread->current_events & DM_EVENT_TIMEOUT) { - dm_task_destroy(task); - task = _get_device_status(thread); - /* FIXME: syslog fail here ? */ - if (!(thread->current_task = task)) - continue; - } + /* Loop awaiting/analyzing device events. */ + while (thread->events) { - /* - * We know that wait succeeded and stored a - * pointer to dm_task with device status into task. - */ + thread->pending = 0; /* Event is no longer pending... */ /* - * Check against filter. + * Check against bitmask filter. * * If there's current events delivered from _event_wait() AND * the device got registered for those events AND * those events haven't been processed yet, call * the DSO's process_event() handler. */ - _lock_mutex(); - if (thread->status == DM_THREAD_SHUTDOWN) { - _unlock_mutex(); - break; - } - _unlock_mutex(); - if (thread->events & thread->current_events) { - _lock_mutex(); - thread->processing = 1; + thread->processing = 1; /* Cannot be removed/signaled */ _unlock_mutex(); - _do_process_event(thread, task); - dm_task_destroy(task); - thread->current_task = NULL; + _do_process_event(thread); + thread->current_events = 0; /* Current events processed */ _lock_mutex(); thread->processing = 0; - _unlock_mutex(); + + /* + * Thread can terminate itself from plugin via SIGALRM + * Timer thread will not send signal while processing + * TODO: maybe worth API change and return value for + * _do_process_event() instead of this signal solution + */ + if (sigpending(&pendmask) < 0) + log_sys_error("sigpending", ""); + else if (sigismember(&pendmask, SIGALRM)) + break; } else { - dm_task_destroy(task); - thread->current_task = NULL; + _unlock_mutex(); + + if ((ret = _event_wait(thread)) == DM_WAIT_RETRY) + usleep(100); /* Avoid busy loop, wait without mutex */ + + _lock_mutex(); + + if (ret == DM_WAIT_FATAL) + break; } } +out: + /* ';' fixes gcc compilation problem with older pthread macros + * "label at end of compound statement" */ + ; + /* coverity[lock_order] _global_mutex is kept locked */ pthread_cleanup_pop(1); return NULL; @@ -852,107 +1088,80 @@ static int _create_thread(struct thread_status *thread) return _pthread_create_smallstack(&thread->thread, _monitor_thread, thread); } -static int _terminate_thread(struct thread_status *thread) +/* Update events - needs to be locked */ +static int _update_events(struct thread_status *thread, int events) { - return pthread_kill(thread->thread, SIGALRM); -} + int ret = 0; -/* DSO reference counting. Call with _global_mutex locked! */ -static void _lib_get(struct dso_data *data) -{ - data->ref_count++; -} + if (thread->events == events) + return 0; /* Nothing has changed */ -static void _lib_put(struct dso_data *data) -{ - if (!--data->ref_count) { - dlclose(data->dso_handle); - UNLINK_DSO(data); - _free_dso_data(data); - } -} + thread->events = events; + thread->pending = DM_EVENT_REGISTRATION_PENDING; -/* Find DSO data. */ -static struct dso_data *_lookup_dso(struct message_data *data) -{ - struct dso_data *dso_data, *ret = NULL; + /* Only non-processing threads can be notified */ + if (!thread->processing) { + DEBUGLOG("Sending SIGALRM to wakeup Thr %x.", (int)thread->thread); - dm_list_iterate_items(dso_data, &_dso_registry) - if (!strcmp(data->dso_name, dso_data->dso_name)) { - _lib_get(dso_data); - ret = dso_data; - break; + /* Notify thread waiting in ioctl (to speed-up) */ + if ((ret = pthread_kill(thread->thread, SIGALRM))) { + if (ret == ESRCH) + thread->events = 0; /* thread is gone */ + else + log_error("Unable to wakeup thread: %s", + strerror(ret)); + } } - return ret; + /* Threads with no events has to be moved to unused */ + if (!thread->events) + _thread_unused(thread); + + return -ret; } -/* Lookup DSO symbols we need. */ -static int _lookup_symbol(void *dl, void **symbol, const char *name) +/* Return success on daemon active check. */ +static int _active(struct message_data *message_data) { - if ((*symbol = dlsym(dl, name))) - return 1; - return 0; } -static int lookup_symbols(void *dl, struct dso_data *data) +/* + * Unregister for an event. + * + * Only one caller at a time here as with register_for_event(). + */ +static int _unregister_for_event(struct message_data *message_data) { - return _lookup_symbol(dl, (void *) &data->process_event, - "process_event") && - _lookup_symbol(dl, (void *) &data->register_device, - "register_device") && - _lookup_symbol(dl, (void *) &data->unregister_device, - "unregister_device"); -} + struct thread_status *thread; + int ret; -/* Load an application specific DSO. */ -static struct dso_data *_load_dso(struct message_data *data) -{ - void *dl; - struct dso_data *ret = NULL; + /* + * Clear event in bitfield and deactivate + * monitoring thread in case bitfield is 0. + */ + _lock_mutex(); - if (!(dl = dlopen(data->dso_name, RTLD_NOW))) { - const char *dlerr = dlerror(); - syslog(LOG_ERR, "dmeventd %s dlopen failed: %s", data->dso_name, - dlerr); - data->msg->size = - dm_asprintf(&(data->msg->data), "%s %s dlopen failed: %s", - data->id, data->dso_name, dlerr); - return NULL; + if (!(thread = _lookup_thread_status(message_data))) { + _unlock_mutex(); + return -ENODEV; } - if (!(ret = _alloc_dso_data(data))) { - dlclose(dl); - return NULL; - } + /* AND mask event ~# from events bitfield. */ + ret = _update_events(thread, (thread->events & ~message_data->events_field)); - if (!(lookup_symbols(dl, ret))) { - _free_dso_data(ret); - dlclose(dl); - return NULL; - } + _unlock_mutex(); - /* - * Keep handle to close the library once - * we've got no references to it any more. - */ - ret->dso_handle = dl; - _lib_get(ret); + /* If there are no events, thread is later garbage + * collected by _cleanup_unused_threads */ + if (message_data->events_field & DM_EVENT_TIMEOUT) + _unregister_for_timeout(thread); - _lock_mutex(); - LINK_DSO(ret); - _unlock_mutex(); + DEBUGLOG("Unregistered event for %s.", thread->device.name); return ret; } -/* Return success on daemon active check. */ -static int _active(struct message_data *message_data) -{ - return 0; -} - /* * Register for an event. * @@ -962,131 +1171,61 @@ static int _active(struct message_data *message_data) static int _register_for_event(struct message_data *message_data) { int ret = 0; - struct thread_status *thread, *thread_new = NULL; + struct thread_status *thread; struct dso_data *dso_data; if (!(dso_data = _lookup_dso(message_data)) && !(dso_data = _load_dso(message_data))) { stack; #ifdef ELIBACC - ret = -ELIBACC; + ret = ELIBACC; #else - ret = -ENODEV; + ret = ENODEV; #endif - goto out; - } - - /* Preallocate thread status struct to avoid deadlock. */ - if (!(thread_new = _alloc_thread_status(message_data, dso_data))) { - stack; - ret = -ENOMEM; - goto out; - } - - if (!_fill_device_data(thread_new)) { - stack; - ret = -ENODEV; - goto out; + return ret; } _lock_mutex(); - /* If creation of timeout thread fails (as it may), we fail - here completely. The client is responsible for either - retrying later or trying to register without timeout - events. However, if timeout thread cannot be started, it - usually means we are so starved on resources that we are - almost as good as dead already... */ - if (thread_new->events & DM_EVENT_TIMEOUT) { - ret = -_register_for_timeout(thread_new); - if (ret) - goto outth; - } - - if (!(thread = _lookup_thread_status(message_data))) { + if ((thread = _lookup_thread_status(message_data))) { + /* OR event # into events bitfield. */ + ret = _update_events(thread, (thread->events | message_data->events_field)); + } else { _unlock_mutex(); - if (!(ret = _do_register_device(thread_new))) - goto out; + /* Only creating thread during event processing + * Remaining initialization happens within monitoring thread */ + if (!(thread = _alloc_thread_status(message_data, dso_data))) { + stack; + return -ENOMEM; + } - thread = thread_new; - thread_new = NULL; + if ((ret = _create_thread(thread))) { + stack; + _free_thread_status(thread); + return -ret; + } - /* Try to create the monitoring thread for this device. */ _lock_mutex(); - if ((ret = -_create_thread(thread))) { - _unlock_mutex(); - _do_unregister_device(thread); - _free_thread_status(thread); - goto out; - } else - LINK_THREAD(thread); + /* Note: same uuid can't be added in parallel */ + LINK_THREAD(thread); } - /* Or event # into events bitfield. */ - thread->events |= message_data->events.field; - - outth: _unlock_mutex(); - out: - /* - * Deallocate thread status after releasing - * the lock in case we haven't used it. - */ - if (thread_new) - _free_thread_status(thread_new); - - return ret; -} - -/* - * Unregister for an event. - * - * Only one caller at a time here as with register_for_event(). - */ -static int _unregister_for_event(struct message_data *message_data) -{ - int ret = 0; - struct thread_status *thread; - - /* - * Clear event in bitfield and deactivate - * monitoring thread in case bitfield is 0. - */ - _lock_mutex(); - - if (!(thread = _lookup_thread_status(message_data))) { - _unlock_mutex(); - ret = -ENODEV; - goto out; - } - - if (thread->status == DM_THREAD_DONE) { - /* the thread has terminated while we were not - watching */ - _unlock_mutex(); - return 0; - } - - thread->events &= ~message_data->events.field; - - if (!(thread->events & DM_EVENT_TIMEOUT)) - _unregister_for_timeout(thread); - /* - * In case there's no events to monitor on this device -> - * unlink and terminate its monitoring thread. - */ - if (!thread->events) { - pthread_mutex_lock(&_timeout_mutex); - UNLINK_THREAD(thread); - LINK(thread, &_thread_registry_unused); - pthread_mutex_unlock(&_timeout_mutex); + /* If creation of timeout thread fails (as it may), we fail + here completely. The client is responsible for either + retrying later or trying to register without timeout + events. However, if timeout thread cannot be started, it + usually means we are so starved on resources that we are + almost as good as dead already... */ + if ((message_data->events_field & DM_EVENT_TIMEOUT) && + (ret = _register_for_timeout(thread))) { + stack; + _unregister_for_event(message_data); } - _unlock_mutex(); - out: - return ret; + return -ret; } /* @@ -1097,50 +1236,39 @@ static int _unregister_for_event(struct message_data *message_data) static int _registered_device(struct message_data *message_data, struct thread_status *thread) { - struct dm_event_daemon_message *msg = message_data->msg; - - const char *fmt = "%s %s %s %u"; - const char *id = message_data->id; - const char *dso = thread->dso_data->dso_name; - const char *dev = thread->device.uuid; int r; - unsigned events = ((thread->status == DM_THREAD_RUNNING) - && (thread->events)) ? thread->events : thread-> - events | DM_EVENT_REGISTRATION_PENDING; + struct dm_event_daemon_message *msg = message_data->msg; - dm_free(msg->data); + free(msg->data); - if ((r = dm_asprintf(&(msg->data), fmt, id, dso, dev, events)) < 0) { - msg->size = 0; + if ((r = dm_asprintf(&(msg->data), "%s %s %s %u", + message_data->id, + thread->dso_data->dso_name, + thread->device.uuid, + thread->events | thread->pending)) < 0) return -ENOMEM; - } msg->size = (uint32_t) r; + DEBUGLOG("Registered %s.", msg->data); return 0; } static int _want_registered_device(char *dso_name, char *device_uuid, - struct thread_status *thread) + struct thread_status *thread) { /* If DSO names and device paths are equal. */ if (dso_name && device_uuid) return !strcmp(dso_name, thread->dso_data->dso_name) && - !strcmp(device_uuid, thread->device.uuid) && - (thread->status == DM_THREAD_RUNNING || - (thread->events & DM_EVENT_REGISTRATION_PENDING)); + !strcmp(device_uuid, thread->device.uuid); /* If DSO names are equal. */ if (dso_name) - return !strcmp(dso_name, thread->dso_data->dso_name) && - (thread->status == DM_THREAD_RUNNING || - (thread->events & DM_EVENT_REGISTRATION_PENDING)); + return !strcmp(dso_name, thread->dso_data->dso_name); /* If device paths are equal. */ if (device_uuid) - return !strcmp(device_uuid, thread->device.uuid) && - (thread->status == DM_THREAD_RUNNING || - (thread->events & DM_EVENT_REGISTRATION_PENDING)); + return !strcmp(device_uuid, thread->device.uuid); return 1; } @@ -1150,6 +1278,9 @@ static int _get_registered_dev(struct message_data *message_data, int next) struct thread_status *thread, *hit = NULL; int ret = -ENOENT; + DEBUGLOG("Get%s dso:%s uuid:%s.", next ? "" : "Next", + message_data->dso_name, + message_data->device_uuid); _lock_mutex(); /* Iterate list of threads checking if we want a particular one. */ @@ -1168,8 +1299,22 @@ static int _get_registered_dev(struct message_data *message_data, int next) if (hit && !next) goto reg; - if (!hit) + /* + * If we didn't get a match, try the threads waiting to be deleted. + * FIXME Do something similar if 'next' is set. + */ + if (!hit && !next) + dm_list_iterate_items(thread, &_thread_registry_unused) + if (_want_registered_device(message_data->dso_name, + message_data->device_uuid, thread)) { + hit = thread; + goto reg; + } + + if (!hit) { + DEBUGLOG("Get%s not registered", next ? "" : "Next"); goto out; + } while (1) { if (dm_list_end(&_thread_registry, &thread->list)) @@ -1206,11 +1351,20 @@ static int _set_timeout(struct message_data *message_data) struct thread_status *thread; _lock_mutex(); - if ((thread = _lookup_thread_status(message_data))) - thread->timeout = message_data->timeout.secs; + thread = _lookup_thread_status(message_data); _unlock_mutex(); - return thread ? 0 : -ENODEV; + if (!thread) + return -ENODEV; + + /* Lets reprogram timer */ + pthread_mutex_lock(&_timeout_mutex); + thread->timeout = message_data->timeout_secs; + thread->next_time = 0; + pthread_cond_signal(&_timeout_cond); + pthread_mutex_unlock(&_timeout_mutex); + + return 0; } static int _get_timeout(struct message_data *message_data) @@ -1218,94 +1372,105 @@ static int _get_timeout(struct message_data *message_data) struct thread_status *thread; struct dm_event_daemon_message *msg = message_data->msg; - dm_free(msg->data); - _lock_mutex(); - if ((thread = _lookup_thread_status(message_data))) { - msg->size = - dm_asprintf(&(msg->data), "%s %" PRIu32, message_data->id, - thread->timeout); - } else { - msg->data = NULL; - msg->size = 0; - } + thread = _lookup_thread_status(message_data); _unlock_mutex(); - return thread ? 0 : -ENODEV; -} + if (!thread) + return -ENODEV; -/* Initialize a fifos structure with path names. */ -static void _init_fifos(struct dm_event_fifos *fifos) -{ - memset(fifos, 0, sizeof(*fifos)); + free(msg->data); + msg->size = dm_asprintf(&(msg->data), "%s %" PRIu32, + message_data->id, thread->timeout); - fifos->client_path = DM_EVENT_FIFO_CLIENT; - fifos->server_path = DM_EVENT_FIFO_SERVER; + return (msg->data && msg->size) ? 0 : -ENOMEM; } -/* Open fifos used for client communication. */ -static int _open_fifos(struct dm_event_fifos *fifos) +static int _open_fifo(const char *path) { struct stat st; + int fd = -1; + + /* + * FIXME Explicitly verify the code's requirement that path is secure: + * - All parent directories owned by root without group/other write access unless sticky. + */ - /* Create client fifo. */ - (void) dm_prepare_selinux_context(fifos->client_path, S_IFIFO); - if ((mkfifo(fifos->client_path, 0600) == -1) && errno != EEXIST) { - syslog(LOG_ERR, "%s: Failed to create client fifo %s: %m.\n", - __func__, fifos->client_path); - (void) dm_prepare_selinux_context(NULL, 0); - return 0; + /* If path exists, only use it if it is root-owned fifo mode 0600 */ + if ((lstat(path, &st) < 0)) { + if (errno != ENOENT) { + log_sys_error("stat", path); + return -1; + } + } else if (!S_ISFIFO(st.st_mode) || st.st_uid || + (st.st_mode & (S_IEXEC | S_IRWXG | S_IRWXO))) { + log_warn("WARNING: %s has wrong attributes: Replacing.", path); + if (unlink(path)) { + log_sys_error("unlink", path); + return -1; + } } - /* Create server fifo. */ - (void) dm_prepare_selinux_context(fifos->server_path, S_IFIFO); - if ((mkfifo(fifos->server_path, 0600) == -1) && errno != EEXIST) { - syslog(LOG_ERR, "%s: Failed to create server fifo %s: %m.\n", - __func__, fifos->server_path); + /* Create fifo. */ + (void) dm_prepare_selinux_context(path, S_IFIFO); + if ((mkfifo(path, 0600) == -1) && errno != EEXIST) { + log_sys_error("mkfifo", path); (void) dm_prepare_selinux_context(NULL, 0); - return 0; + goto fail; } (void) dm_prepare_selinux_context(NULL, 0); - /* Warn about wrong permissions if applicable */ - if ((!stat(fifos->client_path, &st)) && (st.st_mode & 0777) != 0600) - syslog(LOG_WARNING, "Fixing wrong permissions on %s: %m.\n", - fifos->client_path); - - if ((!stat(fifos->server_path, &st)) && (st.st_mode & 0777) != 0600) - syslog(LOG_WARNING, "Fixing wrong permissions on %s: %m.\n", - fifos->server_path); - - /* If they were already there, make sure permissions are ok. */ - if (chmod(fifos->client_path, 0600)) { - syslog(LOG_ERR, "Unable to set correct file permissions on %s: %m.\n", - fifos->client_path); - return 0; + /* Need to open read+write or we will block or fail */ + if ((fd = open(path, O_RDWR)) < 0) { + log_sys_error("open", path); + goto fail; } - if (chmod(fifos->server_path, 0600)) { - syslog(LOG_ERR, "Unable to set correct file permissions on %s: %m.\n", - fifos->server_path); - return 0; + /* Warn about wrong permissions if applicable */ + if (fstat(fd, &st)) { + log_sys_error("fstat", path); + goto fail; } - /* Need to open read+write or we will block or fail */ - if ((fifos->server = open(fifos->server_path, O_RDWR)) < 0) { - syslog(LOG_ERR, "Failed to open fifo server %s: %m.\n", - fifos->server_path); - return 0; + if (!S_ISFIFO(st.st_mode) || st.st_uid || + (st.st_mode & (S_IEXEC | S_IRWXG | S_IRWXO))) { + log_error("%s: fifo has incorrect attributes", path); + goto fail; } - /* Need to open read+write for select() to work. */ - if ((fifos->client = open(fifos->client_path, O_RDWR)) < 0) { - syslog(LOG_ERR, "Failed to open fifo client %s: %m", fifos->client_path); - if (close(fifos->server)) - syslog(LOG_ERR, "Failed to close fifo server %s: %m", fifos->server_path); - return 0; + if (fcntl(fd, F_SETFD, FD_CLOEXEC)) { + log_sys_error("fcntl(FD_CLOEXEC)", path); + goto fail; } + return fd; + +fail: + if ((fd >= 0) && close(fd)) + log_sys_error("close", path); + + return -1; +} + +/* Open fifos used for client communication. */ +static int _open_fifos(struct dm_event_fifos *fifos) +{ + /* Create client fifo. */ + if ((fifos->client = _open_fifo(fifos->client_path)) < 0) + goto fail; + + /* Create server fifo. */ + if ((fifos->server = _open_fifo(fifos->server_path)) < 0) + goto fail; + return 1; + +fail: + if (fifos->client >= 0 && close(fifos->client)) + log_sys_error("close", fifos->client_path); + + return 0; } /* @@ -1313,7 +1478,7 @@ static int _open_fifos(struct dm_event_fifos *fifos) * and a complete message is read. Must not block indefinitely. */ static int _client_read(struct dm_event_fifos *fifos, - struct dm_event_daemon_message *msg) + struct dm_event_daemon_message *msg) { struct timeval t; unsigned bytes = 0; @@ -1334,34 +1499,34 @@ static int _client_read(struct dm_event_fifos *fifos, t.tv_usec = 0; ret = select(fifos->client + 1, &fds, NULL, NULL, &t); - if (!ret && !bytes) /* nothing to read */ - return 0; - - if (!ret) /* trying to finish read */ - continue; + if (!ret && bytes) + continue; /* trying to finish read */ - if (ret < 0) /* error */ - return 0; + if (ret <= 0) /* nothing to read */ + goto bad; ret = read(fifos->client, buf + bytes, size - bytes); bytes += ret > 0 ? ret : 0; - if (header && (bytes == 2 * sizeof(uint32_t))) { + if (!msg->data && (bytes == 2 * sizeof(uint32_t))) { msg->cmd = ntohl(header[0]); - msg->size = ntohl(header[1]); - buf = msg->data = dm_malloc(msg->size); - size = msg->size; bytes = 0; - header = 0; + + if (!(size = msg->size = ntohl(header[1]))) + break; + + if (!(buf = msg->data = malloc(msg->size))) + goto bad; } } - if (bytes != size) { - dm_free(msg->data); - msg->data = NULL; - msg->size = 0; - } + if (bytes == size) + return 1; - return bytes == size; +bad: + free(msg->data); + msg->data = NULL; + + return 0; } /* @@ -1370,33 +1535,45 @@ static int _client_read(struct dm_event_fifos *fifos, static int _client_write(struct dm_event_fifos *fifos, struct dm_event_daemon_message *msg) { + uint32_t temp[2]; unsigned bytes = 0; int ret = 0; fd_set fds; - size_t size = 2 * sizeof(uint32_t) + msg->size; - uint32_t *header = alloca(size); + size_t size = 2 * sizeof(uint32_t) + ((msg->data) ? msg->size : 0); + uint32_t *header = malloc(size); char *buf = (char *)header; - header[0] = htonl(msg->cmd); - header[1] = htonl(msg->size); - if (msg->data) - memcpy(buf + 2 * sizeof(uint32_t), msg->data, msg->size); + if (!header) { + /* Reply with ENOMEM message */ + header = temp; + size = sizeof(temp); + header[0] = htonl(-ENOMEM); + header[1] = 0; + } else { + header[0] = htonl(msg->cmd); + header[1] = htonl((msg->data) ? msg->size : 0); + if (msg->data) + memcpy(buf + 2 * sizeof(uint32_t), msg->data, msg->size); + } - errno = 0; - while (bytes < size && errno != EIO) { + while (bytes < size) { do { /* Watch client write FIFO to be ready for output. */ FD_ZERO(&fds); FD_SET(fifos->server, &fds); - } while (select(fifos->server + 1, NULL, &fds, NULL, NULL) != - 1); + } while (select(fifos->server + 1, NULL, &fds, NULL, NULL) != 1); - ret = write(fifos->server, buf + bytes, size - bytes); - bytes += ret > 0 ? ret : 0; + if ((ret = write(fifos->server, buf + bytes, size - bytes)) > 0) + bytes += ret; + else if (errno == EIO) + break; } - return bytes == size; + if (header != temp) + free(header); + + return (bytes == size); } /* @@ -1408,26 +1585,37 @@ static int _client_write(struct dm_event_fifos *fifos, static int _handle_request(struct dm_event_daemon_message *msg, struct message_data *message_data) { - static struct request { - unsigned int cmd; - int (*f)(struct message_data *); - } requests[] = { - { DM_EVENT_CMD_REGISTER_FOR_EVENT, _register_for_event}, - { DM_EVENT_CMD_UNREGISTER_FOR_EVENT, _unregister_for_event}, - { DM_EVENT_CMD_GET_REGISTERED_DEVICE, _get_registered_device}, - { DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE, - _get_next_registered_device}, - { DM_EVENT_CMD_SET_TIMEOUT, _set_timeout}, - { DM_EVENT_CMD_GET_TIMEOUT, _get_timeout}, - { DM_EVENT_CMD_ACTIVE, _active}, - { DM_EVENT_CMD_GET_STATUS, _get_status}, - }, *req; - - for (req = requests; req < requests + sizeof(requests) / sizeof(struct request); req++) - if (req->cmd == msg->cmd) - return req->f(message_data); - - return -EINVAL; + switch (msg->cmd) { + case DM_EVENT_CMD_REGISTER_FOR_EVENT: + if (!message_data->events_field) + return -EINVAL; + return _register_for_event(message_data); + case DM_EVENT_CMD_UNREGISTER_FOR_EVENT: + return _unregister_for_event(message_data); + case DM_EVENT_CMD_GET_REGISTERED_DEVICE: + return _get_registered_device(message_data); + case DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE: + return _get_next_registered_device(message_data); + case DM_EVENT_CMD_SET_TIMEOUT: + return _set_timeout(message_data); + case DM_EVENT_CMD_GET_TIMEOUT: + return _get_timeout(message_data); + case DM_EVENT_CMD_ACTIVE: + return _active(message_data); + case DM_EVENT_CMD_GET_STATUS: + return _get_status(message_data); + /* dmeventd parameters of running dmeventd, + * returns 'pid=<pid> daemon=<no/yes> exec_method=<direct/systemd>' + * pid - pidfile of running dmeventd + * daemon - running as a daemon or not (foreground)? + * exec_method - "direct" if executed directly or + * "systemd" if executed via systemd + */ + case DM_EVENT_CMD_GET_PARAMETERS: + return _get_parameters(message_data); + default: + return -EINVAL; + } } /* Process a request passed from the communication thread. */ @@ -1443,12 +1631,9 @@ static int _do_process_request(struct dm_event_daemon_message *msg) answer = msg->data; if (answer) { msg->size = dm_asprintf(&(msg->data), "%s %s %d", answer, - msg->cmd == DM_EVENT_CMD_DIE ? "DYING" : "HELLO", - DM_EVENT_PROTOCOL_VERSION); - dm_free(answer); - } else { - msg->size = 0; - msg->data = NULL; + (msg->cmd == DM_EVENT_CMD_DIE) ? "DYING" : "HELLO", + DM_EVENT_PROTOCOL_VERSION); + free(answer); } } else if (msg->cmd != DM_EVENT_CMD_ACTIVE && !_parse_message(&message_data)) { stack; @@ -1468,9 +1653,8 @@ static int _do_process_request(struct dm_event_daemon_message *msg) /* Only one caller at a time. */ static void _process_request(struct dm_event_fifos *fifos) { - int die = 0; struct dm_event_daemon_message msg = { 0 }; - + int cmd; /* * Read the request from the client (client_read, client_write * give true on success and false on failure). @@ -1478,8 +1662,9 @@ static void _process_request(struct dm_event_fifos *fifos) if (!_client_read(fifos, &msg)) return; - if (msg.cmd == DM_EVENT_CMD_DIE) - die = 1; + cmd = msg.cmd; + + DEBUGLOG(">>> CMD:%s (0x%x) processing...", decode_cmd(cmd), cmd); /* _do_process_request fills in msg (if memory allows for data, otherwise just cmd and size = 0) */ @@ -1488,86 +1673,72 @@ static void _process_request(struct dm_event_fifos *fifos) if (!_client_write(fifos, &msg)) stack; - dm_free(msg.data); + DEBUGLOG("<<< CMD:%s (0x%x) completed (result %d).", decode_cmd(cmd), cmd, msg.cmd); + + free(msg.data); - if (die) raise(9); + if (cmd == DM_EVENT_CMD_DIE) { + if (unlink(DMEVENTD_PIDFILE)) + log_sys_error("unlink", DMEVENTD_PIDFILE); + _exit(0); + } } static void _process_initial_registrations(void) { - int i = 0; + int i; char *reg; - struct dm_event_daemon_message msg = { 0, 0, NULL }; + struct dm_event_daemon_message msg = { 0 }; - while ((reg = _initial_registrations[i])) { + for (i = 0; (reg = _initial_registrations[i]); ++i) { msg.cmd = DM_EVENT_CMD_REGISTER_FOR_EVENT; if ((msg.size = strlen(reg))) { msg.data = reg; _do_process_request(&msg); } - ++ i; } } static void _cleanup_unused_threads(void) { - int ret; struct dm_list *l; struct thread_status *thread; - int join_ret = 0; + int ret; _lock_mutex(); + while ((l = dm_list_first(&_thread_registry_unused))) { thread = dm_list_item(l, struct thread_status); - if (thread->processing) - break; /* cleanup on the next round */ + if (thread->status != DM_THREAD_DONE) { + if (thread->processing) + break; /* cleanup on the next round */ - if (thread->status == DM_THREAD_RUNNING) { - thread->status = DM_THREAD_SHUTDOWN; - break; + /* Signal possibly sleeping thread */ + ret = pthread_kill(thread->thread, SIGALRM); + if (!ret || (ret != ESRCH)) + break; /* check again on the next round */ + + /* thread is likely gone */ } - if (thread->status == DM_THREAD_SHUTDOWN) { - if (!thread->events) { - /* turn codes negative -- should we be returning this? */ - ret = _terminate_thread(thread); - - if (ret == ESRCH) { - thread->status = DM_THREAD_DONE; - } else if (ret) { - syslog(LOG_ERR, - "Unable to terminate thread: %s\n", - strerror(-ret)); - stack; - } - break; - } + dm_list_del(l); + _unlock_mutex(); - dm_list_del(l); - syslog(LOG_ERR, - "thread can't be on unused list unless !thread->events"); - thread->status = DM_THREAD_RUNNING; - LINK_THREAD(thread); + DEBUGLOG("Destroying Thr %x.", (int)thread->thread); - continue; - } + if (pthread_join(thread->thread, NULL)) + log_sys_error("pthread_join", ""); - if (thread->status == DM_THREAD_DONE) { - dm_list_del(l); - join_ret = pthread_join(thread->thread, NULL); - _free_thread_status(thread); - } + _free_thread_status(thread); + _lock_mutex(); } _unlock_mutex(); - - if (join_ret) - syslog(LOG_ERR, "Failed pthread_join: %s\n", strerror(join_ret)); } static void _sig_alarm(int signum __attribute__((unused))) { - pthread_testcancel(); + /* empty SIG_IGN */; } /* Init thread signal handling. */ @@ -1576,7 +1747,8 @@ static void _init_thread_signals(void) sigset_t my_sigset; struct sigaction act = { .sa_handler = _sig_alarm }; - sigaction(SIGALRM, &act, NULL); + if (sigaction(SIGALRM, &act, NULL)) + log_sys_debug("sigaction", "SIGLARM"); sigfillset(&my_sigset); /* These are used for exiting */ @@ -1585,7 +1757,8 @@ static void _init_thread_signals(void) sigdelset(&my_sigset, SIGHUP); sigdelset(&my_sigset, SIGQUIT); - pthread_sigmask(SIG_BLOCK, &my_sigset, NULL); + if (pthread_sigmask(SIG_BLOCK, &my_sigset, NULL)) + log_sys_error("pthread_sigmask", "SIG_BLOCK"); } /* @@ -1597,35 +1770,23 @@ static void _init_thread_signals(void) */ static void _exit_handler(int sig __attribute__((unused))) { - /* - * We exit when '_exit_now' is set. - * That is, when a signal has been received. - * - * We can not simply set '_exit_now' unless all - * threads are done processing. - */ - if (!_thread_registries_empty) { - syslog(LOG_ERR, "There are still devices being monitored."); - syslog(LOG_ERR, "Refusing to exit."); - } else - _exit_now = 1; - + _exit_now = DM_SIGNALED_EXIT; } -#ifdef linux +#ifdef __linux__ static int _set_oom_adj(const char *oom_adj_path, int val) { FILE *fp; if (!(fp = fopen(oom_adj_path, "w"))) { - perror("oom_adj: fopen failed"); + log_sys_error("open", oom_adj_path); return 0; } fprintf(fp, "%i", val); if (dm_fclose(fp)) - perror("oom_adj: fclose failed"); + log_sys_error("fclose", oom_adj_path); return 1; } @@ -1639,14 +1800,11 @@ static int _protect_against_oom_killer(void) if (stat(OOM_ADJ_FILE, &st) == -1) { if (errno != ENOENT) - perror(OOM_ADJ_FILE ": stat failed"); + log_sys_error("stat", OOM_ADJ_FILE); /* Try old oom_adj interface as a fallback */ if (stat(OOM_ADJ_FILE_OLD, &st) == -1) { - if (errno == ENOENT) - perror(OOM_ADJ_FILE_OLD " not found"); - else - perror(OOM_ADJ_FILE_OLD ": stat failed"); + log_sys_error("stat", OOM_ADJ_FILE_OLD); return 1; } @@ -1689,8 +1847,6 @@ static int _systemd_handover(struct dm_event_fifos *fifos) unsigned long env_pid, env_listen_fds; int r = 0; - memset(fifos, 0, sizeof(*fifos)); - /* SD_ACTIVATION must be set! */ if (!(e = getenv(SD_ACTIVATION_ENV_VAR_NAME)) || strcmp(e, "1")) goto out; @@ -1731,19 +1887,20 @@ out: unsetenv(SD_LISTEN_FDS_ENV_VAR_NAME); return r; } + #endif static void _remove_files_on_exit(void) { if (unlink(DMEVENTD_PIDFILE)) - perror(DMEVENTD_PIDFILE ": unlink failed"); + log_sys_error("unlink", DMEVENTD_PIDFILE); if (!_systemd_activation) { if (unlink(DM_EVENT_FIFO_CLIENT)) - perror(DM_EVENT_FIFO_CLIENT " : unlink failed"); + log_sys_error("unlink", DM_EVENT_FIFO_CLIENT); if (unlink(DM_EVENT_FIFO_SERVER)) - perror(DM_EVENT_FIFO_SERVER " : unlink failed"); + log_sys_error("unlink", DM_EVENT_FIFO_SERVER); } } @@ -1765,9 +1922,8 @@ static void _daemonize(void) switch (pid = fork()) { case -1: - perror("fork failed:"); + log_sys_error("fork", ""); exit(EXIT_FAILURE); - case 0: /* Child */ break; @@ -1805,7 +1961,7 @@ static void _daemonize(void) fd = rlim.rlim_cur; for (--fd; fd >= 0; fd--) { -#ifdef linux +#ifdef __linux__ /* Do not close fds preloaded by systemd! */ if (_systemd_activation && (fd == SD_FD_FIFO_SERVER || fd == SD_FD_FIFO_CLIENT)) @@ -1814,6 +1970,7 @@ static void _daemonize(void) (void) close(fd); } + /* coverity[leaked_handle] dont't care */ if ((open("/dev/null", O_RDONLY) < 0) || (open("/dev/null", O_WRONLY) < 0) || (open("/dev/null", O_WRONLY) < 0)) @@ -1822,17 +1979,75 @@ static void _daemonize(void) setsid(); } -static void restart(void) +static int _reinstate_registrations(struct dm_event_fifos *fifos) +{ + static const char _failed_parsing_msg[] = "Failed to parse existing event registration.\n"; + static const char *_delim = " "; + struct dm_event_daemon_message msg = { 0 }; + char *endp, *dso_name, *dev_name, *mask, *timeout; + unsigned long mask_value, timeout_value; + int i, ret; + + ret = daemon_talk(fifos, &msg, DM_EVENT_CMD_HELLO, NULL, NULL, 0, 0); + free(msg.data); + msg.data = NULL; + + if (ret) { + fprintf(stderr, "Failed to communicate with new instance of dmeventd.\n"); + return 0; + } + + for (i = 0; _initial_registrations[i]; ++i) { + if (!(strtok(_initial_registrations[i], _delim)) || + !(dso_name = strtok(NULL, _delim)) || + !(dev_name = strtok(NULL, _delim)) || + !(mask = strtok(NULL, _delim)) || + !(timeout = strtok(NULL, _delim))) { + fputs(_failed_parsing_msg, stderr); + continue; + } + + errno = 0; + mask_value = strtoul(mask, &endp, 10); + if (errno || !endp || *endp) { + fputs(_failed_parsing_msg, stderr); + continue; + } + + errno = 0; + timeout_value = strtoul(timeout, &endp, 10); + if (errno || !endp || *endp) { + fputs(_failed_parsing_msg, stderr); + continue; + } + + if (daemon_talk(fifos, &msg, DM_EVENT_CMD_REGISTER_FOR_EVENT, + dso_name, + dev_name, + (enum dm_event_mask) mask_value, + timeout_value)) + fprintf(stderr, "Failed to reinstate monitoring for device %s.\n", dev_name); + } + + return 1; +} + +static void _restart_dmeventd(void) { - struct dm_event_fifos fifos; - struct dm_event_daemon_message msg = { 0, 0, NULL }; + struct dm_event_fifos fifos = { + .client = -1, + .server = -1, + /* FIXME Make these either configurable or depend directly on dmeventd_path */ + .client_path = DM_EVENT_FIFO_CLIENT, + .server_path = DM_EVENT_FIFO_SERVER + }; + struct dm_event_daemon_message msg = { 0 }; int i, count = 0; char *message; - int length; int version; + const char *e; /* Get the list of registrations from the running daemon. */ - if (!init_fifos(&fifos)) { fprintf(stderr, "WARNING: Could not initiate communication with existing dmeventd.\n"); exit(EXIT_FAILURE); @@ -1840,71 +2055,108 @@ static void restart(void) if (!dm_event_get_version(&fifos, &version)) { fprintf(stderr, "WARNING: Could not communicate with existing dmeventd.\n"); - fini_fifos(&fifos); - exit(EXIT_FAILURE); + goto bad; } if (version < 1) { fprintf(stderr, "WARNING: The running dmeventd instance is too old.\n" - "Protocol version %d (required: 1). Action cancelled.\n", - version); - exit(EXIT_FAILURE); + "Protocol version %d (required: 1). Action cancelled.\n", + version); + goto bad; } - if (daemon_talk(&fifos, &msg, DM_EVENT_CMD_GET_STATUS, "-", "-", 0, 0)) { - exit(EXIT_FAILURE); - } + if (daemon_talk(&fifos, &msg, DM_EVENT_CMD_GET_STATUS, "-", "-", 0, 0)) + goto bad; - message = msg.data; - message = strchr(message, ' '); - ++ message; - length = strlen(msg.data); - for (i = 0; i < length; ++i) { + message = strchr(msg.data, ' ') + 1; + for (i = 0; msg.data[i]; ++i) if (msg.data[i] == ';') { msg.data[i] = 0; ++count; } - } - if (!(_initial_registrations = dm_malloc(sizeof(char*) * (count + 1)))) { + if (!(_initial_registrations = zalloc(sizeof(char*) * (count + 1)))) { fprintf(stderr, "Memory allocation registration failed.\n"); - exit(EXIT_FAILURE); + goto bad; } for (i = 0; i < count; ++i) { - if (!(_initial_registrations[i] = dm_strdup(message))) { + if (!(_initial_registrations[i] = strdup(message))) { fprintf(stderr, "Memory allocation for message failed.\n"); - exit(EXIT_FAILURE); + goto bad; } message += strlen(message) + 1; } - _initial_registrations[count] = 0; + + if (version >= 2) { + if (daemon_talk(&fifos, &msg, DM_EVENT_CMD_GET_PARAMETERS, "-", "-", 0, 0)) { + fprintf(stderr, "Failed to acquire parameters from old dmeventd.\n"); + goto bad; + } + if (strstr(msg.data, "exec_method=systemd")) + _systemd_activation = 1; + } +#ifdef __linux__ + /* + * If the protocol version is old, just assume that if systemd is running, + * the dmeventd is also run as a systemd service via fifo activation. + */ + if (version < 2) { + /* This check is copied from sd-daemon.c. */ + struct stat st; + if (!lstat(SD_RUNTIME_UNIT_FILE_DIR, &st) && !!S_ISDIR(st.st_mode)) + _systemd_activation = 1; + } +#endif if (daemon_talk(&fifos, &msg, DM_EVENT_CMD_DIE, "-", "-", 0, 0)) { fprintf(stderr, "Old dmeventd refused to die.\n"); - exit(EXIT_FAILURE); + goto bad; } - /* - * Wait for daemon to die, detected by sending further DIE messages - * until one fails. - */ + if (!_systemd_activation && + ((e = getenv(SD_ACTIVATION_ENV_VAR_NAME)) && strcmp(e, "1"))) + _systemd_activation = 1; + for (i = 0; i < 10; ++i) { - if (daemon_talk(&fifos, &msg, DM_EVENT_CMD_DIE, "-", "-", 0, 0)) - break; /* yep, it's dead probably */ + if ((access(DMEVENTD_PIDFILE, F_OK) == -1) && (errno == ENOENT)) + break; usleep(10); } + if (!_systemd_activation) { + fini_fifos(&fifos); + return; + } + + /* Reopen fifos. */ + fini_fifos(&fifos); + if (!init_fifos(&fifos)) { + fprintf(stderr, "Could not initiate communication with new instance of dmeventd.\n"); + exit(EXIT_FAILURE); + } + + if (!_reinstate_registrations(&fifos)) { + fprintf(stderr, "Failed to reinstate monitoring with new instance of dmeventd.\n"); + goto bad; + } + fini_fifos(&fifos); + exit(EXIT_SUCCESS); +bad: + fini_fifos(&fifos); + exit(EXIT_FAILURE); } -static void usage(char *prog, FILE *file) +static void _usage(char *prog, FILE *file) { fprintf(file, "Usage:\n" - "%s [-d [-d [-d]]] [-f] [-h] [-R] [-V] [-?]\n\n" + "%s [-d [-d [-d]]] [-f] [-h] [-l] [-R] [-V] [-?]\n\n" " -d Log debug messages to syslog (-d, -dd, -ddd)\n" " -f Don't fork, run in the foreground\n" - " -h -? Show this help information\n" + " -h Show this help information\n" + " -l Log to stdout,stderr instead of syslog\n" + " -? Show this help information on stderr\n" " -R Restart dmeventd\n" " -V Show version of dmeventd\n\n", prog); } @@ -1912,20 +2164,24 @@ static void usage(char *prog, FILE *file) int main(int argc, char *argv[]) { signed char opt; - struct dm_event_fifos fifos; - //struct sys_log logdata = {DAEMON_NAME, LOG_DAEMON}; - + struct dm_event_fifos fifos = { + .client = -1, + .server = -1, + .client_path = DM_EVENT_FIFO_CLIENT, + .server_path = DM_EVENT_FIFO_SERVER + }; + time_t now, idle_exit_timeout = DMEVENTD_IDLE_EXIT_TIMEOUT; opterr = 0; optind = 0; - while ((opt = getopt(argc, argv, "?fhVdR")) != EOF) { + while ((opt = getopt(argc, argv, "?fhVdlR")) != EOF) { switch (opt) { case 'h': - usage(argv[0], stdout); - exit(0); + _usage(argv[0], stdout); + exit(EXIT_SUCCESS); case '?': - usage(argv[0], stderr); - exit(0); + _usage(argv[0], stderr); + exit(EXIT_SUCCESS); case 'R': _restart++; break; @@ -1933,33 +2189,44 @@ int main(int argc, char *argv[]) _foreground++; break; case 'd': - dmeventd_debug++; + _debug_level++; + break; + case 'l': + _use_syslog = 0; break; case 'V': printf("dmeventd version: %s\n", DM_LIB_VERSION); - exit(1); + exit(EXIT_SUCCESS); } } + if (!_foreground && !_use_syslog) { + printf("WARNING: Ignoring logging to stdout, needs options -f\n"); + _use_syslog = 1; + } /* * Switch to C locale to avoid reading large locale-archive file * used by some glibc (on some distributions it takes over 100MB). * Daemon currently needs to use mlockall(). */ - if (setenv("LANG", "C", 1)) - perror("Cannot set LANG to C"); + if (setenv("LC_ALL", "C", 1)) + perror("Cannot set LC_ALL to C"); if (_restart) - restart(); + _restart_dmeventd(); -#ifdef linux +#ifdef __linux__ _systemd_activation = _systemd_handover(&fifos); #endif if (!_foreground) _daemonize(); - openlog("dmeventd", LOG_PID, LOG_DAEMON); + if (_use_syslog) + openlog("dmeventd", LOG_PID, LOG_DAEMON); + + dm_event_log_set(_debug_level, _use_syslog); + dm_log_with_errno_init(_libdm_log); (void) dm_prepare_selinux_context(DMEVENTD_PIDFILE, S_IFREG); if (dm_create_lockfile(DMEVENTD_PIDFILE) == 0) @@ -1974,23 +2241,16 @@ int main(int argc, char *argv[]) signal(SIGHUP, &_exit_handler); signal(SIGQUIT, &_exit_handler); -#ifdef linux +#ifdef __linux__ /* Systemd has adjusted oom killer for us already */ if (!_systemd_activation && !_protect_against_oom_killer()) - syslog(LOG_ERR, "Failed to protect against OOM killer"); + log_warn("WARNING: Failed to protect against OOM killer."); #endif _init_thread_signals(); - //multilog_clear_logging(); - //multilog_add_type(std_syslog, &logdata); - //multilog_init_verbose(std_syslog, _LOG_DEBUG); - //multilog_async(1); - - if (!_systemd_activation) - _init_fifos(&fifos); - - pthread_mutex_init(&_global_mutex, NULL); + if (pthread_mutex_init(&_global_mutex, NULL)) + exit(EXIT_FAILURE); if (!_systemd_activation && !_open_fifos(&fifos)) exit(EXIT_FIFO_FAILURE); @@ -1998,29 +2258,61 @@ int main(int argc, char *argv[]) /* Signal parent, letting them know we are ready to go. */ if (!_foreground) kill(getppid(), SIGTERM); - syslog(LOG_NOTICE, "dmeventd ready for processing."); + + log_notice("dmeventd ready for processing."); + + _idle_since = time(NULL); if (_initial_registrations) _process_initial_registrations(); - while (!_exit_now) { + for (;;) { + if (_idle_since) { + if (_exit_now) { + if (_exit_now == DM_SCHEDULED_EXIT) + break; /* Only prints shutdown message */ + log_info("dmeventd detected break while being idle " + "for %ld second(s), exiting.", + (long) (time(NULL) - _idle_since)); + break; + } + if (idle_exit_timeout) { + now = time(NULL); + if (now < _idle_since) + _idle_since = now; /* clock change? */ + now -= _idle_since; + if (now >= idle_exit_timeout) { + log_info("dmeventd was idle for %ld second(s), " + "exiting.", (long) now); + break; + } + } + } else if (_exit_now == DM_SIGNALED_EXIT) { + _exit_now = DM_SCHEDULED_EXIT; + /* + * When '_exit_now' is set, signal has been received, + * but can not simply exit unless all + * threads are done processing. + */ + log_info("dmeventd received break, scheduling exit."); + } _process_request(&fifos); _cleanup_unused_threads(); - _lock_mutex(); - if (!dm_list_empty(&_thread_registry) - || !dm_list_empty(&_thread_registry_unused)) - _thread_registries_empty = 0; - else - _thread_registries_empty = 1; - _unlock_mutex(); } - _exit_dm_lib(); - pthread_mutex_destroy(&_global_mutex); - syslog(LOG_NOTICE, "dmeventd shutting down."); - closelog(); + log_notice("dmeventd shutting down."); + + if (fifos.client >= 0 && close(fifos.client)) + log_sys_error("client close", fifos.client_path); + if (fifos.server >= 0 && close(fifos.server)) + log_sys_error("server close", fifos.server_path); + + if (_use_syslog) + closelog(); + + _exit_dm_lib(); exit(EXIT_SUCCESS); } diff --git a/daemons/dmeventd/dmeventd.h b/daemons/dmeventd/dmeventd.h index e21cf45..afe0b0c 100644 --- a/daemons/dmeventd/dmeventd.h +++ b/daemons/dmeventd/dmeventd.h @@ -9,7 +9,7 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __DMEVENTD_DOT_H__ @@ -34,6 +34,7 @@ enum dm_event_command { DM_EVENT_CMD_HELLO, DM_EVENT_CMD_DIE, DM_EVENT_CMD_GET_STATUS, + DM_EVENT_CMD_GET_PARAMETERS, }; /* Message passed between client and daemon. */ diff --git a/daemons/dmeventd/libdevmapper-event.c b/daemons/dmeventd/libdevmapper-event.c index 1f8fbef..b5ae37f 100644 --- a/daemons/dmeventd/libdevmapper-event.c +++ b/daemons/dmeventd/libdevmapper-event.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. + * Copyright (C) 2005-2015 Red Hat, Inc. All rights reserved. * * This file is part of the device-mapper userspace tools. * @@ -9,27 +9,28 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "dmlib.h" #include "libdevmapper-event.h" -//#include "libmultilog.h" #include "dmeventd.h" +#include "libdm/misc/dm-logging.h" +#include "base/memory/zalloc.h" + +#include "lib/misc/intl.h" -#include <errno.h> #include <fcntl.h> -#include <stdio.h> -#include <stdint.h> -#include <stdlib.h> -#include <string.h> #include <sys/file.h> #include <sys/types.h> #include <sys/stat.h> -#include <unistd.h> #include <sys/wait.h> #include <arpa/inet.h> /* for htonl, ntohl */ +#include <pthread.h> +#include <syslog.h> +#include <unistd.h> +static int _debug_level = 0; +static int _use_syslog = 0; static int _sequence_nr = 0; struct dm_event_handler { @@ -49,17 +50,17 @@ struct dm_event_handler { static void _dm_event_handler_clear_dev_info(struct dm_event_handler *dmevh) { - dm_free(dmevh->dev_name); - dm_free(dmevh->uuid); + free(dmevh->dev_name); + free(dmevh->uuid); dmevh->dev_name = dmevh->uuid = NULL; dmevh->major = dmevh->minor = 0; } struct dm_event_handler *dm_event_handler_create(void) { - struct dm_event_handler *dmevh = NULL; + struct dm_event_handler *dmevh; - if (!(dmevh = dm_zalloc(sizeof(*dmevh)))) { + if (!(dmevh = zalloc(sizeof(*dmevh)))) { log_error("Failed to allocate event handler."); return NULL; } @@ -70,9 +71,9 @@ struct dm_event_handler *dm_event_handler_create(void) void dm_event_handler_destroy(struct dm_event_handler *dmevh) { _dm_event_handler_clear_dev_info(dmevh); - dm_free(dmevh->dso); - dm_free(dmevh->dmeventd_path); - dm_free(dmevh); + free(dmevh->dso); + free(dmevh->dmeventd_path); + free(dmevh); } int dm_event_handler_set_dmeventd_path(struct dm_event_handler *dmevh, const char *dmeventd_path) @@ -80,10 +81,9 @@ int dm_event_handler_set_dmeventd_path(struct dm_event_handler *dmevh, const cha if (!dmeventd_path) /* noop */ return 0; - dm_free(dmevh->dmeventd_path); + free(dmevh->dmeventd_path); - dmevh->dmeventd_path = dm_strdup(dmeventd_path); - if (!dmevh->dmeventd_path) + if (!(dmevh->dmeventd_path = strdup(dmeventd_path))) return -ENOMEM; return 0; @@ -93,10 +93,10 @@ int dm_event_handler_set_dso(struct dm_event_handler *dmevh, const char *path) { if (!path) /* noop */ return 0; - dm_free(dmevh->dso); - dmevh->dso = dm_strdup(path); - if (!dmevh->dso) + free(dmevh->dso); + + if (!(dmevh->dso = strdup(path))) return -ENOMEM; return 0; @@ -109,9 +109,9 @@ int dm_event_handler_set_dev_name(struct dm_event_handler *dmevh, const char *de _dm_event_handler_clear_dev_info(dmevh); - dmevh->dev_name = dm_strdup(dev_name); - if (!dmevh->dev_name) + if (!(dmevh->dev_name = strdup(dev_name))) return -ENOMEM; + return 0; } @@ -122,9 +122,9 @@ int dm_event_handler_set_uuid(struct dm_event_handler *dmevh, const char *uuid) _dm_event_handler_clear_dev_info(dmevh); - dmevh->uuid = dm_strdup(uuid); - if (!dmevh->uuid) + if (!(dmevh->uuid = strdup(uuid))) return -ENOMEM; + return 0; } @@ -201,7 +201,7 @@ static int _check_message_id(struct dm_event_daemon_message *msg) if ((sscanf(msg->data, "%d:%d", &pid, &seq_nr) != 2) || (pid != getpid()) || (seq_nr != _sequence_nr)) { log_error("Ignoring out-of-sequence reply from dmeventd. " - "Expected %d:%d but received %s", getpid(), + "Expected %d:%d but received %s.", getpid(), _sequence_nr, msg->data); return 0; } @@ -224,7 +224,6 @@ static int _daemon_read(struct dm_event_fifos *fifos, unsigned bytes = 0; int ret, i; fd_set fds; - struct timeval tval = { 0, 0 }; size_t size = 2 * sizeof(uint32_t); /* status + size */ uint32_t *header = alloca(size); char *buf = (char *)header; @@ -232,66 +231,69 @@ static int _daemon_read(struct dm_event_fifos *fifos, while (bytes < size) { for (i = 0, ret = 0; (i < 20) && (ret < 1); i++) { /* Watch daemon read FIFO for input. */ + struct timeval tval = { .tv_sec = 1 }; FD_ZERO(&fds); FD_SET(fifos->server, &fds); - tval.tv_sec = 1; - ret = select(fifos->server + 1, &fds, NULL, NULL, - &tval); + ret = select(fifos->server + 1, &fds, NULL, NULL, &tval); if (ret < 0 && errno != EINTR) { - log_error("Unable to read from event server"); - return 0; + log_error("Unable to read from event server."); + goto bad; } if ((ret == 0) && (i > 4) && !bytes) { log_error("No input from event server."); - return 0; + goto bad; } } if (ret < 1) { log_error("Unable to read from event server."); - return 0; + goto bad; } ret = read(fifos->server, buf + bytes, size); if (ret < 0) { if ((errno == EINTR) || (errno == EAGAIN)) continue; - else { - log_error("Unable to read from event server."); - return 0; - } + + log_error("Unable to read from event server."); + goto bad; } bytes += ret; - if (header && (bytes == 2 * sizeof(uint32_t))) { + if (!msg->data && (bytes == 2 * sizeof(uint32_t))) { msg->cmd = ntohl(header[0]); - msg->size = ntohl(header[1]); - buf = msg->data = dm_malloc(msg->size); - size = msg->size; bytes = 0; - header = 0; + + if (!(size = msg->size = ntohl(header[1]))) + break; + + if (!(buf = msg->data = malloc(msg->size))) { + log_error("Unable to allocate message data."); + return 0; + } } } - if (bytes != size) { - dm_free(msg->data); - msg->data = NULL; - } - return bytes == size; + if (bytes == size) + return 1; + +bad: + free(msg->data); + msg->data = NULL; + + return 0; } /* Write message to daemon. */ static int _daemon_write(struct dm_event_fifos *fifos, struct dm_event_daemon_message *msg) { - unsigned bytes = 0; - int ret = 0; + int ret; fd_set fds; - + size_t bytes = 0; size_t size = 2 * sizeof(uint32_t) + msg->size; uint32_t *header = alloca(size); char *buf = (char *)header; char drainbuf[128]; - struct timeval tval = { 0, 0 }; header[0] = htonl(msg->cmd); header[1] = htonl(msg->size); @@ -299,17 +301,25 @@ static int _daemon_write(struct dm_event_fifos *fifos, /* drain the answer fifo */ while (1) { + struct timeval tval = { .tv_usec = 100 }; FD_ZERO(&fds); FD_SET(fifos->server, &fds); - tval.tv_usec = 100; ret = select(fifos->server + 1, &fds, NULL, NULL, &tval); - if ((ret < 0) && (errno != EINTR)) { - log_error("Unable to talk to event daemon"); + if (ret < 0) { + if (errno == EINTR) + continue; + log_error("Unable to talk to event daemon."); return 0; } if (ret == 0) break; - ret = read(fifos->server, drainbuf, 127); + ret = read(fifos->server, drainbuf, sizeof(drainbuf)); + if (ret < 0) { + if ((errno == EINTR) || (errno == EAGAIN)) + continue; + log_error("Unable to talk to event daemon."); + return 0; + } } while (bytes < size) { @@ -319,7 +329,7 @@ static int _daemon_write(struct dm_event_fifos *fifos, FD_SET(fifos->client, &fds); ret = select(fifos->client + 1, NULL, &fds, NULL, NULL); if ((ret < 0) && (errno != EINTR)) { - log_error("Unable to talk to event daemon"); + log_error("Unable to talk to event daemon."); return 0; } } while (ret < 1); @@ -328,10 +338,9 @@ static int _daemon_write(struct dm_event_fifos *fifos, if (ret < 0) { if ((errno == EINTR) || (errno == EAGAIN)) continue; - else { - log_error("Unable to talk to event daemon"); - return 0; - } + + log_error("Unable to talk to event daemon."); + return 0; } bytes += ret; @@ -345,9 +354,6 @@ int daemon_talk(struct dm_event_fifos *fifos, const char *dso_name, const char *dev_name, enum dm_event_mask evmask, uint32_t timeout) { - const char *dso = dso_name ? dso_name : "-"; - const char *dev = dev_name ? dev_name : "-"; - const char *fmt = "%d:%d %s %s %u %" PRIu32; int msg_size; memset(msg, 0, sizeof(*msg)); @@ -355,14 +361,17 @@ int daemon_talk(struct dm_event_fifos *fifos, * Set command and pack the arguments * into ASCII message string. */ - msg->cmd = cmd; - if (cmd == DM_EVENT_CMD_HELLO) - fmt = "%d:%d HELLO"; - if ((msg_size = dm_asprintf(&(msg->data), fmt, getpid(), _sequence_nr, - dso, dev, evmask, timeout)) < 0) { - log_error("_daemon_talk: message allocation failed"); + if ((msg_size = + ((cmd == DM_EVENT_CMD_HELLO) ? + dm_asprintf(&(msg->data), "%d:%d HELLO", getpid(), _sequence_nr) : + dm_asprintf(&(msg->data), "%d:%d %s %s %u %" PRIu32, + getpid(), _sequence_nr, + dso_name ? : "-", dev_name ? : "-", evmask, timeout))) + < 0) { + log_error("_daemon_talk: message allocation failed."); return -ENOMEM; } + msg->cmd = cmd; msg->size = msg_size; /* @@ -371,15 +380,14 @@ int daemon_talk(struct dm_event_fifos *fifos, */ if (!_daemon_write(fifos, msg)) { stack; - dm_free(msg->data); - msg->data = 0; + free(msg->data); + msg->data = NULL; return -EIO; } do { - - dm_free(msg->data); - msg->data = 0; + free(msg->data); + msg->data = NULL; if (!_daemon_read(fifos, msg)) { stack; @@ -412,28 +420,56 @@ static int _start_daemon(char *dmeventd_path, struct dm_event_fifos *fifos) char default_dmeventd_path[] = DMEVENTD_PATH; char *args[] = { dmeventd_path ? : default_dmeventd_path, NULL }; - if (stat(fifos->client_path, &statbuf)) - goto start_server; + /* + * FIXME Explicitly verify the code's requirement that client_path is secure: + * - All parent directories owned by root without group/other write access unless sticky. + */ - if (!S_ISFIFO(statbuf.st_mode)) { - log_error("%s is not a fifo.", fifos->client_path); + /* If client fifo path exists, only use it if it is root-owned fifo mode 0600 */ + if ((lstat(fifos->client_path, &statbuf) < 0)) { + if (errno == ENOENT) + /* Jump ahead if fifo does not already exist. */ + goto start_server; + else { + log_sys_error("stat", fifos->client_path); + return 0; + } + } else if (!S_ISFIFO(statbuf.st_mode)) { + log_error("%s must be a fifo.", fifos->client_path); + return 0; + } else if (statbuf.st_uid) { + log_error("%s must be owned by uid 0.", fifos->client_path); + return 0; + } else if (statbuf.st_mode & (S_IEXEC | S_IRWXG | S_IRWXO)) { + log_error("%s must have mode 0600.", fifos->client_path); return 0; } /* Anyone listening? If not, errno will be ENXIO */ fifos->client = open(fifos->client_path, O_WRONLY | O_NONBLOCK); if (fifos->client >= 0) { + /* Should never happen if all the above checks passed. */ + if ((fstat(fifos->client, &statbuf) < 0) || + !S_ISFIFO(statbuf.st_mode) || statbuf.st_uid || + (statbuf.st_mode & (S_IEXEC | S_IRWXG | S_IRWXO))) { + log_error("%s is no longer a secure root-owned fifo with mode 0600.", fifos->client_path); + if (close(fifos->client)) + log_sys_debug("close", fifos->client_path); + return 0; + } + /* server is running and listening */ if (close(fifos->client)) - log_sys_error("close", fifos->client_path); + log_sys_debug("close", fifos->client_path); return 1; - } else if (errno != ENXIO) { + } + if (errno != ENXIO && errno != ENOENT) { /* problem */ log_sys_error("open", fifos->client_path); return 0; } - start_server: +start_server: /* server is not running */ if ((args[0][0] == '/') && stat(args[0], &statbuf)) { @@ -448,11 +484,11 @@ static int _start_daemon(char *dmeventd_path, struct dm_event_fifos *fifos) else if (!pid) { execvp(args[0], args); - log_error("Unable to exec dmeventd: %s", strerror(errno)); + log_error("Unable to exec dmeventd: %s.", strerror(errno)); _exit(EXIT_FAILURE); } else { if (waitpid(pid, &status, 0) < 0) - log_error("Unable to start dmeventd: %s", + log_error("Unable to start dmeventd: %s.", strerror(errno)); else if (WEXITSTATUS(status)) log_error("Unable to start dmeventd."); @@ -468,10 +504,6 @@ int init_fifos(struct dm_event_fifos *fifos) /* FIXME? Is fifo the most suitable method? Why not share comms/daemon code with something else e.g. multipath? */ - /* FIXME Make these either configurable or depend directly on dmeventd_path */ - fifos->client_path = DM_EVENT_FIFO_CLIENT; - fifos->server_path = DM_EVENT_FIFO_SERVER; - /* Open the fifo used to read from the daemon. */ if ((fifos->server = open(fifos->server_path, O_RDWR)) < 0) { log_sys_error("open", fifos->server_path); @@ -481,32 +513,27 @@ int init_fifos(struct dm_event_fifos *fifos) /* Lock out anyone else trying to do communication with the daemon. */ if (flock(fifos->server, LOCK_EX) < 0) { log_sys_error("flock", fifos->server_path); - if (close(fifos->server)) - log_sys_error("close", fifos->server_path); - return 0; + goto bad; } /* if ((fifos->client = open(fifos->client_path, O_WRONLY | O_NONBLOCK)) < 0) {*/ if ((fifos->client = open(fifos->client_path, O_RDWR | O_NONBLOCK)) < 0) { log_sys_error("open", fifos->client_path); - if (close(fifos->server)) - log_sys_error("close", fifos->server_path); - return 0; + goto bad; } return 1; +bad: + if (close(fifos->server)) + log_sys_debug("close", fifos->server_path); + fifos->server = -1; + + return 0; } /* Initialize client. */ static int _init_client(char *dmeventd_path, struct dm_event_fifos *fifos) { - /* init fifos */ - memset(fifos, 0, sizeof(*fifos)); - - /* FIXME Make these either configurable or depend directly on dmeventd_path */ - fifos->client_path = DM_EVENT_FIFO_CLIENT; - fifos->server_path = DM_EVENT_FIFO_SERVER; - if (!_start_daemon(dmeventd_path, fifos)) return_0; @@ -515,13 +542,16 @@ static int _init_client(char *dmeventd_path, struct dm_event_fifos *fifos) void fini_fifos(struct dm_event_fifos *fifos) { - if (flock(fifos->server, LOCK_UN)) - log_error("flock unlock %s", fifos->server_path); + if (fifos->client >= 0 && close(fifos->client)) + log_sys_debug("close", fifos->client_path); - if (close(fifos->client)) - log_sys_error("close", fifos->client_path); - if (close(fifos->server)) - log_sys_error("close", fifos->server_path); + if (fifos->server >= 0) { + if (flock(fifos->server, LOCK_UN)) + log_sys_debug("flock unlock", fifos->server_path); + + if (close(fifos->server)) + log_sys_debug("close", fifos->server_path); + } } /* Get uuid of a device */ @@ -531,7 +561,7 @@ static struct dm_task *_get_device_info(const struct dm_event_handler *dmevh) struct dm_info info; if (!(dmt = dm_task_create(DM_DEVICE_INFO))) { - log_error("_get_device_info: dm_task creation for info failed"); + log_error("_get_device_info: dm_task creation for info failed."); return NULL; } @@ -549,19 +579,19 @@ static struct dm_task *_get_device_info(const struct dm_event_handler *dmevh) /* FIXME Add name or uuid or devno to messages */ if (!dm_task_run(dmt)) { - log_error("_get_device_info: dm_task_run() failed"); + log_error("_get_device_info: dm_task_run() failed."); goto bad; } if (!dm_task_get_info(dmt, &info)) { - log_error("_get_device_info: failed to get info for device"); + log_error("_get_device_info: failed to get info for device."); goto bad; } if (!info.exists) { - log_error("_get_device_info: %s%s%s%.0d%s%.0d%s%s: device not found", - dmevh->uuid ? : "", - (!dmevh->uuid && dmevh->dev_name) ? dmevh->dev_name : "", + log_error("_get_device_info: %s%s%s%.0d%s%.0d%s%s: device not found.", + dmevh->uuid ? : "", + (!dmevh->uuid && dmevh->dev_name) ? dmevh->dev_name : "", (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? "(" : "", (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? dmevh->major : 0, (!dmevh->uuid && !dmevh->dev_name && dmevh->major > 0) ? ":" : "", @@ -571,7 +601,6 @@ static struct dm_task *_get_device_info(const struct dm_event_handler *dmevh) goto bad; } - return dmt; bad: @@ -585,21 +614,27 @@ static int _do_event(int cmd, char *dmeventd_path, struct dm_event_daemon_messag enum dm_event_mask evmask, uint32_t timeout) { int ret; - struct dm_event_fifos fifos; + struct dm_event_fifos fifos = { + .client = -1, + .server = -1, + /* FIXME Make these either configurable or depend directly on dmeventd_path */ + .client_path = DM_EVENT_FIFO_CLIENT, + .server_path = DM_EVENT_FIFO_SERVER + }; if (!_init_client(dmeventd_path, &fifos)) { - stack; - return -ESRCH; + ret = -ESRCH; + goto_out; } ret = daemon_talk(&fifos, msg, DM_EVENT_CMD_HELLO, NULL, NULL, 0, 0); - dm_free(msg->data); + free(msg->data); msg->data = 0; if (!ret) ret = daemon_talk(&fifos, msg, cmd, dso_name, dev_name, evmask, timeout); - +out: /* what is the opposite of init? */ fini_fifos(&fifos); @@ -612,24 +647,30 @@ int dm_event_register_handler(const struct dm_event_handler *dmevh) int ret = 1, err; const char *uuid; struct dm_task *dmt; - struct dm_event_daemon_message msg = { 0, 0, NULL }; + struct dm_event_daemon_message msg = { 0 }; - if (!(dmt = _get_device_info(dmevh))) { - stack; - return 0; - } + if (!(dmt = _get_device_info(dmevh))) + return_0; uuid = dm_task_get_uuid(dmt); + if (!strstr(dmevh->dso, "libdevmapper-event-lvm2thin.so") && + !strstr(dmevh->dso, "libdevmapper-event-lvm2vdo.so") && + !strstr(dmevh->dso, "libdevmapper-event-lvm2snapshot.so") && + !strstr(dmevh->dso, "libdevmapper-event-lvm2mirror.so") && + !strstr(dmevh->dso, "libdevmapper-event-lvm2raid.so")) + log_warn("WARNING: %s: dmeventd plugins are deprecated.", dmevh->dso); + + if ((err = _do_event(DM_EVENT_CMD_REGISTER_FOR_EVENT, dmevh->dmeventd_path, &msg, dmevh->dso, uuid, dmevh->mask, dmevh->timeout)) < 0) { - log_error("%s: event registration failed: %s", + log_error("%s: event registration failed: %s.", dm_task_get_name(dmt), msg.data ? msg.data : strerror(-err)); ret = 0; } - dm_free(msg.data); + free(msg.data); dm_task_destroy(dmt); @@ -641,24 +682,22 @@ int dm_event_unregister_handler(const struct dm_event_handler *dmevh) int ret = 1, err; const char *uuid; struct dm_task *dmt; - struct dm_event_daemon_message msg = { 0, 0, NULL }; + struct dm_event_daemon_message msg = { 0 }; - if (!(dmt = _get_device_info(dmevh))) { - stack; - return 0; - } + if (!(dmt = _get_device_info(dmevh))) + return_0; uuid = dm_task_get_uuid(dmt); if ((err = _do_event(DM_EVENT_CMD_UNREGISTER_FOR_EVENT, dmevh->dmeventd_path, &msg, dmevh->dso, uuid, dmevh->mask, dmevh->timeout)) < 0) { - log_error("%s: event deregistration failed: %s", + log_error("%s: event deregistration failed: %s.", dm_task_get_name(dmt), msg.data ? msg.data : strerror(-err)); ret = 0; } - dm_free(msg.data); + free(msg.data); dm_task_destroy(dmt); @@ -670,15 +709,11 @@ int dm_event_unregister_handler(const struct dm_event_handler *dmevh) static char *_fetch_string(char **src, const int delimiter) { char *p, *ret; + size_t len = (p = strchr(*src, delimiter)) ? + (size_t)(p - *src) : strlen(*src); - if ((p = strchr(*src, delimiter))) - *p = 0; - - if ((ret = dm_strdup(*src))) - *src += strlen(ret) + 1; - - if (p) - *p = delimiter; + if ((ret = strndup(*src, len))) + *src += len + 1; return ret; } @@ -687,20 +722,18 @@ static char *_fetch_string(char **src, const int delimiter) static int _parse_message(struct dm_event_daemon_message *msg, char **dso_name, char **uuid, enum dm_event_mask *evmask) { - char *id = NULL; + char *id; char *p = msg->data; if ((id = _fetch_string(&p, ' ')) && (*dso_name = _fetch_string(&p, ' ')) && (*uuid = _fetch_string(&p, ' '))) { *evmask = atoi(p); - - dm_free(id); + free(id); return 0; } - if (id) - dm_free(id); + free(id); return -ENOMEM; } @@ -714,21 +747,24 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next) char *reply_dso = NULL, *reply_uuid = NULL; enum dm_event_mask reply_mask = 0; struct dm_task *dmt = NULL; - struct dm_event_daemon_message msg = { 0, 0, NULL }; + struct dm_event_daemon_message msg = { 0 }; struct dm_info info; if (!(dmt = _get_device_info(dmevh))) { - stack; - return 0; + log_debug("Device does not exists (uuid=%s, name=%s, %d:%d).", + dmevh->uuid, dmevh->dev_name, + dmevh->major, dmevh->minor); + ret = -ENODEV; + goto fail; } uuid = dm_task_get_uuid(dmt); - if (_do_event(next ? DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE : - DM_EVENT_CMD_GET_REGISTERED_DEVICE, dmevh->dmeventd_path, - &msg, dmevh->dso, uuid, dmevh->mask, 0)) { + /* FIXME Distinguish errors connecting to daemon */ + if ((ret = _do_event(next ? DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE : + DM_EVENT_CMD_GET_REGISTERED_DEVICE, dmevh->dmeventd_path, + &msg, dmevh->dso, uuid, dmevh->mask, 0))) { log_debug("%s: device not registered.", dm_task_get_name(dmt)); - ret = -ENOENT; goto fail; } @@ -739,7 +775,7 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next) dm_task_destroy(dmt); dmt = NULL; - dm_free(msg.data); + free(msg.data); msg.data = NULL; _dm_event_handler_clear_dev_info(dmevh); @@ -747,8 +783,8 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next) ret = -ENXIO; /* dmeventd probably gave us bogus uuid back */ goto fail; } - dmevh->uuid = dm_strdup(reply_uuid); - if (!dmevh->uuid) { + + if (!(dmevh->uuid = strdup(reply_uuid))) { ret = -ENOMEM; goto fail; } @@ -761,14 +797,13 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next) dm_event_handler_set_dso(dmevh, reply_dso); dm_event_handler_set_event_mask(dmevh, reply_mask); - dm_free(reply_dso); + free(reply_dso); reply_dso = NULL; - dm_free(reply_uuid); + free(reply_uuid); reply_uuid = NULL; - dmevh->dev_name = dm_strdup(dm_task_get_name(dmt)); - if (!dmevh->dev_name) { + if (!(dmevh->dev_name = strdup(dm_task_get_name(dmt)))) { ret = -ENOMEM; goto fail; } @@ -786,9 +821,9 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next) return ret; fail: - dm_free(msg.data); - dm_free(reply_dso); - dm_free(reply_uuid); + free(msg.data); + free(reply_dso); + free(reply_uuid); _dm_event_handler_clear_dev_info(dmevh); if (dmt) dm_task_destroy(dmt); @@ -808,23 +843,116 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next) int dm_event_get_version(struct dm_event_fifos *fifos, int *version) { char *p; - struct dm_event_daemon_message msg = { 0, 0, NULL }; + struct dm_event_daemon_message msg = { 0 }; if (daemon_talk(fifos, &msg, DM_EVENT_CMD_HELLO, NULL, NULL, 0, 0)) return 0; p = msg.data; *version = 0; - p = strchr(p, ' '); /* Message ID */ - if (!p) return 0; - p = strchr(p + 1, ' '); /* HELLO */ - if (!p) return 0; - p = strchr(p + 1, ' '); /* HELLO, once more */ - if (p) + if (!p || !(p = strchr(p, ' '))) /* Message ID */ + return 0; + if (!(p = strchr(p + 1, ' '))) /* HELLO */ + return 0; + if ((p = strchr(p + 1, ' '))) /* HELLO, once more */ *version = atoi(p); + return 1; } +void dm_event_log_set(int debug_log_level, int use_syslog) +{ + _debug_level = debug_log_level; + _use_syslog = use_syslog; +} + +void dm_event_log(const char *subsys, int level, const char *file, + int line, int dm_errno_or_class, + const char *format, va_list ap) +{ + static int _abort_on_internal_errors = -1; + static pthread_mutex_t _log_mutex = PTHREAD_MUTEX_INITIALIZER; + static time_t start = 0; + const char *indent = ""; + FILE *stream = log_stderr(level) ? stderr : stdout; + int prio; + time_t now; + int log_with_debug = 0; + + if (subsys[0] == '#') { + /* Subsystems starting with '#' are logged + * only when debugging is enabled. */ + log_with_debug++; + subsys++; + } + + switch (log_level(level)) { + case _LOG_DEBUG: + /* Never shown without -ddd */ + if (_debug_level < 3) + return; + prio = LOG_DEBUG; + indent = " "; + break; + case _LOG_INFO: + if (log_with_debug && _debug_level < 2) + return; + prio = LOG_INFO; + indent = " "; + break; + case _LOG_NOTICE: + if (log_with_debug && _debug_level < 1) + return; + prio = LOG_NOTICE; + indent = " "; + break; + case _LOG_WARN: + prio = LOG_WARNING; + break; + case _LOG_ERR: + prio = LOG_ERR; + stream = stderr; + break; + default: + prio = LOG_CRIT; + } + + /* Serialize to keep lines readable */ + pthread_mutex_lock(&_log_mutex); + + if (_use_syslog) { + vsyslog(prio, format, ap); + } else { + now = time(NULL); + if (!start) + start = now; + now -= start; + if (_debug_level) + fprintf(stream, "[%2lld:%02lld] %8x:%-6s%s", + (long long)now / 60, (long long)now % 60, + // TODO: Maybe use shorter ID + // ((int)(pthread_self()) >> 6) & 0xffff, + (int)pthread_self(), subsys, + (_debug_level > 3) ? "" : indent); + if (_debug_level > 3) + fprintf(stream, "%28s:%4d %s", file, line, indent); + vfprintf(stream, _(format), ap); + fputc('\n', stream); + fflush(stream); + } + + pthread_mutex_unlock(&_log_mutex); + + if (_abort_on_internal_errors < 0) + /* Set when env DM_ABORT_ON_INTERNAL_ERRORS is not "0" */ + _abort_on_internal_errors = + strcmp(getenv("DM_ABORT_ON_INTERNAL_ERRORS") ? : "0", "0"); + + if (_abort_on_internal_errors && + !strncmp(format, INTERNAL_ERROR, sizeof(INTERNAL_ERROR) - 1)) + abort(); +} + #if 0 /* left out for now */ static char *_skip_string(char *src, const int delimiter) @@ -837,7 +965,7 @@ static char *_skip_string(char *src, const int delimiter) int dm_event_set_timeout(const char *device_path, uint32_t timeout) { - struct dm_event_daemon_message msg = { 0, 0, NULL }; + struct dm_event_daemon_message msg = { 0 }; if (!device_exists(device_path)) return -ENODEV; @@ -849,22 +977,24 @@ int dm_event_set_timeout(const char *device_path, uint32_t timeout) int dm_event_get_timeout(const char *device_path, uint32_t *timeout) { int ret; - struct dm_event_daemon_message msg = { 0, 0, NULL }; + struct dm_event_daemon_message msg = { 0 }; if (!device_exists(device_path)) return -ENODEV; + if (!(ret = _do_event(DM_EVENT_CMD_GET_TIMEOUT, &msg, NULL, device_path, 0, 0))) { char *p = _skip_string(msg.data, ' '); if (!p) { - log_error("malformed reply from dmeventd '%s'\n", + log_error("Malformed reply from dmeventd '%s'.", msg.data); + free(msg.data); return -EIO; } *timeout = atoi(p); } - if (msg.data) - dm_free(msg.data); + free(msg.data); + return ret; } #endif diff --git a/daemons/dmeventd/libdevmapper-event.h b/daemons/dmeventd/libdevmapper-event.h index 7ce3f39..cc65e72 100644 --- a/daemons/dmeventd/libdevmapper-event.h +++ b/daemons/dmeventd/libdevmapper-event.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. + * Copyright (C) 2005-2015 Red Hat, Inc. All rights reserved. * * This file is part of the device-mapper userspace tools. * @@ -9,7 +9,7 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /* @@ -21,6 +21,7 @@ #ifndef LIB_DMEVENT_H #define LIB_DMEVENT_H +#include <stdarg.h> #include <stdint.h> /* @@ -46,8 +47,9 @@ enum dm_event_mask { }; #define DM_EVENT_ALL_ERRORS DM_EVENT_ERROR_MASK -#define DM_EVENT_PROTOCOL_VERSION 1 +#define DM_EVENT_PROTOCOL_VERSION 2 +struct dm_task; struct dm_event_handler; struct dm_event_handler *dm_event_handler_create(void); @@ -104,6 +106,25 @@ int dm_event_get_registered_device(struct dm_event_handler *dmevh, int next); int dm_event_register_handler(const struct dm_event_handler *dmevh); int dm_event_unregister_handler(const struct dm_event_handler *dmevh); +/* Set debug level for logging, and whether to log on stdout/stderr or syslog */ +void dm_event_log_set(int debug_log_level, int use_syslog); + +/* Log messages acroding to current debug level */ +__attribute__((format(printf, 6, 0))) +void dm_event_log(const char *subsys, int level, const char *file, + int line, int dm_errno_or_class, + const char *format, va_list ap); +/* Macro to route print_log do dm_event_log() */ +#define DM_EVENT_LOG_FN(subsys) \ +void print_log(int level, const char *file, int line, int dm_errno_or_class,\ + const char *format, ...)\ +{\ + va_list ap;\ + va_start(ap, format);\ + dm_event_log(subsys, level, file, line, dm_errno_or_class, format, ap);\ + va_end(ap);\ +} + /* Prototypes for DSO interface, see dmeventd.c, struct dso_data for detailed descriptions. */ // FIXME misuse of bitmask as enum diff --git a/daemons/dmeventd/libdevmapper-event.pc.in b/daemons/dmeventd/libdevmapper-event.pc.in index 839433f..fcad5bc 100644 --- a/daemons/dmeventd/libdevmapper-event.pc.in +++ b/daemons/dmeventd/libdevmapper-event.pc.in @@ -8,4 +8,3 @@ Description: device-mapper event library Version: @DM_LIB_PATCHLEVEL@ Cflags: -I${includedir} Libs: -L${libdir} -ldevmapper-event -Requires.private: devmapper diff --git a/daemons/dmeventd/plugins/Makefile.in b/daemons/dmeventd/plugins/Makefile.in index b26e6d8..951dd2b 100644 --- a/daemons/dmeventd/plugins/Makefile.in +++ b/daemons/dmeventd/plugins/Makefile.in @@ -1,6 +1,6 @@ # # Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. -# Copyright (C) 2004-2005, 2011 Red Hat, Inc. All rights reserved. +# Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved. # # This file is part of LVM2. # @@ -10,33 +10,13 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA srcdir = @srcdir@ top_srcdir = @top_srcdir@ top_builddir = @top_builddir@ -SUBDIRS += lvm2 - -ifneq ("@MIRRORS@", "none") - SUBDIRS += mirror -endif - -ifneq ("@SNAPSHOTS@", "none") - SUBDIRS += snapshot -endif - -ifneq ("@RAID@", "none") - SUBDIRS += raid -endif - -ifneq ("@THIN@", "none") - SUBDIRS += thin -endif - -ifeq ($(MAKECMDGOALS),distclean) - SUBDIRS = lvm2 mirror snapshot raid thin -endif +SUBDIRS += lvm2 snapshot raid thin mirror vdo include $(top_builddir)/make.tmpl @@ -44,3 +24,4 @@ snapshot: lvm2 mirror: lvm2 raid: lvm2 thin: lvm2 +vdo: lvm2 diff --git a/daemons/dmeventd/plugins/lvm2/Makefile.in b/daemons/dmeventd/plugins/lvm2/Makefile.in index fcb2a0a..858de9d 100644 --- a/daemons/dmeventd/plugins/lvm2/Makefile.in +++ b/daemons/dmeventd/plugins/lvm2/Makefile.in @@ -1,5 +1,5 @@ # -# Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved. +# Copyright (C) 2010-2014 Red Hat, Inc. All rights reserved. # # This file is part of LVM2. # @@ -9,13 +9,14 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA srcdir = @srcdir@ top_srcdir = @top_srcdir@ top_builddir = @top_builddir@ CLDFLAGS += -L$(top_builddir)/tools +LIBS += $(DMEVENT_LIBS) $(PTHREAD_LIBS) @LVM2CMD_LIB@ SOURCES = dmeventd_lvm.c @@ -24,8 +25,6 @@ LIB_VERSION = $(LIB_VERSION_LVM) include $(top_builddir)/make.tmpl -LIBS += @LVM2CMD_LIB@ -ldevmapper $(PTHREAD_LIBS) $(DAEMON_LIBS) - install_lvm2: install_lib_shared install: install_lvm2 diff --git a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c index 5d5a46b..cddf6ce 100644 --- a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c +++ b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Red Hat, Inc. All rights reserved. + * Copyright (C) 2010-2015 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -9,19 +9,15 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "lib.h" -#include "log.h" - -#include "lvm2cmd.h" +#include "lib/misc/lib.h" #include "dmeventd_lvm.h" +#include "daemons/dmeventd/libdevmapper-event.h" +#include "tools/lvm2cmd.h" #include <pthread.h> -#include <syslog.h> - -extern int dmeventd_debug; /* * register_device() is called first and performs initialisation. @@ -35,49 +31,27 @@ static pthread_mutex_t _register_mutex = PTHREAD_MUTEX_INITIALIZER; static int _register_count = 0; static struct dm_pool *_mem_pool = NULL; static void *_lvm_handle = NULL; +static DM_LIST_INIT(_env_registry); + +struct env_data { + struct dm_list list; + const char *cmd; + const char *data; +}; + +DM_EVENT_LOG_FN("#lvm") + +static void _lvm2_print_log(int level, const char *file, int line, + int dm_errno_or_class, const char *msg) +{ + print_log(level, file, line, dm_errno_or_class, "%s", msg); +} /* * Currently only one event can be processed at a time. */ static pthread_mutex_t _event_mutex = PTHREAD_MUTEX_INITIALIZER; -/* - * FIXME Do not pass things directly to syslog, rather use the existing logging - * facilities to sort logging ... however that mechanism needs to be somehow - * configurable and we don't have that option yet - */ -static void _temporary_log_fn(int level, - const char *file __attribute__((unused)), - int line __attribute__((unused)), - int dm_errno __attribute__((unused)), - const char *message) -{ - level &= ~(_LOG_STDERR | _LOG_ONCE); - - switch (level) { - case _LOG_DEBUG: - if (dmeventd_debug >= 3) - syslog(LOG_DEBUG, "%s", message); - break; - case _LOG_INFO: - if (dmeventd_debug >= 2) - syslog(LOG_INFO, "%s", message); - break; - case _LOG_NOTICE: - if (dmeventd_debug >= 1) - syslog(LOG_NOTICE, "%s", message); - break; - case _LOG_WARN: - syslog(LOG_WARNING, "%s", message); - break; - case _LOG_ERR: - syslog(LOG_ERR, "%s", message); - break; - default: - syslog(LOG_CRIT, "%s", message); - } -} - void dmeventd_lvm2_lock(void) { pthread_mutex_lock(&_event_mutex); @@ -94,23 +68,26 @@ int dmeventd_lvm2_init(void) pthread_mutex_lock(&_register_mutex); - /* - * Need some space for allocations. 1024 should be more - * than enough for what we need (device mapper name splitting) - */ - if (!_mem_pool && !(_mem_pool = dm_pool_create("mirror_dso", 1024))) - goto out; - if (!_lvm_handle) { - lvm2_log_fn(_temporary_log_fn); - if (!(_lvm_handle = lvm2_init())) { - dm_pool_destroy(_mem_pool); - _mem_pool = NULL; + lvm2_log_fn(_lvm2_print_log); + + if (!(_lvm_handle = lvm2_init_threaded())) + goto out; + + /* + * Need some space for allocations. 1024 should be more + * than enough for what we need (device mapper name splitting) + */ + if (!_mem_pool && !(_mem_pool = dm_pool_create("mirror_dso", 1024))) { + lvm2_exit(_lvm_handle); + _lvm_handle = NULL; goto out; } + lvm2_disable_dmeventd_monitoring(_lvm_handle); /* FIXME Temporary: move to dmeventd core */ lvm2_run(_lvm_handle, "_memlock_inc"); + log_debug("lvm plugin initilized."); } _register_count++; @@ -126,11 +103,14 @@ void dmeventd_lvm2_exit(void) pthread_mutex_lock(&_register_mutex); if (!--_register_count) { + log_debug("lvm plugin shuting down."); lvm2_run(_lvm_handle, "_memlock_dec"); dm_pool_destroy(_mem_pool); _mem_pool = NULL; + dm_list_init(&_env_registry); lvm2_exit(_lvm_handle); _lvm_handle = NULL; + log_debug("lvm plugin exited."); } pthread_mutex_unlock(&_register_mutex); @@ -143,32 +123,69 @@ struct dm_pool *dmeventd_lvm2_pool(void) int dmeventd_lvm2_run(const char *cmdline) { - return lvm2_run(_lvm_handle, cmdline); + /* coverity[missing_lock] no locking for run part */ + return (lvm2_run(_lvm_handle, cmdline) == LVM2_COMMAND_SUCCEEDED); } int dmeventd_lvm2_command(struct dm_pool *mem, char *buffer, size_t size, const char *cmd, const char *device) { + static char _internal_prefix[] = "_dmeventd_"; char *vg = NULL, *lv = NULL, *layer; int r; + struct env_data *env_data; + const char *env = NULL; if (!dm_split_lvm_name(mem, device, &vg, &lv, &layer)) { - syslog(LOG_ERR, "Unable to determine VG name from %s.\n", - device); + log_error("Unable to determine VG name from %s.", + device); return 0; } /* strip off the mirror component designations */ - layer = strstr(lv, "_mlog"); - if (layer) + if ((layer = strstr(lv, "_mimagetmp")) || + (layer = strstr(lv, "_mlog"))) *layer = '\0'; + if (!strncmp(cmd, _internal_prefix, sizeof(_internal_prefix) - 1)) { + /* check if ENVVAR wasn't already resolved */ + dm_list_iterate_items(env_data, &_env_registry) + if (!strcmp(cmd, env_data->cmd)) { + env = env_data->data; + break; + } + + if (!env) { + /* run lvm2 command to find out setting value */ + dmeventd_lvm2_lock(); + if (!dmeventd_lvm2_run(cmd) || + !(env = getenv(cmd))) { + dmeventd_lvm2_unlock(); + log_error("Unable to find configured command."); + return 0; + } + /* output of internal command passed via env var */ + env = dm_pool_strdup(_mem_pool, env); /* copy with lock */ + dmeventd_lvm2_unlock(); + if (!env || + !(env_data = dm_pool_zalloc(_mem_pool, sizeof(*env_data))) || + !(env_data->cmd = dm_pool_strdup(_mem_pool, cmd))) { + log_error("Unable to allocate env memory."); + return 0; + } + env_data->data = env; + /* add to ENVVAR registry */ + dm_list_add(&_env_registry, &env_data->list); + } + cmd = env; + } + r = dm_snprintf(buffer, size, "%s %s/%s", cmd, vg, lv); dm_pool_free(mem, vg); if (r < 0) { - syslog(LOG_ERR, "Unable to form LVM command. (too long).\n"); + log_error("Unable to form LVM command. (too long)."); return 0; } diff --git a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h index 1960c71..353a03d 100644 --- a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h +++ b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Red Hat, Inc. All rights reserved. + * Copyright (C) 2010-2015 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -9,7 +9,7 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /* @@ -22,11 +22,11 @@ * liblvm2cmd thread-safe so this can go away. */ -#include "libdevmapper.h" - #ifndef _DMEVENTD_LVMWRAP_H #define _DMEVENTD_LVMWRAP_H +struct dm_pool; + int dmeventd_lvm2_init(void); void dmeventd_lvm2_exit(void); int dmeventd_lvm2_run(const char *cmdline); @@ -39,4 +39,36 @@ struct dm_pool *dmeventd_lvm2_pool(void); int dmeventd_lvm2_command(struct dm_pool *mem, char *buffer, size_t size, const char *cmd, const char *device); +#define dmeventd_lvm2_run_with_lock(cmdline) \ + ({\ + int rc;\ + dmeventd_lvm2_lock();\ + rc = dmeventd_lvm2_run(cmdline);\ + dmeventd_lvm2_unlock();\ + rc;\ + }) + +#define dmeventd_lvm2_init_with_pool(name, st) \ + ({\ + struct dm_pool *mem;\ + st = NULL;\ + if (dmeventd_lvm2_init()) {\ + if ((mem = dm_pool_create(name, 2048)) &&\ + (st = dm_pool_zalloc(mem, sizeof(*st))))\ + st->mem = mem;\ + else {\ + if (mem)\ + dm_pool_destroy(mem);\ + dmeventd_lvm2_exit();\ + }\ + }\ + st;\ + }) + +#define dmeventd_lvm2_exit_with_pool(pool) \ + do {\ + dm_pool_destroy(pool->mem);\ + dmeventd_lvm2_exit();\ + } while(0) + #endif /* _DMEVENTD_LVMWRAP_H */ diff --git a/daemons/dmeventd/plugins/mirror/Makefile.in b/daemons/dmeventd/plugins/mirror/Makefile.in index 85b33c9..1552d97 100644 --- a/daemons/dmeventd/plugins/mirror/Makefile.in +++ b/daemons/dmeventd/plugins/mirror/Makefile.in @@ -1,6 +1,6 @@ # # Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. -# Copyright (C) 2004-2005, 2008-2011 Red Hat, Inc. All rights reserved. +# Copyright (C) 2004-2005, 2008-2014 Red Hat, Inc. All rights reserved. # # This file is part of LVM2. # @@ -10,14 +10,14 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA srcdir = @srcdir@ top_srcdir = @top_srcdir@ top_builddir = @top_builddir@ -INCLUDES += -I$(top_srcdir)/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2 -CLDFLAGS += -L$(top_builddir)/tools -L$(top_builddir)/daemons/dmeventd/plugins/lvm2 +CLDFLAGS += -L$(top_builddir)/daemons/dmeventd/plugins/lvm2 +LIBS += -ldevmapper-event-lvm2 SOURCES = dmeventd_mirror.c @@ -25,13 +25,8 @@ LIB_NAME = libdevmapper-event-lvm2mirror LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX) LIB_VERSION = $(LIB_VERSION_LVM) -CFLOW_LIST = $(SOURCES) -CFLOW_LIST_TARGET = $(LIB_NAME).cflow - include $(top_builddir)/make.tmpl -LIBS += -ldevmapper-event-lvm2 -ldevmapper $(DAEMON_LIBS) - install_lvm2: install_dm_plugin install: install_lvm2 diff --git a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c index e59feb4..5f1db32 100644 --- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c +++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005-2012 Red Hat, Inc. All rights reserved. + * Copyright (C) 2005-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -9,28 +9,30 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "lib.h" +#include "lib/misc/lib.h" +#include "daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h" +#include "daemons/dmeventd/libdevmapper-event.h" +#include "lib/activate/activate.h" -#include "lvm2cmd.h" -#include "errors.h" -#include "libdevmapper-event.h" -#include "dmeventd_lvm.h" -#include "defaults.h" - -#include <syslog.h> /* FIXME Replace syslog with multilog */ -/* FIXME Missing openlog? */ -/* FIXME Replace most syslogs with log_error() style messages and add complete context. */ /* FIXME Reformat to 80 char lines. */ #define ME_IGNORE 0 #define ME_INSYNC 1 #define ME_FAILURE 2 -static int _process_status_code(const char status_code, const char *dev_name, - const char *dev_type, int r) +struct dso_state { + struct dm_pool *mem; + char cmd_lvconvert[512]; +}; + +DM_EVENT_LOG_FN("mirr") + +static void _process_status_code(dm_status_mirror_health_t health, + uint32_t major, uint32_t minor, + const char *dev_type, int *r) { /* * A => Alive - No failures @@ -40,196 +42,170 @@ static int _process_status_code(const char status_code, const char *dev_name, * R => Read - A read failure occurred, mirror data unaffected * U => Unclassified failure (bug) */ - if (status_code == 'F') { - syslog(LOG_ERR, "%s device %s flush failed.", - dev_type, dev_name); - r = ME_FAILURE; - } else if (status_code == 'S') - syslog(LOG_ERR, "%s device %s sync failed.", - dev_type, dev_name); - else if (status_code == 'R') - syslog(LOG_ERR, "%s device %s read failed.", - dev_type, dev_name); - else if (status_code != 'A') { - syslog(LOG_ERR, "%s device %s has failed (%c).", - dev_type, dev_name, status_code); - r = ME_FAILURE; + switch (health) { + case DM_STATUS_MIRROR_ALIVE: + return; + case DM_STATUS_MIRROR_FLUSH_FAILED: + log_error("%s device %u:%u flush failed.", + dev_type, major, minor); + *r = ME_FAILURE; + break; + case DM_STATUS_MIRROR_SYNC_FAILED: + log_error("%s device %u:%u sync failed.", + dev_type, major, minor); + break; + case DM_STATUS_MIRROR_READ_FAILED: + log_error("%s device %u:%u read failed.", + dev_type, major, minor); + break; + default: + log_error("%s device %u:%u has failed (%c).", + dev_type, major, minor, (char)health); + *r = ME_FAILURE; + break; } - - return r; } -static int _get_mirror_event(char *params) +static int _get_mirror_event(struct dso_state *state, char *params) { - int i, r = ME_INSYNC; - char **args = NULL; - char *dev_status_str; - char *log_status_str; - char *sync_str; - char *p = NULL; - int log_argc, num_devs; + int r = ME_INSYNC; + unsigned i; + struct dm_status_mirror *ms; - /* - * dm core parms: 0 409600 mirror - * Mirror core parms: 2 253:4 253:5 400/400 - * New-style failure params: 1 AA - * New-style log params: 3 cluster 253:3 A - * or 3 disk 253:3 A - * or 1 core - */ - - /* number of devices */ - if (!dm_split_words(params, 1, 0, &p)) - goto out_parse; - - if (!(num_devs = atoi(p)) || - (num_devs > DEFAULT_MIRROR_MAX_IMAGES) || (num_devs < 0)) - goto out_parse; - p += strlen(p) + 1; - - /* devices names + "400/400" + "1 AA" + 1 or 3 log parms + NULL */ - args = dm_malloc((num_devs + 7) * sizeof(char *)); - if (!args || dm_split_words(p, num_devs + 7, 0, args) < num_devs + 5) - goto out_parse; - - /* FIXME: Code differs from lib/mirror/mirrored.c */ - dev_status_str = args[2 + num_devs]; - log_argc = atoi(args[3 + num_devs]); - log_status_str = args[3 + num_devs + log_argc]; - sync_str = args[num_devs]; + if (!dm_get_status_mirror(state->mem, params, &ms)) { + log_error("Unable to parse mirror status string."); + return ME_IGNORE; + } /* Check for bad mirror devices */ - for (i = 0; i < num_devs; i++) - r = _process_status_code(dev_status_str[i], args[i], - i ? "Secondary mirror" : "Primary mirror", r); + for (i = 0; i < ms->dev_count; ++i) + _process_status_code(ms->devs[i].health, + ms->devs[i].major, ms->devs[i].minor, + i ? "Secondary mirror" : "Primary mirror", &r); /* Check for bad disk log device */ - if (log_argc > 1) - r = _process_status_code(log_status_str[0], - args[2 + num_devs + log_argc], - "Log", r); - - if (r == ME_FAILURE) - goto out; - - p = strstr(sync_str, "/"); - if (p) { - p[0] = '\0'; - if (strcmp(sync_str, p+1)) - r = ME_IGNORE; - p[0] = '/'; - } else - goto out_parse; - -out: - dm_free(args); - return r; + for (i = 0; i < ms->log_count; ++i) + _process_status_code(ms->logs[i].health, + ms->logs[i].major, ms->logs[i].minor, + "Log", &r); -out_parse: - dm_free(args); - syslog(LOG_ERR, "Unable to parse mirror status string."); - return ME_IGNORE; -} + /* Ignore if not in-sync */ + if ((r == ME_INSYNC) && (ms->insync_regions != ms->total_regions)) + r = ME_IGNORE; -static int _remove_failed_devices(const char *device) -{ - int r; -#define CMD_SIZE 256 /* FIXME Use system restriction */ - char cmd_str[CMD_SIZE]; + dm_pool_free(state->mem, ms); - if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), cmd_str, sizeof(cmd_str), - "lvconvert --config devices{ignore_suspended_devices=1} " - "--repair --use-policies", device)) - return -ENAMETOOLONG; /* FIXME Replace with generic error return - reason for failure has already got logged */ + return r; +} - r = dmeventd_lvm2_run(cmd_str); +static int _remove_failed_devices(const char *cmd_lvconvert, const char *device) +{ + /* if repair goes OK, report success even if lvscan has failed */ + if (!dmeventd_lvm2_run_with_lock(cmd_lvconvert)) { + log_error("Repair of mirrored device %s failed.", device); + return 0; + } - syslog(LOG_INFO, "Repair of mirrored device %s %s.", device, - (r == ECMD_PROCESSED) ? "finished successfully" : "failed"); + log_info("Repair of mirrored device %s finished successfully.", device); - return (r == ECMD_PROCESSED) ? 0 : -1; + return 1; } void process_event(struct dm_task *dmt, enum dm_event_mask event __attribute__((unused)), - void **unused __attribute__((unused))) + void **user) { + struct dso_state *state = *user; void *next = NULL; uint64_t start, length; char *target_type = NULL; char *params; const char *device = dm_task_get_name(dmt); - dmeventd_lvm2_lock(); - do { next = dm_get_next_target(dmt, next, &start, &length, &target_type, ¶ms); if (!target_type) { - syslog(LOG_INFO, "%s mapping lost.", device); + log_info("%s mapping lost.", device); continue; } - if (strcmp(target_type, "mirror")) { - syslog(LOG_INFO, "%s has unmirrored portion.", device); + if (strcmp(target_type, TARGET_NAME_MIRROR)) { + log_info("%s has unmirrored portion.", device); continue; } - switch(_get_mirror_event(params)) { + switch(_get_mirror_event(state, params)) { case ME_INSYNC: /* FIXME: all we really know is that this _part_ of the device is in sync Also, this is not an error */ - syslog(LOG_NOTICE, "%s is now in-sync.", device); + log_notice("%s is now in-sync.", device); break; case ME_FAILURE: - syslog(LOG_ERR, "Device failure in %s.", device); - if (_remove_failed_devices(device)) + log_error("Device failure in %s.", device); + if (!_remove_failed_devices(state->cmd_lvconvert, device)) /* FIXME Why are all the error return codes unused? Get rid of them? */ - syslog(LOG_ERR, "Failed to remove faulty devices in %s.", - device); + log_error("Failed to remove faulty devices in %s.", + device); /* Should check before warning user that device is now linear else - syslog(LOG_NOTICE, "%s is now a linear device.\n", - device); + log_notice("%s is now a linear device.", + device); */ break; case ME_IGNORE: break; default: /* FIXME Provide value then! */ - syslog(LOG_INFO, "Unknown event received."); + log_warn("WARNING: %s received unknown event.", device); } } while (next); - - dmeventd_lvm2_unlock(); } int register_device(const char *device, const char *uuid __attribute__((unused)), int major __attribute__((unused)), int minor __attribute__((unused)), - void **unused __attribute__((unused))) + void **user) { - if (!dmeventd_lvm2_init()) - return 0; + struct dso_state *state; - syslog(LOG_INFO, "Monitoring mirror device %s for events.", device); + if (!dmeventd_lvm2_init_with_pool("mirror_state", state)) + goto_bad; + + /* CANNOT use --config as this disables cached content */ + if (!dmeventd_lvm2_command(state->mem, state->cmd_lvconvert, sizeof(state->cmd_lvconvert), + "lvconvert --repair --use-policies", device)) + goto_bad; + + *user = state; + + log_info("Monitoring mirror device %s for events.", device); return 1; +bad: + log_error("Failed to monitor mirror %s.", device); + + if (state) + dmeventd_lvm2_exit_with_pool(state); + + return 0; } int unregister_device(const char *device, const char *uuid __attribute__((unused)), int major __attribute__((unused)), int minor __attribute__((unused)), - void **unused __attribute__((unused))) + void **user) { - syslog(LOG_INFO, "No longer monitoring mirror device %s for events.", - device); - dmeventd_lvm2_exit(); + struct dso_state *state = *user; + + dmeventd_lvm2_exit_with_pool(state); + log_info("No longer monitoring mirror device %s for events.", + device); return 1; } diff --git a/daemons/dmeventd/plugins/raid/Makefile.in b/daemons/dmeventd/plugins/raid/Makefile.in index a6b7788..2082351 100644 --- a/daemons/dmeventd/plugins/raid/Makefile.in +++ b/daemons/dmeventd/plugins/raid/Makefile.in @@ -1,5 +1,5 @@ # -# Copyright (C) 2011 Red Hat, Inc. All rights reserved. +# Copyright (C) 2011-2014 Red Hat, Inc. All rights reserved. # # This file is part of LVM2. # @@ -9,14 +9,14 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA srcdir = @srcdir@ top_srcdir = @top_srcdir@ top_builddir = @top_builddir@ -INCLUDES += -I$(top_srcdir)/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2 -CLDFLAGS += -L$(top_builddir)/tools -L$(top_builddir)/daemons/dmeventd/plugins/lvm2 +CLDFLAGS += -L$(top_builddir)/daemons/dmeventd/plugins/lvm2 +LIBS += -ldevmapper-event-lvm2 SOURCES = dmeventd_raid.c @@ -24,13 +24,8 @@ LIB_NAME = libdevmapper-event-lvm2raid LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX) LIB_VERSION = $(LIB_VERSION_LVM) -CFLOW_LIST = $(SOURCES) -CFLOW_LIST_TARGET = $(LIB_NAME).cflow - include $(top_builddir)/make.tmpl -LIBS += -ldevmapper-event-lvm2 -ldevmapper - install_lvm2: install_dm_plugin install: install_lvm2 diff --git a/daemons/dmeventd/plugins/raid/dmeventd_raid.c b/daemons/dmeventd/plugins/raid/dmeventd_raid.c index a3ecdc1..5be605a 100644 --- a/daemons/dmeventd/plugins/raid/dmeventd_raid.c +++ b/daemons/dmeventd/plugins/raid/dmeventd_raid.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2005-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -9,164 +9,181 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "lib.h" +#include "lib/misc/lib.h" +#include "daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h" +#include "daemons/dmeventd/libdevmapper-event.h" +#include "lib/config/defaults.h" -#include "lvm2cmd.h" -#include "errors.h" -#include "libdevmapper-event.h" -#include "dmeventd_lvm.h" +/* Hold enough elements for the mximum number of RAID images */ +#define RAID_DEVS_ELEMS ((DEFAULT_RAID_MAX_IMAGES + 63) / 64) + +struct dso_state { + struct dm_pool *mem; + char cmd_lvconvert[512]; + uint64_t raid_devs[RAID_DEVS_ELEMS]; + int failed; + int warned; +}; + +DM_EVENT_LOG_FN("raid") -#include <syslog.h> /* FIXME Replace syslog with multilog */ -/* FIXME Missing openlog? */ -/* FIXME Replace most syslogs with log_error() style messages and add complete context. */ /* FIXME Reformat to 80 char lines. */ -/* - * run_repair is a close copy to - * plugins/mirror/dmeventd_mirror.c:_remove_failed_devices() - */ -static int run_repair(const char *device) +static int _process_raid_event(struct dso_state *state, char *params, const char *device) { - int r; -#define CMD_SIZE 256 /* FIXME Use system restriction */ - char cmd_str[CMD_SIZE]; + struct dm_status_raid *status; + const char *d; + int dead = 0, r = 1; + uint32_t dev; - if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), cmd_str, sizeof(cmd_str), - "lvconvert --config devices{ignore_suspended_devices=1} " - "--repair --use-policies", device)) - return -1; - - r = dmeventd_lvm2_run(cmd_str); + if (!dm_get_status_raid(state->mem, params, &status)) { + log_error("Failed to process status line for %s.", device); + return 0; + } - if (r != ECMD_PROCESSED) - syslog(LOG_INFO, "Repair of RAID device %s failed.", device); + d = status->dev_health; + while ((d = strchr(d, 'D'))) { + dev = (uint32_t)(d - status->dev_health); - return (r == ECMD_PROCESSED) ? 0 : -1; -} + if (!(state->raid_devs[dev / 64] & (UINT64_C(1) << (dev % 64)))) { + state->raid_devs[dev / 64] |= (UINT64_C(1) << (dev % 64)); + log_warn("WARNING: Device #%u of %s array, %s, has failed.", + dev, status->raid_type, device); + } -static int _process_raid_event(char *params, const char *device) -{ - int i, n, failure = 0; - char *p, *a[4]; - char *raid_type; - char *num_devices; - char *health_chars; - char *resync_ratio; + d++; + dead = 1; + } /* - * RAID parms: <raid_type> <#raid_disks> \ - * <health chars> <resync ratio> + * if we are converting from non-RAID to RAID (e.g. linear -> raid1) + * and too many original devices die, such that we cannot continue + * the "recover" operation, the sync action will go to "idle", the + * unsynced devs will remain at 'a', and the original devices will + * NOT SWITCH TO 'D', but will remain at 'A' - hoping to be revived. + * + * This is simply the way the kernel works... */ - if (!dm_split_words(params, 4, 0, a)) { - syslog(LOG_ERR, "Failed to process status line for %s\n", - device); - return -EINVAL; - } - raid_type = a[0]; - num_devices = a[1]; - health_chars = a[2]; - resync_ratio = a[3]; - - if (!(n = atoi(num_devices))) { - syslog(LOG_ERR, "Failed to parse number of devices for %s: %s", - device, num_devices); - return -EINVAL; + if (!strcmp(status->sync_action, "idle") && + (status->dev_health[0] == 'a') && + (status->insync_regions < status->total_regions)) { + log_error("Primary sources for new RAID, %s, have failed.", + device); + dead = 1; /* run it through LVM repair */ } - for (i = 0; i < n; i++) { - switch (health_chars[i]) { - case 'A': - /* Device is 'A'live and well */ - case 'a': - /* Device is 'a'live, but not yet in-sync */ - break; - case 'D': - syslog(LOG_ERR, - "Device #%d of %s array, %s, has failed.", - i, raid_type, device); - failure++; - break; - default: - /* Unhandled character returned from kernel */ - break; + if (dead) { + /* + * Use the first event to run a repair ignoring any additional ones. + * + * We presume lvconvert to do pre-repair + * checks to avoid bloat in this plugin. + */ + if (!state->warned && status->insync_regions < status->total_regions) { + state->warned = 1; + log_warn("WARNING: waiting for resynchronization to finish " + "before initiating repair on RAID device %s.", device); + /* Fall through to allow lvconvert to run. */ } - if (failure) - return run_repair(device); - } - p = strstr(resync_ratio, "/"); - if (!p) { - syslog(LOG_ERR, "Failed to parse resync_ratio for %s: %s", - device, resync_ratio); - return -EINVAL; + if (state->failed) + goto out; /* already reported */ + + state->failed = 1; + + /* if repair goes OK, report success even if lvscan has failed */ + if (!dmeventd_lvm2_run_with_lock(state->cmd_lvconvert)) { + log_error("Repair of RAID device %s failed.", device); + r = 0; + } + } else { + state->failed = 0; + if (status->insync_regions == status->total_regions) + memset(&state->raid_devs, 0, sizeof(state->raid_devs)); + log_info("%s array, %s, is %s in-sync.", + status->raid_type, device, + (status->insync_regions == status->total_regions) ? "now" : "not"); } - p[0] = '\0'; - syslog(LOG_INFO, "%s array, %s, is %s in-sync.", - raid_type, device, strcmp(resync_ratio, p+1) ? "not" : "now"); +out: + dm_pool_free(state->mem, status); - return 0; + return r; } void process_event(struct dm_task *dmt, enum dm_event_mask event __attribute__((unused)), - void **unused __attribute__((unused))) + void **user) { + struct dso_state *state = *user; void *next = NULL; uint64_t start, length; char *target_type = NULL; char *params; const char *device = dm_task_get_name(dmt); - dmeventd_lvm2_lock(); - do { next = dm_get_next_target(dmt, next, &start, &length, &target_type, ¶ms); if (!target_type) { - syslog(LOG_INFO, "%s mapping lost.", device); + log_info("%s mapping lost.", device); continue; } if (strcmp(target_type, "raid")) { - syslog(LOG_INFO, "%s has non-raid portion.", device); + log_info("%s has non-raid portion.", device); continue; } - if (_process_raid_event(params, device)) - syslog(LOG_ERR, "Failed to process event for %s", - device); + if (!_process_raid_event(state, params, device)) + log_error("Failed to process event for %s.", + device); } while (next); - - dmeventd_lvm2_unlock(); } int register_device(const char *device, const char *uuid __attribute__((unused)), int major __attribute__((unused)), int minor __attribute__((unused)), - void **unused __attribute__((unused))) + void **user) { - if (!dmeventd_lvm2_init()) - return 0; + struct dso_state *state; + + if (!dmeventd_lvm2_init_with_pool("raid_state", state)) + goto_bad; - syslog(LOG_INFO, "Monitoring RAID device %s for events.", device); + if (!dmeventd_lvm2_command(state->mem, state->cmd_lvconvert, sizeof(state->cmd_lvconvert), + "lvconvert --repair --use-policies", device)) + goto_bad; + + *user = state; + + log_info("Monitoring RAID device %s for events.", device); return 1; +bad: + log_error("Failed to monitor RAID %s.", device); + + if (state) + dmeventd_lvm2_exit_with_pool(state); + + return 0; } int unregister_device(const char *device, const char *uuid __attribute__((unused)), int major __attribute__((unused)), int minor __attribute__((unused)), - void **unused __attribute__((unused))) + void **user) { - syslog(LOG_INFO, "No longer monitoring RAID device %s for events.", - device); - dmeventd_lvm2_exit(); + struct dso_state *state = *user; + + dmeventd_lvm2_exit_with_pool(state); + log_info("No longer monitoring RAID device %s for events.", + device); return 1; } diff --git a/daemons/dmeventd/plugins/snapshot/Makefile.in b/daemons/dmeventd/plugins/snapshot/Makefile.in index a4cff15..c5f71b8 100644 --- a/daemons/dmeventd/plugins/snapshot/Makefile.in +++ b/daemons/dmeventd/plugins/snapshot/Makefile.in @@ -1,6 +1,6 @@ # # Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. -# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. +# Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved. # # This file is part of the LVM2. # @@ -10,14 +10,14 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA srcdir = @srcdir@ top_srcdir = @top_srcdir@ top_builddir = @top_builddir@ -INCLUDES += -I$(top_srcdir)/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2 -CLDFLAGS += -L$(top_builddir)/tools -L$(top_builddir)/daemons/dmeventd/plugins/lvm2 +CLDFLAGS += -L$(top_builddir)/daemons/dmeventd/plugins/lvm2 +LIBS += -ldevmapper-event-lvm2 SOURCES = dmeventd_snapshot.c @@ -26,8 +26,6 @@ LIB_VERSION = $(LIB_VERSION_LVM) include $(top_builddir)/make.tmpl -LIBS += -ldevmapper-event-lvm2 -ldevmapper $(DAEMON_LIBS) - install_lvm2: install_dm_plugin install: install_lvm2 diff --git a/daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c b/daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c index 205218a..9c49a72 100644 --- a/daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c +++ b/daemons/dmeventd/plugins/snapshot/dmeventd_snapshot.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2007-2015 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -9,74 +9,35 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "lib.h" - -#include "lvm2cmd.h" -#include "errors.h" -#include "libdevmapper-event.h" -#include "dmeventd_lvm.h" +#include "lib/misc/lib.h" +#include "daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h" +#include "daemons/dmeventd/libdevmapper-event.h" +#include <sys/sysmacros.h> #include <sys/wait.h> -#include <syslog.h> /* FIXME Replace syslog with multilog */ -/* FIXME Missing openlog? */ +#include <stdarg.h> +#include <pthread.h> /* First warning when snapshot is 80% full. */ -#define WARNING_THRESH 80 +#define WARNING_THRESH (DM_PERCENT_1 * 80) /* Run a check every 5%. */ -#define CHECK_STEP 5 +#define CHECK_STEP (DM_PERCENT_1 * 5) /* Do not bother checking snapshots less than 50% full. */ -#define CHECK_MINIMUM 50 +#define CHECK_MINIMUM (DM_PERCENT_1 * 50) #define UMOUNT_COMMAND "/bin/umount" -struct snap_status { - int invalid; - int used; - int max; -}; - struct dso_state { - int percent_check; - int known_size; - char cmd_str[1024]; + struct dm_pool *mem; + dm_percent_t percent_check; + uint64_t known_size; + char cmd_lvextend[512]; }; -/* FIXME possibly reconcile this with target_percent when we gain - access to regular LVM library here. */ -static void _parse_snapshot_params(char *params, struct snap_status *status) -{ - char *p; - /* - * xx/xx -- fractions used/max - * Invalid -- snapshot invalidated - * Unknown -- status unknown - */ - status->used = status->max = 0; - - if (!strncmp(params, "Invalid", 7)) { - status->invalid = 1; - return; - } - - /* - * When we return without setting non-zero max, the parent is - * responsible for reporting errors. - */ - if (!strncmp(params, "Unknown", 7)) - return; - - if (!(p = strstr(params, "/"))) - return; - - *p = '\0'; - p++; - - status->used = atoi(params); - status->max = atoi(p); -} +DM_EVENT_LOG_FN("snap") static int _run(const char *cmd, ...) { @@ -102,7 +63,7 @@ static int _run(const char *cmd, ...) va_end(ap); execvp(cmd, (char **)argv); - syslog(LOG_ERR, "Failed to execute %s: %s.\n", cmd, strerror(errno)); + log_sys_error("exec", cmd); exit(127); } @@ -121,8 +82,44 @@ static int _run(const char *cmd, ...) static int _extend(const char *cmd) { - return dmeventd_lvm2_run(cmd) == ECMD_PROCESSED; + log_debug("Extending snapshot via %s.", cmd); + return dmeventd_lvm2_run_with_lock(cmd); +} + +#ifdef SNAPSHOT_REMOVE +/* Remove invalid snapshot from dm-table */ +/* Experimental for now and not used by default */ +static int _remove(const char *uuid) +{ + int r = 1; + uint32_t cookie = 0; + struct dm_task *dmt; + + if (!(dmt = dm_task_create(DM_DEVICE_REMOVE))) + return 0; + + if (!dm_task_set_uuid(dmt, uuid)) { + r = 0; + goto_out; + } + + dm_task_retry_remove(dmt); + + if (!dm_task_set_cookie(dmt, &cookie, 0)) { + r = 0; + goto_out; + } + + if (!dm_task_run(dmt)) { + r = 0; + goto_out; + } +out: + dm_task_destroy(dmt); + + return r; } +#endif /* SNAPSHOT_REMOVE */ static void _umount(const char *device, int major, int minor) { @@ -130,9 +127,11 @@ static void _umount(const char *device, int major, int minor) char buffer[4096]; char *words[3]; struct stat st; + const char procmounts[] = "/proc/mounts"; - if (!(mounts = fopen("/proc/mounts", "r"))) { - syslog(LOG_ERR, "Could not read /proc/mounts. Not umounting %s.\n", device); + if (!(mounts = fopen(procmounts, "r"))) { + log_sys_error("fopen", procmounts); + log_error("Not umounting %s.", device); return; } @@ -150,117 +149,128 @@ static void _umount(const char *device, int major, int minor) continue; /* can't stat, skip this one */ if (S_ISBLK(st.st_mode) && - major(st.st_rdev) == major && - minor(st.st_rdev) == minor) { - syslog(LOG_ERR, "Unmounting invalid snapshot %s from %s.\n", device, words[1]); - if (!_run(UMOUNT_COMMAND, "-fl", words[1], NULL)) - syslog(LOG_ERR, "Failed to umount snapshot %s from %s: %s.\n", - device, words[1], strerror(errno)); + (int) major(st.st_rdev) == major && + (int) minor(st.st_rdev) == minor) { + log_error("Unmounting invalid snapshot %s from %s.", device, words[1]); + if (!_run(UMOUNT_COMMAND, "-fl", words[1], NULL)) + log_error("Failed to umount snapshot %s from %s: %s.", + device, words[1], strerror(errno)); } } if (fclose(mounts)) - syslog(LOG_ERR, "Failed to close /proc/mounts.\n"); + log_sys_error("close", procmounts); } void process_event(struct dm_task *dmt, enum dm_event_mask event __attribute__((unused)), - void **private) + void **user) { + struct dso_state *state = *user; void *next = NULL; uint64_t start, length; char *target_type = NULL; char *params; - struct snap_status status = { 0 }; + struct dm_status_snapshot *status = NULL; const char *device = dm_task_get_name(dmt); int percent; - struct dso_state *state = *private; + struct dm_info info; + int ret; /* No longer monitoring, waiting for remove */ if (!state->percent_check) return; - dmeventd_lvm2_lock(); - dm_get_next_target(dmt, next, &start, &length, &target_type, ¶ms); - if (!target_type) - goto out; - - _parse_snapshot_params(params, &status); - - if (status.invalid) { - struct dm_info info; - if (dm_task_get_info(dmt, &info)) { - dmeventd_lvm2_unlock(); - _umount(device, info.major, info.minor); - return; - } /* else; too bad, but this is best-effort thing... */ + if (!target_type || strcmp(target_type, "snapshot")) { + log_error("Target %s is not snapshot.", target_type); + return; } - /* Snapshot size had changed. Clear the threshold. */ - if (state->known_size != status.max) { - state->percent_check = CHECK_MINIMUM; - state->known_size = status.max; + if (!dm_get_status_snapshot(state->mem, params, &status)) { + log_error("Cannot parse snapshot %s state: %s.", device, params); + return; } /* * If the snapshot has been invalidated or we failed to parse * the status string. Report the full status string to syslog. */ - if (status.invalid || !status.max) { - syslog(LOG_ERR, "Snapshot %s changed state to: %s\n", device, params); + if (status->invalid || status->overflow || !status->total_sectors) { + log_warn("WARNING: Snapshot %s changed state to: %s and should be removed.", + device, params); state->percent_check = 0; + if (dm_task_get_info(dmt, &info)) + _umount(device, info.major, info.minor); +#ifdef SNAPSHOT_REMOVE + /* Maybe configurable ? */ + _remove(dm_task_get_uuid(dmt)); +#endif + if ((ret = pthread_kill(pthread_self(), SIGALRM)) && (ret != ESRCH)) + log_sys_error("pthread_kill", "self"); goto out; } - percent = 100 * status.used / status.max; + if (length <= (status->used_sectors - status->metadata_sectors)) { + /* TODO eventually recognize earlier when room is enough */ + log_info("Dropping monitoring of fully provisioned snapshot %s.", + device); + if ((ret = pthread_kill(pthread_self(), SIGALRM)) && (ret != ESRCH)) + log_sys_error("pthread_kill", "self"); + goto out; + } + + /* Snapshot size had changed. Clear the threshold. */ + if (state->known_size != status->total_sectors) { + state->percent_check = CHECK_MINIMUM; + state->known_size = status->total_sectors; + } + + percent = dm_make_percent(status->used_sectors, status->total_sectors); if (percent >= state->percent_check) { /* Usage has raised more than CHECK_STEP since the last time. Run actions. */ state->percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP; if (percent >= WARNING_THRESH) /* Print a warning to syslog. */ - syslog(LOG_WARNING, "Snapshot %s is now %i%% full.\n", device, percent); + log_warn("WARNING: Snapshot %s is now %.2f%% full.", + device, dm_percent_to_round_float(percent, 2)); + /* Try to extend the snapshot, in accord with user-set policies */ - if (!_extend(state->cmd_str)) - syslog(LOG_ERR, "Failed to extend snapshot %s.\n", device); + if (!_extend(state->cmd_lvextend)) + log_error("Failed to extend snapshot %s.", device); } - out: - dmeventd_lvm2_unlock(); + dm_pool_free(state->mem, status); } int register_device(const char *device, const char *uuid __attribute__((unused)), int major __attribute__((unused)), int minor __attribute__((unused)), - void **private) + void **user) { struct dso_state *state; - if (!dmeventd_lvm2_init()) - goto out; - - if (!(state = dm_zalloc(sizeof(*state)))) - goto bad; + if (!dmeventd_lvm2_init_with_pool("snapshot_state", state)) + goto_bad; - if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), - state->cmd_str, sizeof(state->cmd_str), + if (!dmeventd_lvm2_command(state->mem, state->cmd_lvextend, + sizeof(state->cmd_lvextend), "lvextend --use-policies", device)) - goto bad; + goto_bad; state->percent_check = CHECK_MINIMUM; - state->known_size = 0; - *private = state; + *user = state; - syslog(LOG_INFO, "Monitoring snapshot %s\n", device); + log_info("Monitoring snapshot %s.", device); return 1; bad: - dm_free(state); - dmeventd_lvm2_exit(); -out: - syslog(LOG_ERR, "Failed to monitor snapshot %s.\n", device); + log_error("Failed to monitor snapshot %s.", device); + + if (state) + dmeventd_lvm2_exit_with_pool(state); return 0; } @@ -269,13 +279,12 @@ int unregister_device(const char *device, const char *uuid __attribute__((unused)), int major __attribute__((unused)), int minor __attribute__((unused)), - void **private) + void **user) { - struct dso_state *state = *private; + struct dso_state *state = *user; - syslog(LOG_INFO, "No longer monitoring snapshot %s\n", device); - dm_free(state); - dmeventd_lvm2_exit(); + dmeventd_lvm2_exit_with_pool(state); + log_info("No longer monitoring snapshot %s.", device); return 1; } diff --git a/daemons/dmeventd/plugins/thin/Makefile.in b/daemons/dmeventd/plugins/thin/Makefile.in index e964ab5..b98e426 100644 --- a/daemons/dmeventd/plugins/thin/Makefile.in +++ b/daemons/dmeventd/plugins/thin/Makefile.in @@ -1,5 +1,5 @@ # -# Copyright (C) 2011 Red Hat, Inc. All rights reserved. +# Copyright (C) 2011-2014 Red Hat, Inc. All rights reserved. # # This file is part of LVM2. # @@ -9,14 +9,14 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA srcdir = @srcdir@ top_srcdir = @top_srcdir@ top_builddir = @top_builddir@ -INCLUDES += -I$(top_srcdir)/tools -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2 -CLDFLAGS += -L$(top_builddir)/tools -L$(top_builddir)/daemons/dmeventd/plugins/lvm2 +CLDFLAGS += -L$(top_builddir)/daemons/dmeventd/plugins/lvm2 +LIBS += -ldevmapper-event-lvm2 SOURCES = dmeventd_thin.c @@ -24,13 +24,8 @@ LIB_NAME = libdevmapper-event-lvm2thin LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX) LIB_VERSION = $(LIB_VERSION_LVM) -CFLOW_LIST = $(SOURCES) -CFLOW_LIST_TARGET = $(LIB_NAME).cflow - include $(top_builddir)/make.tmpl -LIBS += -ldevmapper-event-lvm2 -ldevmapper - install_lvm2: install_dm_plugin install: install_lvm2 diff --git a/daemons/dmeventd/plugins/thin/dmeventd_thin.c b/daemons/dmeventd/plugins/thin/dmeventd_thin.c index a1af4c0..5ff5bb8 100644 --- a/daemons/dmeventd/plugins/thin/dmeventd_thin.c +++ b/daemons/dmeventd/plugins/thin/dmeventd_thin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2012 Red Hat, Inc. All rights reserved. + * Copyright (C) 2011-2017 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -9,424 +9,392 @@ * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "lib.h" - -#include "lvm2cmd.h" -#include "errors.h" -#include "libdevmapper-event.h" -#include "dmeventd_lvm.h" +#include "lib/misc/lib.h" +#include "daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h" +#include "daemons/dmeventd/libdevmapper-event.h" #include <sys/wait.h> -#include <syslog.h> /* FIXME Replace syslog with multilog */ -/* FIXME Missing openlog? */ +#include <stdarg.h> -/* First warning when thin is 80% full. */ -#define WARNING_THRESH 80 +/* TODO - move this mountinfo code into library to be reusable */ +#ifdef __linux__ +# include "libdm/misc/kdev_t.h" +#else +# define MAJOR(x) major((x)) +# define MINOR(x) minor((x)) +#endif + +/* First warning when thin data or metadata is 80% full. */ +#define WARNING_THRESH (DM_PERCENT_1 * 80) +/* Umount thin LVs when thin data or metadata LV is >= + * and lvextend --use-policies has failed. */ +#define UMOUNT_THRESH (DM_PERCENT_1 * 95) /* Run a check every 5%. */ -#define CHECK_STEP 5 -/* Do not bother checking thins less than 50% full. */ -#define CHECK_MINIMUM 50 +#define CHECK_STEP (DM_PERCENT_1 * 5) +/* Do not bother checking thin data or metadata is less than 50% full. */ +#define CHECK_MINIMUM (DM_PERCENT_1 * 50) #define UMOUNT_COMMAND "/bin/umount" +#define MAX_FAILS (256) /* ~42 mins between cmd call retry with 10s delay */ + #define THIN_DEBUG 0 struct dso_state { struct dm_pool *mem; int metadata_percent_check; + int metadata_percent; int data_percent_check; + int data_percent; uint64_t known_metadata_size; uint64_t known_data_size; - char cmd_str[1024]; + unsigned fails; + unsigned max_fails; + int restore_sigset; + sigset_t old_sigset; + pid_t pid; + char *argv[3]; + char *cmd_str; }; +DM_EVENT_LOG_FN("thin") -/* TODO - move this mountinfo code into library to be reusable */ -#ifdef linux -# include "kdev_t.h" -#else -# define MAJOR(x) major((x)) -# define MINOR(x) minor((x)) -# define MKDEV(x,y) makedev((x),(y)) -#endif - -/* Macros to make string defines */ -#define TO_STRING_EXP(A) #A -#define TO_STRING(A) TO_STRING_EXP(A) - -static int _is_octal(int a) -{ - return (((a) & ~7) == '0'); -} - -/* Convert mangled mountinfo into normal ASCII string */ -static void _unmangle_mountinfo_string(const char *src, char *buf) +static int _run_command(struct dso_state *state) { - if (!src) - return; - - while (*src) { - if ((*src == '\\') && - _is_octal(src[1]) && _is_octal(src[2]) && _is_octal(src[3])) { - *buf++ = 64 * (src[1] & 7) + 8 * (src[2] & 7) + (src[3] & 7); - src += 4; - } else - *buf++ = *src++; + char val[16]; + int i; + + /* Mark for possible lvm2 command we are running from dmeventd + * lvm2 will not try to talk back to dmeventd while processing it */ + (void) setenv("LVM_RUN_BY_DMEVENTD", "1", 1); + + if (state->data_percent) { + /* Prepare some known data to env vars for easy use */ + if (dm_snprintf(val, sizeof(val), "%d", + state->data_percent / DM_PERCENT_1) != -1) + (void) setenv("DMEVENTD_THIN_POOL_DATA", val, 1); + if (dm_snprintf(val, sizeof(val), "%d", + state->metadata_percent / DM_PERCENT_1) != -1) + (void) setenv("DMEVENTD_THIN_POOL_METADATA", val, 1); + } else { + /* For an error event it's for a user to check status and decide */ + log_debug("Error event processing."); } - *buf = '\0'; -} - -/* Parse one line of mountinfo */ -static int _parse_mountinfo_line(const char *line, unsigned *maj, unsigned *min, char *buf) -{ - char root[PATH_MAX + 1]; - char target[PATH_MAX + 1]; - /* TODO: maybe detect availability of %ms glib support ? */ - if (sscanf(line, "%*u %*u %u:%u %" TO_STRING(PATH_MAX) - "s %" TO_STRING(PATH_MAX) "s", - maj, min, root, target) < 4) + log_verbose("Executing command: %s", state->cmd_str); + + /* TODO: + * Support parallel run of 'task' and it's waitpid maintainence + * ATM we can't handle signaling of SIGALRM + * as signalling is not allowed while 'process_event()' is running + */ + if (!(state->pid = fork())) { + /* child */ + (void) close(0); + for (i = 3; i < 255; ++i) (void) close(i); + execvp(state->argv[0], state->argv); + _exit(errno); + } else if (state->pid == -1) { + log_error("Can't fork command %s.", state->cmd_str); + state->fails = 1; return 0; - - _unmangle_mountinfo_string(target, buf); - -#if THIN_DEBUG - syslog(LOG_DEBUG, "Mounted %u:%u %s", *maj, *min, buf); -#endif + } return 1; } -/* Get dependencies for device, and try to find matching device */ -static int _has_deps(const char *name, int tp_major, int tp_minor, int *dev_minor) +static int _use_policy(struct dm_task *dmt, struct dso_state *state) { - struct dm_task *dmt; - const struct dm_deps *deps; - struct dm_info info; - int major, minor; - int r = 0; - - if (!(dmt = dm_task_create(DM_DEVICE_DEPS))) - return 0; - - if (!dm_task_set_name(dmt, name)) - goto out; - - if (!dm_task_no_open_count(dmt)) - goto out; - - if (!dm_task_run(dmt)) - goto out; - - if (!dm_task_get_info(dmt, &info)) - goto out; - - if (!(deps = dm_task_get_deps(dmt))) - goto out; - - if (!info.exists || deps->count != 1) - goto out; - - major = (int) MAJOR(deps->device[0]); - minor = (int) MINOR(deps->device[0]); - if ((major != tp_major) || (minor != tp_minor)) - goto out; - - *dev_minor = info.minor; - #if THIN_DEBUG - { - char dev_name[PATH_MAX]; - if (dm_device_get_name(major, minor, 0, dev_name, sizeof(dev_name))) - syslog(LOG_DEBUG, "Found %s (%u:%u) depends on %s", - name, major, *dev_minor, dev_name); - } + log_debug("dmeventd executes: %s.", state->cmd_str); #endif - r = 1; -out: - dm_task_destroy(dmt); - - return r; -} - -/* Get all active devices */ -static int _find_all_devs(dm_bitset_t bs, int tp_major, int tp_minor) -{ - struct dm_task *dmt; - struct dm_names *names; - unsigned next = 0; - int minor, r = 1; + if (state->argv[0]) + return _run_command(state); - if (!(dmt = dm_task_create(DM_DEVICE_LIST))) + if (!dmeventd_lvm2_run_with_lock(state->cmd_str)) { + log_error("Failed command for %s.", dm_task_get_name(dmt)); + state->fails = 1; return 0; - - if (!dm_task_run(dmt)) { - r = 0; - goto out; - } - - if (!(names = dm_task_get_names(dmt))) { - r = 0; - goto out; - } - - if (!names->dev) - goto out; - - do { - names = (struct dm_names *)((char *) names + next); - if (_has_deps(names->name, tp_major, tp_minor, &minor)) - dm_bit_set(bs, minor); - next = names->next; - } while (next); - -out: - dm_task_destroy(dmt); - - return r; -} - -static int _extend(struct dso_state *state) -{ -#if THIN_DEBUG - syslog(LOG_INFO, "dmeventd executes: %s.\n", state->cmd_str); -#endif - return (dmeventd_lvm2_run(state->cmd_str) == ECMD_PROCESSED); -} - -static int _run(const char *cmd, ...) -{ - va_list ap; - int argc = 1; /* for argv[0], i.e. cmd */ - int i = 0; - const char **argv; - pid_t pid = fork(); - int status; - - if (pid == 0) { /* child */ - va_start(ap, cmd); - while (va_arg(ap, const char *)) - ++argc; - va_end(ap); - - /* + 1 for the terminating NULL */ - argv = alloca(sizeof(const char *) * (argc + 1)); - - argv[0] = cmd; - va_start(ap, cmd); - while ((argv[++i] = va_arg(ap, const char *))); - va_end(ap); - - execvp(cmd, (char **)argv); - syslog(LOG_ERR, "Failed to execute %s: %s.\n", cmd, strerror(errno)); - exit(127); - } - - if (pid > 0) { /* parent */ - if (waitpid(pid, &status, 0) != pid) - return 0; /* waitpid failed */ - if (!WIFEXITED(status) || WEXITSTATUS(status)) - return 0; /* the child failed */ } - if (pid < 0) - return 0; /* fork failed */ + state->fails = 0; - return 1; /* all good */ + return 1; } -/* - * Find all thin pool users and try to umount them. - * TODO: work with read-only thin pool support - */ -static void _umount(struct dm_task *dmt, const char *device) +/* Check if executed command has finished + * Only 1 command may run */ +static int _wait_for_pid(struct dso_state *state) { - static const char mountinfo[] = "/proc/self/mountinfo"; - static const size_t MINORS = 4096; - FILE *minfo; - char buffer[4096]; - char target[PATH_MAX]; - struct dm_info info; - unsigned maj, min; - dm_bitset_t minors; /* Bitset for active thin pool minors */ - - if (!dm_task_get_info(dmt, &info)) - return; + int status = 0; - dmeventd_lvm2_unlock(); + if (state->pid == -1) + return 1; - if (!(minors = dm_bitset_create(NULL, MINORS))) { - syslog(LOG_ERR, "Failed to allocate bitset. Not unmounting %s.\n", device); - goto out; - } - - if (!(minfo = fopen(mountinfo, "r"))) { - syslog(LOG_ERR, "Could not read %s. Not umounting %s.\n", mountinfo, device); - goto out; - } + if (!waitpid(state->pid, &status, WNOHANG)) + return 0; - if (!_find_all_devs(minors, info.major, info.minor)) { - syslog(LOG_ERR, "Failed to detect mounted volumes for %s.\n", device); - goto out; + /* Wait for finish */ + if (WIFEXITED(status)) { + log_verbose("Child %d exited with status %d.", + state->pid, WEXITSTATUS(status)); + state->fails = WEXITSTATUS(status) ? 1 : 0; + } else { + if (WIFSIGNALED(status)) + log_verbose("Child %d was terminated with status %d.", + state->pid, WTERMSIG(status)); + state->fails = 1; } - while (!feof(minfo)) { - /* read mountinfo line */ - if (!fgets(buffer, sizeof(buffer), minfo)) - break; /* eof, likely */ - - if (_parse_mountinfo_line(buffer, &maj, &min, target) && - (maj == info.major) && dm_bit(minors, min)) { - syslog(LOG_INFO, "Unmounting thin volume %s from %s.\n", - device, target); - if (!_run(UMOUNT_COMMAND, "-fl", target, NULL)) - syslog(LOG_ERR, "Failed to umount thin %s from %s: %s.\n", - device, target, strerror(errno)); - } - } + state->pid = -1; - if (fclose(minfo)) - syslog(LOG_ERR, "Failed to close %s\n", mountinfo); - - dm_bitset_destroy(minors); -out: - dmeventd_lvm2_lock(); + return 1; } void process_event(struct dm_task *dmt, enum dm_event_mask event __attribute__((unused)), - void **private) + void **user) { const char *device = dm_task_get_name(dmt); - int percent; - struct dso_state *state = *private; + struct dso_state *state = *user; struct dm_status_thin_pool *tps = NULL; void *next = NULL; uint64_t start, length; char *target_type = NULL; char *params; + int needs_policy = 0; + struct dm_task *new_dmt = NULL; -#if 0 - /* No longer monitoring, waiting for remove */ - if (!state->meta_percent_check && !state->data_percent_check) - return; +#if THIN_DEBUG + log_debug("Watch for tp-data:%.2f%% tp-metadata:%.2f%%.", + dm_percent_to_round_float(state->data_percent_check, 2), + dm_percent_to_round_float(state->metadata_percent_check, 2)); #endif - dmeventd_lvm2_lock(); + if (!_wait_for_pid(state)) { + log_warn("WARNING: Skipping event, child %d is still running (%s).", + state->pid, state->cmd_str); + return; + } + + if (event & DM_EVENT_DEVICE_ERROR) { + /* Error -> no need to check and do instant resize */ + state->data_percent = state->metadata_percent = 0; + if (_use_policy(dmt, state)) + goto out; + + stack; + + /* + * Rather update oldish status + * since after 'command' processing + * percentage info could have changed a lot. + * If we would get above UMOUNT_THRESH + * we would wait for next sigalarm. + */ + if (!(new_dmt = dm_task_create(DM_DEVICE_STATUS))) + goto_out; + + if (!dm_task_set_uuid(new_dmt, dm_task_get_uuid(dmt))) + goto_out; + + /* Non-blocking status read */ + if (!dm_task_no_flush(new_dmt)) + log_warn("WARNING: Can't set no_flush for dm status."); + + if (!dm_task_run(new_dmt)) + goto_out; + + dmt = new_dmt; + } dm_get_next_target(dmt, next, &start, &length, &target_type, ¶ms); if (!target_type || (strcmp(target_type, "thin-pool") != 0)) { - syslog(LOG_ERR, "Invalid target type.\n"); + log_error("Invalid target type."); goto out; } if (!dm_get_status_thin_pool(state->mem, params, &tps)) { - syslog(LOG_ERR, "Failed to parse status.\n"); - _umount(dmt, device); + log_error("Failed to parse status."); goto out; } #if THIN_DEBUG - syslog(LOG_INFO, "%p: Got status %" PRIu64 " / %" PRIu64 - " %" PRIu64 " / %" PRIu64 ".\n", state, - tps->used_metadata_blocks, tps->total_metadata_blocks, - tps->used_data_blocks, tps->total_data_blocks); + log_debug("Thin pool status " FMTu64 "/" FMTu64 " " + FMTu64 "/" FMTu64 ".", + tps->used_metadata_blocks, tps->total_metadata_blocks, + tps->used_data_blocks, tps->total_data_blocks); #endif /* Thin pool size had changed. Clear the threshold. */ if (state->known_metadata_size != tps->total_metadata_blocks) { state->metadata_percent_check = CHECK_MINIMUM; state->known_metadata_size = tps->total_metadata_blocks; + state->fails = 0; } if (state->known_data_size != tps->total_data_blocks) { state->data_percent_check = CHECK_MINIMUM; state->known_data_size = tps->total_data_blocks; + state->fails = 0; } - percent = 100 * tps->used_metadata_blocks / tps->total_metadata_blocks; - if (percent >= state->metadata_percent_check) { - /* - * Usage has raised more than CHECK_STEP since the last - * time. Run actions. - */ - state->metadata_percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP; - - /* FIXME: extension of metadata needs to be written! */ - if (percent >= WARNING_THRESH) /* Print a warning to syslog. */ - syslog(LOG_WARNING, "Thin metadata %s is now %i%% full.\n", - device, percent); - /* Try to extend the metadata, in accord with user-set policies */ - if (!_extend(state)) { - syslog(LOG_ERR, "Failed to extend thin metadata %s.\n", - device); - _umount(dmt, device); - } - /* FIXME: hmm READ-ONLY switch should happen in error path */ - } + /* + * Trigger action when threshold boundary is exceeded. + * Report 80% threshold warning when it's used above 80%. + * Only 100% is exception as it cannot be surpased so policy + * action is called for: >50%, >55% ... >95%, 100% + */ + state->metadata_percent = dm_make_percent(tps->used_metadata_blocks, tps->total_metadata_blocks); + if ((state->metadata_percent > WARNING_THRESH) && + (state->metadata_percent > state->metadata_percent_check)) + log_warn("WARNING: Thin pool %s metadata is now %.2f%% full.", + device, dm_percent_to_round_float(state->metadata_percent, 2)); + if (state->metadata_percent > CHECK_MINIMUM) { + /* Run action when usage raised more than CHECK_STEP since the last time */ + if (state->metadata_percent > state->metadata_percent_check) + needs_policy = 1; + state->metadata_percent_check = (state->metadata_percent / CHECK_STEP + 1) * CHECK_STEP; + if (state->metadata_percent_check == DM_PERCENT_100) + state->metadata_percent_check--; /* Can't get bigger then 100% */ + } else + state->metadata_percent_check = CHECK_MINIMUM; - percent = 100 * tps->used_data_blocks / tps->total_data_blocks; - if (percent >= state->data_percent_check) { - /* - * Usage has raised more than CHECK_STEP since - * the last time. Run actions. - */ - state->data_percent_check = (percent / CHECK_STEP) * CHECK_STEP + CHECK_STEP; - - if (percent >= WARNING_THRESH) /* Print a warning to syslog. */ - syslog(LOG_WARNING, "Thin %s is now %i%% full.\n", device, percent); - /* Try to extend the thin data, in accord with user-set policies */ - if (!_extend(state)) { - syslog(LOG_ERR, "Failed to extend thin %s.\n", device); - state->data_percent_check = 0; - _umount(dmt, device); + state->data_percent = dm_make_percent(tps->used_data_blocks, tps->total_data_blocks); + if ((state->data_percent > WARNING_THRESH) && + (state->data_percent > state->data_percent_check)) + log_warn("WARNING: Thin pool %s data is now %.2f%% full.", + device, dm_percent_to_round_float(state->data_percent, 2)); + if (state->data_percent > CHECK_MINIMUM) { + /* Run action when usage raised more than CHECK_STEP since the last time */ + if (state->data_percent > state->data_percent_check) + needs_policy = 1; + state->data_percent_check = (state->data_percent / CHECK_STEP + 1) * CHECK_STEP; + if (state->data_percent_check == DM_PERCENT_100) + state->data_percent_check--; /* Can't get bigger then 100% */ + } else + state->data_percent_check = CHECK_MINIMUM; + + /* Reduce number of _use_policy() calls by power-of-2 factor till frequency of MAX_FAILS is reached. + * Avoids too high number of error retries, yet shows some status messages in log regularly. + * i.e. PV could have been pvmoved and VG/LV was locked for a while... + */ + if (state->fails) { + if (state->fails++ <= state->max_fails) { + log_debug("Postponing frequently failing policy (%u <= %u).", + state->fails - 1, state->max_fails); + goto out; } - /* FIXME: hmm READ-ONLY switch should happen in error path */ - } + if (state->max_fails < MAX_FAILS) + state->max_fails <<= 1; + state->fails = needs_policy = 1; /* Retry failing command */ + } else + state->max_fails = 1; /* Reset on success */ + + if (needs_policy) + _use_policy(dmt, state); out: if (tps) dm_pool_free(state->mem, tps); - dmeventd_lvm2_unlock(); + if (new_dmt) + dm_task_destroy(new_dmt); +} + +/* Handle SIGCHLD for a thread */ +static void _sig_child(int signum __attribute__((unused))) +{ + /* empty SIG_IGN */; +} + +/* Setup handler for SIGCHLD when executing external command + * to get quick 'waitpid()' reaction + * It will interrupt syscall just like SIGALRM and + * invoke process_event(). + */ +static void _init_thread_signals(struct dso_state *state) +{ + struct sigaction act = { .sa_handler = _sig_child }; + sigset_t my_sigset; + + sigemptyset(&my_sigset); + + if (sigaction(SIGCHLD, &act, NULL)) + log_warn("WARNING: Failed to set SIGCHLD action."); + else if (sigaddset(&my_sigset, SIGCHLD)) + log_warn("WARNING: Failed to add SIGCHLD to set."); + else if (pthread_sigmask(SIG_UNBLOCK, &my_sigset, &state->old_sigset)) + log_warn("WARNING: Failed to unblock SIGCHLD."); + else + state->restore_sigset = 1; +} + +static void _restore_thread_signals(struct dso_state *state) +{ + if (state->restore_sigset && + pthread_sigmask(SIG_SETMASK, &state->old_sigset, NULL)) + log_warn("WARNING: Failed to block SIGCHLD."); } int register_device(const char *device, const char *uuid __attribute__((unused)), int major __attribute__((unused)), int minor __attribute__((unused)), - void **private) + void **user) { - struct dm_pool *statemem = NULL; struct dso_state *state; + char *str; + char cmd_str[PATH_MAX + 128 + 2]; /* cmd ' ' vg/lv \0 */ - if (!dmeventd_lvm2_init()) - goto bad; - - if (!(statemem = dm_pool_create("thin_pool_state", 2048)) || - !(state = dm_pool_zalloc(statemem, sizeof(*state))) || - !dmeventd_lvm2_command(statemem, state->cmd_str, - sizeof(state->cmd_str), - "lvextend --use-policies", - device)) { - if (statemem) - dm_pool_destroy(statemem); - dmeventd_lvm2_exit(); - goto bad; - } + if (!dmeventd_lvm2_init_with_pool("thin_pool_state", state)) + goto_bad; - state->mem = statemem; - state->metadata_percent_check = CHECK_MINIMUM; - state->data_percent_check = CHECK_MINIMUM; - *private = state; + if (!dmeventd_lvm2_command(state->mem, cmd_str, sizeof(cmd_str), + "_dmeventd_thin_command", device)) + goto_bad; - syslog(LOG_INFO, "Monitoring thin %s.\n", device); + if (strncmp(cmd_str, "lvm ", 4) == 0) { + if (!(state->cmd_str = dm_pool_strdup(state->mem, cmd_str + 4))) { + log_error("Failed to copy lvm command."); + goto bad; + } + } else if (cmd_str[0] == '/') { + if (!(state->cmd_str = dm_pool_strdup(state->mem, cmd_str))) { + log_error("Failed to copy thin command."); + goto bad; + } + + /* Find last space before 'vg/lv' */ + if (!(str = strrchr(state->cmd_str, ' '))) + goto inval; + + if (!(state->argv[0] = dm_pool_strndup(state->mem, state->cmd_str, + str - state->cmd_str))) { + log_error("Failed to copy command."); + goto bad; + } + + state->argv[1] = str + 1; /* 1 argument - vg/lv */ + _init_thread_signals(state); + } else /* Unuspported command format */ + goto inval; + + state->pid = -1; + *user = state; + + log_info("Monitoring thin pool %s.", device); return 1; +inval: + log_error("Invalid command for monitoring: %s.", cmd_str); bad: - syslog(LOG_ERR, "Failed to monitor thin %s.\n", device); + log_error("Failed to monitor thin pool %s.", device); + + if (state) + dmeventd_lvm2_exit_with_pool(state); return 0; } @@ -435,13 +403,34 @@ int unregister_device(const char *device, const char *uuid __attribute__((unused)), int major __attribute__((unused)), int minor __attribute__((unused)), - void **private) + void **user) { - struct dso_state *state = *private; + struct dso_state *state = *user; + int i; + + for (i = 0; !_wait_for_pid(state) && (i < 6); ++i) { + if (i == 0) + /* Give it 2 seconds, then try to terminate & kill it */ + log_verbose("Child %d still not finished (%s) waiting.", + state->pid, state->cmd_str); + else if (i == 3) { + log_warn("WARNING: Terminating child %d.", state->pid); + kill(state->pid, SIGINT); + kill(state->pid, SIGTERM); + } else if (i == 5) { + log_warn("WARNING: Killing child %d.", state->pid); + kill(state->pid, SIGKILL); + } + sleep(1); + } + + if (state->pid != -1) + log_warn("WARNING: Cannot kill child %d!", state->pid); + + _restore_thread_signals(state); - syslog(LOG_INFO, "No longer monitoring thin %s.\n", device); - dm_pool_destroy(state->mem); - dmeventd_lvm2_exit(); + dmeventd_lvm2_exit_with_pool(state); + log_info("No longer monitoring thin pool %s.", device); return 1; } diff --git a/daemons/dmeventd/plugins/vdo/.exported_symbols b/daemons/dmeventd/plugins/vdo/.exported_symbols new file mode 100644 index 0000000..b88c705 --- /dev/null +++ b/daemons/dmeventd/plugins/vdo/.exported_symbols @@ -0,0 +1,3 @@ +process_event +register_device +unregister_device diff --git a/daemons/dmeventd/plugins/vdo/Makefile.in b/daemons/dmeventd/plugins/vdo/Makefile.in new file mode 100644 index 0000000..44942ea --- /dev/null +++ b/daemons/dmeventd/plugins/vdo/Makefile.in @@ -0,0 +1,31 @@ +# +# Copyright (C) 2018 Red Hat, Inc. All rights reserved. +# +# This file is part of LVM2. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +top_builddir = @top_builddir@ + +CLDFLAGS += -L$(top_builddir)/daemons/dmeventd/plugins/lvm2 +LIBS += -ldevmapper-event-lvm2 + +SOURCES = dmeventd_vdo.c + +LIB_NAME = libdevmapper-event-lvm2vdo +LIB_SHARED = $(LIB_NAME).$(LIB_SUFFIX) +LIB_VERSION = $(LIB_VERSION_LVM) + +include $(top_builddir)/make.tmpl + +install_lvm2: install_dm_plugin + +install: install_lvm2 diff --git a/daemons/dmeventd/plugins/vdo/dmeventd_vdo.c b/daemons/dmeventd/plugins/vdo/dmeventd_vdo.c new file mode 100644 index 0000000..d3f1ba4 --- /dev/null +++ b/daemons/dmeventd/plugins/vdo/dmeventd_vdo.c @@ -0,0 +1,413 @@ +/* + * Copyright (C) 2018 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "lib/misc/lib.h" +#include "daemons/dmeventd/plugins/lvm2/dmeventd_lvm.h" +#include "daemons/dmeventd/libdevmapper-event.h" + +/* + * Use parser from new device_mapper library. + * Although during compilation we can see dm_vdo_status_parse() + * in runtime we are linked agains systems libdm 'older' library + * which does not provide this symbol and plugin fails to load + */ +/* coverity[unnecessary_header] used for parsing */ +#include "device_mapper/vdo/status.c" + +#include <sys/wait.h> +#include <stdarg.h> + +/* First warning when VDO pool is 80% full. */ +#define WARNING_THRESH (DM_PERCENT_1 * 80) +/* Run a check every 5%. */ +#define CHECK_STEP (DM_PERCENT_1 * 5) +/* Do not bother checking VDO pool is less than 50% full. */ +#define CHECK_MINIMUM (DM_PERCENT_1 * 50) + +#define MAX_FAILS (256) /* ~42 mins between cmd call retry with 10s delay */ + +#define VDO_DEBUG 0 + +struct dso_state { + struct dm_pool *mem; + int percent_check; + int percent; + uint64_t known_data_size; + unsigned fails; + unsigned max_fails; + int restore_sigset; + sigset_t old_sigset; + pid_t pid; + char *argv[3]; + const char *cmd_str; + const char *name; +}; + +DM_EVENT_LOG_FN("vdo") + +static int _run_command(struct dso_state *state) +{ + char val[16]; + int i; + + /* Mark for possible lvm2 command we are running from dmeventd + * lvm2 will not try to talk back to dmeventd while processing it */ + (void) setenv("LVM_RUN_BY_DMEVENTD", "1", 1); + + if (state->percent) { + /* Prepare some known data to env vars for easy use */ + if (dm_snprintf(val, sizeof(val), "%d", + state->percent / DM_PERCENT_1) != -1) + (void) setenv("DMEVENTD_VDO_POOL", val, 1); + } else { + /* For an error event it's for a user to check status and decide */ + log_debug("Error event processing."); + } + + log_verbose("Executing command: %s", state->cmd_str); + + /* TODO: + * Support parallel run of 'task' and it's waitpid maintainence + * ATM we can't handle signaling of SIGALRM + * as signalling is not allowed while 'process_event()' is running + */ + if (!(state->pid = fork())) { + /* child */ + (void) close(0); + for (i = 3; i < 255; ++i) (void) close(i); + execvp(state->argv[0], state->argv); + _exit(errno); + } else if (state->pid == -1) { + log_error("Can't fork command %s.", state->cmd_str); + state->fails = 1; + return 0; + } + + return 1; +} + +static int _use_policy(struct dm_task *dmt, struct dso_state *state) +{ +#if VDO_DEBUG + log_debug("dmeventd executes: %s.", state->cmd_str); +#endif + if (state->argv[0]) + return _run_command(state); + + if (!dmeventd_lvm2_run_with_lock(state->cmd_str)) { + log_error("Failed command for %s.", dm_task_get_name(dmt)); + state->fails = 1; + return 0; + } + + state->fails = 0; + + return 1; +} + +/* Check if executed command has finished + * Only 1 command may run */ +static int _wait_for_pid(struct dso_state *state) +{ + int status = 0; + + if (state->pid == -1) + return 1; + + if (!waitpid(state->pid, &status, WNOHANG)) + return 0; + + /* Wait for finish */ + if (WIFEXITED(status)) { + log_verbose("Child %d exited with status %d.", + state->pid, WEXITSTATUS(status)); + state->fails = WEXITSTATUS(status) ? 1 : 0; + } else { + if (WIFSIGNALED(status)) + log_verbose("Child %d was terminated with status %d.", + state->pid, WTERMSIG(status)); + state->fails = 1; + } + + state->pid = -1; + + return 1; +} + +void process_event(struct dm_task *dmt, + enum dm_event_mask event __attribute__((unused)), + void **user) +{ + const char *device = dm_task_get_name(dmt); + struct dso_state *state = *user; + void *next = NULL; + uint64_t start, length; + char *target_type = NULL; + char *params; + int needs_policy = 0; + struct dm_task *new_dmt = NULL; + struct dm_vdo_status_parse_result vdop = { .status = NULL }; + +#if VDO_DEBUG + log_debug("Watch for VDO %s:%.2f%%.", state->name, + dm_percent_to_round_float(state->percent_check, 2)); +#endif + if (!_wait_for_pid(state)) { + log_warn("WARNING: Skipping event, child %d is still running (%s).", + state->pid, state->cmd_str); + return; + } + + if (event & DM_EVENT_DEVICE_ERROR) { +#if VDO_DEBUG + log_debug("VDO event error."); +#endif + /* Error -> no need to check and do instant resize */ + state->percent = 0; + if (_use_policy(dmt, state)) + goto out; + + stack; + + if (!(new_dmt = dm_task_create(DM_DEVICE_STATUS))) + goto_out; + + if (!dm_task_set_uuid(new_dmt, dm_task_get_uuid(dmt))) + goto_out; + + /* Non-blocking status read */ + if (!dm_task_no_flush(new_dmt)) + log_warn("WARNING: Can't set no_flush for dm status."); + + if (!dm_task_run(new_dmt)) + goto_out; + + dmt = new_dmt; + } + + dm_get_next_target(dmt, next, &start, &length, &target_type, ¶ms); + + if (!target_type || (strcmp(target_type, "vdo") != 0)) { + log_error("Invalid target type."); + goto out; + } + + if (!dm_vdo_status_parse(state->mem, params, &vdop)) { + log_error("Failed to parse status."); + goto out; + } + + state->percent = dm_make_percent(vdop.status->used_blocks, + vdop.status->total_blocks); + +#if VDO_DEBUG + log_debug("VDO %s status %.2f%% " FMTu64 "/" FMTu64 ".", + state->name, dm_percent_to_round_float(state->percent, 2), + vdop.status->used_blocks, vdop.status->total_blocks); +#endif + + /* VDO pool size had changed. Clear the threshold. */ + if (state->known_data_size != vdop.status->total_blocks) { + state->percent_check = CHECK_MINIMUM; + state->known_data_size = vdop.status->total_blocks; + state->fails = 0; + } + + /* + * Trigger action when threshold boundary is exceeded. + * Report 80% threshold warning when it's used above 80%. + * Only 100% is exception as it cannot be surpased so policy + * action is called for: >50%, >55% ... >95%, 100% + */ + if ((state->percent > WARNING_THRESH) && + (state->percent > state->percent_check)) + log_warn("WARNING: VDO %s %s is now %.2f%% full.", + state->name, device, + dm_percent_to_round_float(state->percent, 2)); + if (state->percent > CHECK_MINIMUM) { + /* Run action when usage raised more than CHECK_STEP since the last time */ + if (state->percent > state->percent_check) + needs_policy = 1; + state->percent_check = (state->percent / CHECK_STEP + 1) * CHECK_STEP; + if (state->percent_check == DM_PERCENT_100) + state->percent_check--; /* Can't get bigger then 100% */ + } else + state->percent_check = CHECK_MINIMUM; + + /* Reduce number of _use_policy() calls by power-of-2 factor till frequency of MAX_FAILS is reached. + * Avoids too high number of error retries, yet shows some status messages in log regularly. + * i.e. PV could have been pvmoved and VG/LV was locked for a while... + */ + if (state->fails) { + if (state->fails++ <= state->max_fails) { + log_debug("Postponing frequently failing policy (%u <= %u).", + state->fails - 1, state->max_fails); + goto out; + } + if (state->max_fails < MAX_FAILS) + state->max_fails <<= 1; + state->fails = needs_policy = 1; /* Retry failing command */ + } else + state->max_fails = 1; /* Reset on success */ + + if (needs_policy) + _use_policy(dmt, state); +out: + if (vdop.status) + dm_pool_free(state->mem, vdop.status); + + if (new_dmt) + dm_task_destroy(new_dmt); +} + +/* Handle SIGCHLD for a thread */ +static void _sig_child(int signum __attribute__((unused))) +{ + /* empty SIG_IGN */; +} + +/* Setup handler for SIGCHLD when executing external command + * to get quick 'waitpid()' reaction + * It will interrupt syscall just like SIGALRM and + * invoke process_event(). + */ +static void _init_thread_signals(struct dso_state *state) +{ + struct sigaction act = { .sa_handler = _sig_child }; + sigset_t my_sigset; + + sigemptyset(&my_sigset); + + if (sigaction(SIGCHLD, &act, NULL)) + log_warn("WARNING: Failed to set SIGCHLD action."); + else if (sigaddset(&my_sigset, SIGCHLD)) + log_warn("WARNING: Failed to add SIGCHLD to set."); + else if (pthread_sigmask(SIG_UNBLOCK, &my_sigset, &state->old_sigset)) + log_warn("WARNING: Failed to unblock SIGCHLD."); + else + state->restore_sigset = 1; +} + +static void _restore_thread_signals(struct dso_state *state) +{ + if (state->restore_sigset && + pthread_sigmask(SIG_SETMASK, &state->old_sigset, NULL)) + log_warn("WARNING: Failed to block SIGCHLD."); +} + +int register_device(const char *device, + const char *uuid, + int major __attribute__((unused)), + int minor __attribute__((unused)), + void **user) +{ + struct dso_state *state; + const char *cmd; + char *str; + char cmd_str[PATH_MAX + 128 + 2]; /* cmd ' ' vg/lv \0 */ + const char *name = "pool"; + + if (!dmeventd_lvm2_init_with_pool("vdo_pool_state", state)) + goto_bad; + + state->cmd_str = ""; + + /* Search for command for LVM- prefixed devices only */ + cmd = (strncmp(uuid, "LVM-", 4) == 0) ? "_dmeventd_vdo_command" : ""; + + if (!dmeventd_lvm2_command(state->mem, cmd_str, sizeof(cmd_str), cmd, device)) + goto_bad; + + if (strncmp(cmd_str, "lvm ", 4) == 0) { + if (!(state->cmd_str = dm_pool_strdup(state->mem, cmd_str + 4))) { + log_error("Failed to copy lvm VDO command."); + goto bad; + } + } else if (cmd_str[0] == '/') { + if (!(state->cmd_str = dm_pool_strdup(state->mem, cmd_str))) { + log_error("Failed to copy VDO command."); + goto bad; + } + + /* Find last space before 'vg/lv' */ + if (!(str = strrchr(state->cmd_str, ' '))) + goto inval; + + if (!(state->argv[0] = dm_pool_strndup(state->mem, state->cmd_str, + str - state->cmd_str))) { + log_error("Failed to copy command."); + goto bad; + } + + state->argv[1] = str + 1; /* 1 argument - vg/lv */ + _init_thread_signals(state); + } else if (cmd[0] == 0) { + state->name = "volume"; /* What to use with 'others?' */ + } else/* Unuspported command format */ + goto inval; + + state->pid = -1; + state->name = name; + *user = state; + + log_info("Monitoring VDO %s %s.", name, device); + + return 1; +inval: + log_error("Invalid command for monitoring: %s.", cmd_str); +bad: + log_error("Failed to monitor VDO %s %s.", name, device); + + if (state) + dmeventd_lvm2_exit_with_pool(state); + + return 0; +} + +int unregister_device(const char *device, + const char *uuid __attribute__((unused)), + int major __attribute__((unused)), + int minor __attribute__((unused)), + void **user) +{ + struct dso_state *state = *user; + const char *name = state->name; + int i; + + for (i = 0; !_wait_for_pid(state) && (i < 6); ++i) { + if (i == 0) + /* Give it 2 seconds, then try to terminate & kill it */ + log_verbose("Child %d still not finished (%s) waiting.", + state->pid, state->cmd_str); + else if (i == 3) { + log_warn("WARNING: Terminating child %d.", state->pid); + kill(state->pid, SIGINT); + kill(state->pid, SIGTERM); + } else if (i == 5) { + log_warn("WARNING: Killing child %d.", state->pid); + kill(state->pid, SIGKILL); + } + sleep(1); + } + + if (state->pid != -1) + log_warn("WARNING: Cannot kill child %d!", state->pid); + + _restore_thread_signals(state); + + dmeventd_lvm2_exit_with_pool(state); + log_info("No longer monitoring VDO %s %s.", name, device); + + return 1; +} |