diff options
author | Tim Pepper <timothy.c.pepper@linux.intel.com> | 2012-10-03 11:09:31 -0700 |
---|---|---|
committer | Tim Pepper <timothy.c.pepper@linux.intel.com> | 2012-10-03 11:50:30 -0700 |
commit | 285c61073dbfb39f25f013ede0da33a7c1f1bcec (patch) | |
tree | 9bc68bc58a97fd8ddf5f7fcde98f08c034ba22e8 | |
parent | e798f389f5694e7a9910cdca71c657ccfb87e41c (diff) | |
download | corewatcher-285c61073dbfb39f25f013ede0da33a7c1f1bcec.tar.gz corewatcher-285c61073dbfb39f25f013ede0da33a7c1f1bcec.tar.bz2 corewatcher-285c61073dbfb39f25f013ede0da33a7c1f1bcec.zip |
Separate report submission into its own thread
The primary motivation of this commit is separated submit.c's submission
queue handling into a separate thread which sleeps on a condition variable
that is asserted in the inotify and timer event loops. Includes simpler
data structures/locking for the submission queue.
Associated refactoring includes:
- pull inotification out into its own file to keep corewatcher.c simpler
(subsequent event threads should be similarly separated)
- add many stderr fprintf's to allow better tracking of where the code is
during runtime when in non-daemon mode
- log both failed and successful server submission counts
- various bits of added error checking/handling
- better document internally the expectations on core file naming, how
the file name strings are transformed and stored in the internal
lists/arrays/hashtables, and an audit to insure oops->filename is
consistently populated and used
- fixed a few possible memory leaks
- updated some variable and function names to be more self documenting,
added inline documentation in places
- added design documentation covering the basic states, code that runs for
transitions between states, global state data structures and the
associated locks
Signed-off-by: Tim Pepper <timothy.c.pepper@linux.intel.com>
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | src/Makefile.am | 1 | ||||
-rw-r--r-- | src/coredump.c | 171 | ||||
-rw-r--r-- | src/corewatcher.c | 112 | ||||
-rw-r--r-- | src/corewatcher.h | 16 | ||||
-rw-r--r-- | src/inotification.c | 132 | ||||
-rw-r--r-- | src/submit.c | 356 |
7 files changed, 419 insertions, 371 deletions
diff --git a/configure.ac b/configure.ac index bfa03a6..96c1700 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ AC_PREREQ([2.68]) -AC_INIT([nitra-corewatcher],[0.9.4],[timothy.c.pepper@linux.intel.com]) +AC_INIT([nitra-corewatcher],[0.9.5],[timothy.c.pepper@linux.intel.com]) AM_INIT_AUTOMAKE([foreign -Wall -Werror]) AC_CONFIG_FILES([Makefile src/Makefile]) AC_CONFIG_SRCDIR([src/corewatcher.c]) diff --git a/src/Makefile.am b/src/Makefile.am index 602695d..3a449e4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -11,6 +11,7 @@ corewatcher_SOURCES = \ configfile.c \ coredump.c \ corewatcher.c \ + inotification.c \ find_file.c \ submit.c diff --git a/src/coredump.c b/src/coredump.c index 76d20de..2990cc7 100644 --- a/src/coredump.c +++ b/src/coredump.c @@ -22,6 +22,7 @@ * Authors: * Arjan van de Ven <arjan@linux.intel.com> * William Douglas <william.douglas@intel.com> + * Tim Pepper <timothy.c.pepper@linux.intel.com> */ #include <unistd.h> @@ -47,14 +48,6 @@ int sig = 0; const char *core_folder = "/var/lib/corewatcher/"; const char *processed_folder = "/var/lib/corewatcher/processed/"; -/* - * the application must initialize the GMutex's - * core_status.processing_mtx, core_status.queued_mtx, - * processing_queue_mtx and gdb_mtx - * before calling into this file's scan_corefolders() - * (also since that calls submit_queue() there are dependencies - * there which need taken care of too) - */ /* Always pick up the processing_mtx and then the processing_queue_mtx, reverse for setting down */ /* Always pick up the gdb_mtx and then the @@ -330,6 +323,8 @@ static struct oops *extract_core(char *fullpath, char *appfile) int parsing_maps = 0; struct stat stat_buf; + fprintf(stderr, "+ extract_core() called for %s\n", fullpath); + if (asprintf(&command, "LANG=C gdb --batch -f %s %s -x /etc/corewatcher/gdb.command 2> /dev/null", appfile, fullpath) == -1) return NULL; @@ -358,6 +353,8 @@ static struct oops *extract_core(char *fullpath, char *appfile) h1 = strdup("Unknown"); file = popen(command, "r"); + if (!file) + fprintf(stderr, "+ gdb failed for %s\n", fullpath); while (file && !feof(file)) { size_t size = 0; @@ -374,6 +371,9 @@ static struct oops *extract_core(char *fullpath, char *appfile) parsing_maps = 1; /*continue;*/ } + if (strncmp(line, "No shared libraries loaded at this time.", 40) == 0) { + break; + } if (!parsing_maps) { /* parsing backtrace */ c2 = c1; @@ -414,6 +414,12 @@ fixup: /* gdb outputs some 0x1a's which break XML */ pclose(file); free(command); + if (!h1) + h1 = strdup("Unknown"); + + if (!m1) + m1 = strdup(" Unknown"); + ret = asprintf(&text, "%s" "backtrace: |\n" @@ -440,52 +446,49 @@ fixup: /* gdb outputs some 0x1a's which break XML */ } /* - * filename is of the form core_XXXX[.blah] - * we need to get the pid out as we want - * output of the form XXXX[.ext] + * input filename has the form: core_$APP_$TIMESTAMP[.$PID] + * output filename has form of: $APP_$TIMESTAMP[.ext] */ char *get_core_filename(char *filename, char *ext) { - char *pid = NULL, *c = NULL, *s = NULL, *detail_filename = NULL; + char *name = NULL, *dotpid = NULL, *stamp = NULL, *detail_filename = NULL; if (!filename) return NULL; - if (!(s = strstr(filename, "_"))) + if (!(stamp = strstr(filename, "_"))) return NULL; - if (!(++s)) - return NULL; - /* causes valgrind whining because we copy from middle of a string */ - if (!(pid = strdup(s))) + if (!(++stamp)) return NULL; - c = strstr(pid, "."); + if (!(name = strdup(stamp))) + return NULL; - if (c) - *c = '\0'; + /* strip trailing .PID if present */ + dotpid = strstr(name, "."); + if (dotpid) + *dotpid = '\0'; if (ext) { - /* causes valgrind whining because we copy from middle of a string */ - if ((asprintf(&detail_filename, "%s%s.%s", processed_folder, pid, ext)) == -1) { - free(pid); + if ((asprintf(&detail_filename, "%s%s.%s", processed_folder, name, ext)) == -1) { + free(name); return NULL; } } else { - /* causes valgrind whining because we copy from middle of a string */ - if ((asprintf(&detail_filename, "%s%s", processed_folder, pid)) == -1) { - free(pid); + if ((asprintf(&detail_filename, "%s%s", processed_folder, name)) == -1) { + free(name); return NULL; } } + free(name); - free(pid); return detail_filename; } /* * Write the backtrace from the core file into a text - * file named after the pid + * file named as $APP_$TIMESTAMP.txt */ static void write_core_detail_file(char *filename, char *text) { @@ -517,37 +520,49 @@ static void write_core_detail_file(char *filename, char *text) */ static void remove_from_processing_queue(void) { + fprintf(stderr, "+ removing processing_queue head\n"); free(processing_queue[pq_head]); processing_queue[pq_head++] = NULL; - if (pq_head == MAX_PROCESSING_OOPS) + if (pq_head == MAX_PROCESSING_OOPS) { + fprintf(stderr, "+ wrapping processing_queue head to 0 (bugs here?)\n"); pq_head = 0; + } } /* - * Removes file from processing_oops hash based on pid. - * Extracts pid from the fullpath such that - * /home/user/core.pid will be tranformed into just the pid. + * Removes file from processing_oops hash based on core name, + * extracting that core name from a fullpath such as + * "/${processed_folder}/core_$APP_$TIMESTAMP.$PID" + * in order to get just "$APP_$TIMESTAMP" * * Expects the lock on the given hash to be held. */ -void remove_pid_from_hash(char *fullpath, GHashTable *ht) +void remove_name_from_hash(char *fullpath, GHashTable *ht) { - char *c1 = NULL, *c2 = NULL; + char *name = NULL, *corename = NULL, *shortname = NULL; - if (!(c1 = strip_directories(fullpath))) + if (!(name = strip_directories(fullpath))) return; - if (!(c2 = get_core_filename(c1, NULL))) { - free(c1); + if (!(corename = get_core_filename(name, NULL))) { + free(name); return; } + free(name); - free(c1); + if (!(shortname = strip_directories(corename))) { + free(corename); + return; + } + free(corename); - g_hash_table_remove(ht, c2); + if (g_hash_table_remove(ht, shortname)) + fprintf(stderr, "+ core %s removed from processing_oops hash table\n", shortname); + else + fprintf(stderr, "+ core %s not in processing_oops hash table\n", shortname); - free(c2); + free(shortname); } /* @@ -581,13 +596,12 @@ static struct oops *process_common(char *fullpath) /* * Processes .to-process core files. - * Also creates the pid.txt file and adds + * Also creates the $APP_$TIMESTAMP.txt file and adds * the oops struct to the submit queue * * Picks up and sets down the gdb_mtx and * picks up and sets down the processing_queue_mtx. * (held at the same time in that order) - * Also will pick up and sets down the queued_mtx. */ static void *process_new(void __unused *vp) { @@ -598,28 +612,40 @@ static void *process_new(void __unused *vp) g_mutex_lock(&gdb_mtx); g_mutex_lock(&processing_queue_mtx); + fprintf(stderr, "+ Entered process_new()\n"); + if (!(fullpath = processing_queue[pq_head])) { - /* something went quite wrong */ + fprintf(stderr, "+ processing_queue corruption?\n"); g_mutex_unlock(&processing_queue_mtx); g_mutex_unlock(&gdb_mtx); g_mutex_unlock(&core_status.processing_mtx); return NULL; } - if (!(corefn = strip_directories(fullpath))) + if (!(corefn = strip_directories(fullpath))) { + fprintf(stderr, "+ No corefile? (%s)\n", fullpath); goto clean_process_new; + } - if (!(procfn = replace_name(fullpath, ".to-process", ".processed"))) + if (!(procfn = replace_name(fullpath, ".to-process", ".processed"))) { + fprintf(stderr, "+ Problems with filename manipulation for %s\n", corefn); goto clean_process_new; + } - if (!(oops = process_common(fullpath))) + if (!(oops = process_common(fullpath))) { + fprintf(stderr, "+ Problems processing %s\n", procfn); goto clean_process_new; + } - if (!(oops->detail_filename = get_core_filename(corefn, "txt"))) + if (!(oops->detail_filename = get_core_filename(corefn, "txt"))) { + fprintf(stderr, "+ Problems with filename manipulation for %s\n", procfn); goto clean_process_new; + } - if (rename(fullpath, procfn)) + if (rename(fullpath, procfn)) { + fprintf(stderr, "+ Unable to move %s to %s\n", fullpath, procfn); goto clean_process_new; + } free(oops->filename); oops->filename = procfn; @@ -632,20 +658,18 @@ static void *process_new(void __unused *vp) write_core_detail_file(corefn, oops->text); - g_mutex_lock(&core_status.queued_mtx); queue_backtrace(oops); - g_mutex_unlock(&core_status.queued_mtx); - /* don't need to free procfn because was set to oops->filename and that gets free'd */ + fprintf(stderr, "+ Leaving process_new() with %s queued\n", oops->detail_filename); + + /* mustn't free procfn because it was hung on oops->filename */ free(corefn); - FREE_OOPS(oops); return NULL; clean_process_new: - remove_pid_from_hash(fullpath, core_status.processing_oops); + remove_name_from_hash(fullpath, core_status.processing_oops); remove_from_processing_queue(); free(procfn); - procfn = NULL; /* don't know if oops->filename == procfn so be safe */ free(corefn); FREE_OOPS(oops); g_mutex_unlock(&processing_queue_mtx); @@ -660,7 +684,6 @@ clean_process_new: * Picks up and sets down the gdb_mtx. * Picks up and sets down the processing_queue_mtx. * (held at the same time in that order) - * Also will pick up and sets down the queued_mtx. */ static void *process_old(void __unused *vp) { @@ -670,37 +693,45 @@ static void *process_old(void __unused *vp) g_mutex_lock(&gdb_mtx); g_mutex_lock(&processing_queue_mtx); + fprintf(stderr, "+ Entered process_old()\n"); + if (!(fullpath = processing_queue[pq_head])) { - /* something went quite wrong */ + fprintf(stderr, "+ processing_queue corruption?\n"); g_mutex_unlock(&processing_queue_mtx); g_mutex_unlock(&gdb_mtx); return NULL; } + fprintf(stderr, "+ Reprocessing %s\n", fullpath); - if (!(corefn = strip_directories(fullpath))) + if (!(corefn = strip_directories(fullpath))) { + fprintf(stderr, "+ No corefile? (%s)\n", fullpath); goto clean_process_old; + } - if (!(oops = process_common(fullpath))) + if (!(oops = process_common(fullpath))) { + fprintf(stderr, "+ Problems processing %s\n", corefn); goto clean_process_old; + } - if (!(oops->detail_filename = get_core_filename(corefn, "txt"))) + if (!(oops->detail_filename = get_core_filename(corefn, "txt"))) { + fprintf(stderr, "+ Problems with filename manipulation for %s\n", corefn); goto clean_process_old; + } remove_from_processing_queue(); g_mutex_unlock(&processing_queue_mtx); g_mutex_unlock(&gdb_mtx); - g_mutex_lock(&core_status.queued_mtx); queue_backtrace(oops); - g_mutex_unlock(&core_status.queued_mtx); + + fprintf(stderr, "+ Leaving process_old() with %s queued\n", oops->detail_filename); free(corefn); - FREE_OOPS(oops); return NULL; clean_process_old: - remove_pid_from_hash(fullpath, core_status.processing_oops); + remove_name_from_hash(fullpath, core_status.processing_oops); remove_from_processing_queue(); free(corefn); FREE_OOPS(oops); @@ -710,7 +741,7 @@ clean_process_old: } /* - * Adds corefile (based on pid) to the processing_oops + * Adds corefile (based on name) to the processing_oops * hash table if it is not already there, then * tries to add to the processing_queue. * @@ -739,6 +770,7 @@ static int add_to_processing(char *fullpath) g_mutex_lock(&core_status.processing_mtx); if (g_hash_table_lookup(core_status.processing_oops, c2)) { g_mutex_unlock(&core_status.processing_mtx); + fprintf(stderr, "+ ...name %s already in processing_oops hash table\n", c2); goto clean_add_to_processing; } @@ -746,6 +778,7 @@ static int add_to_processing(char *fullpath) if (processing_queue[pq_tail]) { g_mutex_unlock(&processing_queue_mtx); g_mutex_unlock(&core_status.processing_mtx); + fprintf(stderr, "+ ...processing_queue full\n"); goto clean_add_to_processing; } @@ -811,7 +844,7 @@ static void scan_core_folder(void __unused *unused) dir = opendir(core_folder); if (!dir) return; - fprintf(stderr, "+ scanning %s...\n", core_folder); + fprintf(stderr, "+ Begin scanning %s...\n", core_folder); while(1) { free(fullpath); fullpath = NULL; @@ -824,7 +857,7 @@ static void scan_core_folder(void __unused *unused) if (strncmp(entry->d_name, "core_", 5)) continue; - /* matched core_#### where #### is the pid of the process */ + /* matched core_#### */ r = asprintf(&fullpath, "%s%s", core_folder, entry->d_name); if (r == -1) { fullpath = NULL; @@ -848,6 +881,7 @@ static void scan_core_folder(void __unused *unused) } } closedir(dir); + fprintf(stderr, "+ End scanning %s...\n", core_folder); } static void scan_processed_folder(void __unused *unused) @@ -861,7 +895,7 @@ static void scan_processed_folder(void __unused *unused) dir = opendir(processed_folder); if (!dir) return; - fprintf(stderr, "+ scanning %s...\n", processed_folder); + fprintf(stderr, "+ Begin scanning %s...\n", processed_folder); while(1) { free(fullpath); fullpath = NULL; @@ -889,6 +923,7 @@ static void scan_processed_folder(void __unused *unused) reprocess_corefile(fullpath); } closedir(dir); + fprintf(stderr, "+ End scanning %s...\n", processed_folder); } int scan_corefolders(void __unused *unused) @@ -896,7 +931,5 @@ int scan_corefolders(void __unused *unused) scan_core_folder(NULL); scan_processed_folder(NULL); - submit_queue(); - return 1; } diff --git a/src/corewatcher.c b/src/corewatcher.c index d7b9fa8..aed3de0 100644 --- a/src/corewatcher.c +++ b/src/corewatcher.c @@ -22,6 +22,7 @@ * Authors: * Arjan van de Ven <arjan@linux.intel.com> * William Douglas <william.douglas@intel.com> + * Tim Pepper <timothy.c.pepper@linux.intel.com> */ #include <unistd.h> @@ -50,15 +51,6 @@ #include "corewatcher.h" -/* - * rather than malloc() on each inotify event, preallocate a decent chunk - * of memory so multiple events can be read in one go, trading a little - * extra memory for less runtime overhead if/when multiple crashes happen - * in short order. - */ -#include <sys/inotify.h> -#define BUF_LEN 2048 - static struct option opts[] = { { "nodaemon", 0, NULL, 'n' }, { "debug", 0, NULL, 'd' }, @@ -81,89 +73,6 @@ static void usage(const char *name) fprintf(stderr, " -h, --help Display this help message\n"); } -gboolean inotify_source_prepare(__unused GSource *source, gint *timeout_) -{ - *timeout_ = -1; - fprintf(stderr, "+ inotification prepare\n"); - return FALSE; -} - -gboolean inotify_source_check(__unused GSource *source) -{ - int fd, wd; - char buffer[BUF_LEN]; - size_t len; - - fprintf(stderr, "+ inotification check\n"); - /* inotification of crashes */ - fd = inotify_init(); - if (fd < 0) { - fprintf(stderr, "corewatcher inotify init failed.. exiting\n"); - return EXIT_FAILURE; - } - wd = inotify_add_watch(fd, core_folder, IN_CLOSE_WRITE); - if (wd < 0) { - fprintf(stderr, "corewatcher inotify add failed.. exiting\n"); - return EXIT_FAILURE; - } - fprintf(stderr, "+ awaiting inotification...\n"); - len = read(fd, buffer, BUF_LEN); - if (len <=0 ) { - fprintf(stderr, "corewatcher inotify read failed.. exiting\n"); - return FALSE; - } - fprintf(stderr, "+ inotification received!\n"); - /* for now simply ignore the actual crash files we've been notified of - * and let our callback be dispatched to go look for any/all crash - * files */ - - /* slight delay to minimize storms of notifications (the inotify - * read() can return a batch of notifications*/ - sleep(5); - return TRUE; -} - -gboolean inotify_source_dispatch(__unused GSource *source, - GSourceFunc callback, gpointer user_data) -{ - fprintf(stderr, "+ inotify dispatch\n"); - if(callback(user_data)) { - fprintf(stderr, "+ inotify dispatch success\n"); - return TRUE; - } else { - //should not happen as our callback always returns 1 - fprintf(stderr, "+ inotify dispatch failed.\n"); - return FALSE; - } -} - -void *inotify_loop(void __unused *unused) -{ - /* inotification of crashes */ - GMainLoop *loop; - GMainContext *context; - GSource *source; - GSourceFuncs InotifySourceFuncs = { - inotify_source_prepare, - inotify_source_check, - inotify_source_dispatch, - NULL, - NULL, - NULL, - }; - - context = g_main_context_new(); - loop = g_main_loop_new(context, FALSE); - loop = g_main_loop_ref(loop); - source = g_source_new(&InotifySourceFuncs, sizeof(GSource)); - g_source_attach(source, context); - g_source_set_callback(source, scan_corefolders, NULL, NULL); - g_main_loop_run(loop); - g_main_loop_unref(loop); - - return NULL; -} - int main(int argc, char**argv) { GMainLoop *loop; @@ -172,11 +81,11 @@ int main(int argc, char**argv) int j = 0; DIR *dir = NULL; GThread *inotify_thread = NULL; + GThread *submit_thread = NULL; g_thread_init (NULL); core_status.processing_oops = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL); - core_status.queued_oops = g_hash_table_new(g_str_hash, g_str_equal); /* * Signal the kernel that we're not timing critical @@ -249,14 +158,11 @@ int main(int argc, char**argv) * We ignore this advice, since 99.99% of the time this program * will not use http at all, but the curl code does consume * memory. - */ - -/* curl_global_init(CURL_GLOBAL_ALL); -*/ + */ if (godaemon && daemon(0, 0)) { - fprintf(stderr, "corewatcher failed to daemonize.. exiting \n"); + fprintf(stderr, "corewatcher failed to daemonize..exiting\n"); return EXIT_FAILURE; } sched_yield(); @@ -264,8 +170,11 @@ int main(int argc, char**argv) loop = g_main_loop_new(NULL, FALSE); loop = g_main_loop_ref(loop); - if (!debug) - sleep(20); + submit_thread = g_thread_new("corewatcher submit", submit_loop, NULL); + if (submit_thread == NULL) { + fprintf(stderr, "+ Unable to start submit thread...exiting\n"); + return EXIT_FAILURE; + } scan_corefolders(NULL); @@ -303,8 +212,9 @@ out: for (j = 0; j < url_count; j++) free(submit_url[j]); + g_mutex_lock(&core_status.processing_mtx); g_hash_table_destroy(core_status.processing_oops); - g_hash_table_destroy(core_status.queued_oops); + g_mutex_unlock(&core_status.processing_mtx); return EXIT_SUCCESS; } diff --git a/src/corewatcher.h b/src/corewatcher.h index 0e28f68..d8e1c10 100644 --- a/src/corewatcher.h +++ b/src/corewatcher.h @@ -21,6 +21,7 @@ * Authors: * Arjan van de Ven <arjan@linux.intel.com> * William Douglas <william.douglas@intel.com> + * Tim Pepper <timothy.c.pepper@linux.intel.com> */ @@ -28,11 +29,10 @@ #define __INCLUDE_GUARD_KERNELOOPS_H_ /* borrowed from the kernel */ -#define barrier() __asm__ __volatile__("": : :"memory") #define __unused __attribute__ ((__unused__)) #define MAX_PROCESSING_OOPS 10 -#define MAX_URLS 9 +#define MAX_URLS 2 #define FREE_OOPS(oops) \ do { \ @@ -59,25 +59,23 @@ struct oops { queued_mtx -> processing_mtx -> gdb_mtx ->processing_queue_mtx */ struct core_status { GHashTable *processing_oops; - GHashTable *queued_oops; GMutex processing_mtx; - GMutex queued_mtx; }; +/* inotification.c */ +extern void *inotify_loop(void * unused); + /* submit.c */ -extern GMutex queued_bt_mtx; extern void queue_backtrace(struct oops *oops); -extern void submit_queue(void); extern char *replace_name(char *filename, char *replace, char *new); +extern void *submit_loop(void * unused); /* coredump.c */ -extern GMutex processing_queue_mtx; -extern GMutex gdb_mtx; extern int move_core(char *fullpath, char *ext); extern int scan_corefolders(void * unused); extern char *strip_directories(char *fullpath); extern char *get_core_filename(char *filename, char *ext); -extern void remove_pid_from_hash(char *fullpath, GHashTable *ht); +extern void remove_name_from_hash(char *fullpath, GHashTable *ht); extern int uid; extern int sig; extern const char *core_folder; diff --git a/src/inotification.c b/src/inotification.c new file mode 100644 index 0000000..7d788c0 --- /dev/null +++ b/src/inotification.c @@ -0,0 +1,132 @@ +#define _GNU_SOURCE +/* + * Copyright 2007, Intel Corporation + * + * This file is part of corewatcher.org + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + * + * Authors: + * Arjan van de Ven <arjan@linux.intel.com> + * William Douglas <william.douglas@intel.com> + * Tim Pepper <timothy.c.pepper@linux.intel.com> + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <glib.h> + +#include "corewatcher.h" + +/* + * rather than malloc() on each inotify event, preallocate a decent chunk + * of memory so multiple events can be read in one go, trading a little + * extra memory for less runtime overhead if/when multiple crashes happen + * in short order. + */ +#include <sys/inotify.h> +#define BUF_LEN 2048 + +gboolean inotify_source_prepare(__unused GSource *source, gint *timeout_) +{ + *timeout_ = 0; + fprintf(stderr, "+ inotification prepare\n"); + return FALSE; +} + +gboolean inotify_source_check(__unused GSource *source) +{ + int fd, wd; + char buffer[BUF_LEN]; + size_t len; + + fprintf(stderr, "+ inotification check\n"); + /* inotification of crashes */ + fd = inotify_init(); + if (fd < 0) { + fprintf(stderr, "corewatcher inotify init failed...exiting\n"); + return EXIT_FAILURE; + } + wd = inotify_add_watch(fd, core_folder, IN_CLOSE_WRITE); + if (wd < 0) { + fprintf(stderr, "corewatcher inotify add failed...exiting\n"); + return EXIT_FAILURE; + } + fprintf(stderr, "+ awaiting inotification...\n"); + len = read(fd, buffer, BUF_LEN); + if (len <=0 ) { + fprintf(stderr, "corewatcher inotify read failed...exiting\n"); + return FALSE; + } + fprintf(stderr, "+ inotification received!\n"); + /* for now simply ignore the actual crash files we've been notified of + * and let our callback be dispatched to go look for any/all crash + * files */ + + /* slight delay to minimize storms of notifications (the inotify + * read() can return a batch of notifications*/ + sleep(5); + return TRUE; +} + +gboolean inotify_source_dispatch(__unused GSource *source, + GSourceFunc callback, gpointer user_data) +{ + fprintf(stderr, "+ inotify dispatch\n"); + if(callback(user_data)) { + fprintf(stderr, "+ inotify dispatch success\n"); + return TRUE; + } else { + //should not happen as our callback always returns 1 + fprintf(stderr, "+ inotify dispatch failed\n"); + return FALSE; + } +} + +void *inotify_loop(void __unused *unused) +{ + /* inotification of crashes */ + GMainLoop *loop; + GMainContext *context; + GSource *source; + GSourceFuncs InotifySourceFuncs = { + inotify_source_prepare, + inotify_source_check, + inotify_source_dispatch, + NULL, + NULL, + NULL, + }; + + context = g_main_context_new(); + loop = g_main_loop_new(context, FALSE); + loop = g_main_loop_ref(loop); + source = g_source_new(&InotifySourceFuncs, sizeof(GSource)); + g_source_attach(source, context); + g_source_set_callback(source, scan_corefolders, NULL, NULL); + + fprintf(stderr, "+ inotify loop starting\n"); + g_main_loop_run(loop); + fprintf(stderr, "+ inotify loop finished\n"); + + g_main_loop_unref(loop); + + return NULL; +} + diff --git a/src/submit.c b/src/submit.c index a3c9d0d..dca9f11 100644 --- a/src/submit.c +++ b/src/submit.c @@ -22,6 +22,7 @@ * Authors: * Arjan van de Ven <arjan@linux.intel.com> * William Douglas <william.douglas@intel.com> + * Tim Pepper <timothy.c.pepper@linux.intel.com> */ #include <unistd.h> @@ -37,57 +38,37 @@ #include "corewatcher.h" -/* - * the application must initialize the GMutex queued_bt_mtx - * before calling into this file's functions - */ /* Always pick up the queued_mtx and then the queued_bt_mtx, reverse for setting down */ -GMutex queued_bt_mtx; -static struct oops *queued_backtraces = NULL; -static char result_url[4096]; +GMutex bt_mtx; +GCond bt_work; +GHashTable *bt_hash; +static struct oops *bt_list = NULL; /* - * Creates a duplicate of oops and adds it to - * the submit queue if the oops isn't already - * there. - * - * Expects the queued_mtx to be held - * Picks up and sets down the queued_bt_mtx. + * Adds an oops to the work queue if the oops + * isn't already there. */ void queue_backtrace(struct oops *oops) { - struct oops *new = NULL; - if (!oops || !oops->filename) return; - /* first, check if we haven't already submitted the oops */ + g_mutex_lock(&bt_mtx); - if (g_hash_table_lookup(core_status.queued_oops, oops->filename)) + /* if this is already on bt_list / bt_hash, free and done */ + if (g_hash_table_lookup(bt_hash, oops->filename)) { + FREE_OOPS(oops); + g_mutex_unlock(&bt_mtx); return; + } - new = malloc(sizeof(struct oops)); - if (!new) - return; - g_mutex_lock(&queued_bt_mtx); - new->next = queued_backtraces; - if (oops->application) - new->application = strdup(oops->application); - else - new->application = NULL; - if (oops->text) - new->text = strdup(oops->text); - else - new->text = NULL; - new->filename = strdup(oops->filename); - if (oops->detail_filename) - new->detail_filename = strdup(oops->detail_filename); - else - new->detail_filename = NULL; - queued_backtraces = new; - g_mutex_unlock(&queued_bt_mtx); - g_hash_table_insert(core_status.queued_oops, new->filename, new->filename); + /* otherwise add to bt_list / bt_hash, signal work */ + oops->next = bt_list; + bt_list = oops; + g_hash_table_insert(bt_hash, oops->filename, oops->filename); + g_cond_signal(&bt_work); + g_mutex_unlock(&bt_mtx); } /* @@ -95,19 +76,15 @@ void queue_backtrace(struct oops *oops) * be submitted. * * Picks up and sets down the processing_mtx. - * Picks up and sets down the queued_bt_mtx. - * Expects the queued_mtx to be held. + * Picks up and sets down the bt_mtx. */ static void print_queue(void) { - struct oops *oops = NULL, *next = NULL, *queue = NULL; + struct oops *oops = NULL, *next = NULL; int count = 0; - g_mutex_lock(&queued_bt_mtx); - queue = queued_backtraces; - queued_backtraces = NULL; - barrier(); - oops = queue; + g_mutex_lock(&bt_mtx); + oops = bt_list; while (oops) { fprintf(stderr, "+ Submit text is:\n---[start of oops]---\n%s\n---[end of oops]---\n", oops->text); next = oops->next; @@ -115,44 +92,41 @@ static void print_queue(void) oops = next; count++; } - g_mutex_unlock(&queued_bt_mtx); + g_hash_table_remove_all(bt_hash); + g_mutex_unlock(&bt_mtx); + g_mutex_lock(&core_status.processing_mtx); g_hash_table_remove_all(core_status.processing_oops); g_mutex_unlock(&core_status.processing_mtx); - - g_hash_table_remove_all(core_status.queued_oops); } -static void write_logfile(int count, char *wsubmit_url) +static size_t writefunction(void *ptr, size_t size, size_t nmemb, void __attribute((unused)) *stream) { - openlog("corewatcher", 0, LOG_KERN); - syslog(LOG_WARNING, "Submitted %i coredump signatures to %s", count, wsubmit_url); - closelog(); -} + char *httppost_ret = NULL; + char *errstr1 = NULL; + char *errstr2 = NULL; + int ret = 0; -static size_t writefunction( void *ptr, size_t size, size_t nmemb, void __attribute((unused)) *stream) -{ - char *c = NULL, *c1 = NULL, *c2 = NULL; - c = malloc(size * nmemb + 1); - if (!c) + httppost_ret = malloc(size * nmemb + 1); + if (!httppost_ret) return -1; - memset(c, 0, size * nmemb + 1); - memcpy(c, ptr, size * nmemb); - printf("received %s \n", c); - c1 = strstr(c, "201 "); - if (c1) { - c1 += 4; - if (c1 >= c + strlen(c)) { - free(c); - return -1; - } - c2 = strchr(c1, '\n'); - if (c2) *c2 = 0; - strncpy(result_url, c1, 4095); - } - free(c); - return size * nmemb; + memset(httppost_ret, 0, size * nmemb + 1); + memcpy(httppost_ret, ptr, size * nmemb); + + fprintf(stderr, "+ received:\n"); + fprintf(stderr, "%s", httppost_ret); + fprintf(stderr, "\n\n"); + + errstr1 = strstr(httppost_ret, "the server encountered an error"); + errstr2 = strstr(httppost_ret, "ScannerError at /crash_submit/"); + if (errstr1 || errstr2) + ret = -1; + else + ret = size * nmemb; + + free(httppost_ret); + return ret; } /* @@ -195,143 +169,143 @@ char *replace_name(char *filename, char *replace, char *new) } /* - * Attempts to send the oops queue to the submission url wsubmit_url, - * will use proxy if configured. + * Worker thread for submitting backtraces * + * Picks up and sets down the bt_mtx. * Picks up and sets down the processing_mtx. - * Expects queued_mtx to be held. */ -static void submit_queue_with_url(struct oops *queue, char *wsubmit_url, char *proxy) +void *submit_loop(void __unused *unused) { - int result = 0; + int i = 0, n = 0, sentcount, failcount; struct oops *oops = NULL; - CURL *handle = NULL; - int count = 0; - - handle = curl_easy_init(); - curl_easy_setopt(handle, CURLOPT_URL, wsubmit_url); - if (proxy) - curl_easy_setopt(handle, CURLOPT_PROXY, proxy); - - oops = queue; - while (oops) { - struct curl_httppost *post = NULL; - struct curl_httppost *last = NULL; - - /* set up the POST data */ - curl_formadd(&post, &last, - CURLFORM_COPYNAME, "crash", - CURLFORM_COPYCONTENTS, oops->text, CURLFORM_END); - - curl_easy_setopt(handle, CURLOPT_HTTPPOST, post); - curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, writefunction); - result = curl_easy_perform(handle); - curl_formfree(post); - - if (!result) { - char *nf = NULL; - nf = replace_name(oops->filename, ".processed", ".submitted"); - rename(oops->filename, nf); - g_mutex_lock(&core_status.processing_mtx); - remove_pid_from_hash(oops->filename, core_status.processing_oops); - g_mutex_unlock(&core_status.processing_mtx); - free(nf); - - g_hash_table_remove(core_status.queued_oops, oops->filename); - count++; - } else { - g_hash_table_remove(core_status.queued_oops, oops->filename); - queue_backtrace(oops); - } - oops = oops->next; - } - - curl_easy_cleanup(handle); - - if (count && !testmode) - write_logfile(count, wsubmit_url); -} - -/* - * Entry function for submitting oops data. - * - * Picks up and sets down the queued_mtx. - * Picks up and sets down the queued_bt_mtx. - */ -void submit_queue(void) -{ - int i = 0, n = 0, submit = 0; - struct oops *queue = NULL, *oops = NULL, *next = NULL; - CURL *handle = NULL; + struct oops *work_list = NULL; + struct oops *requeue_list = NULL; + char *newfilename = NULL; pxProxyFactory *pf = NULL; char **proxies = NULL; - char *proxy = NULL; - - g_mutex_lock(&core_status.queued_mtx); + int result = 0; + CURL *handle = NULL; + struct curl_httppost *post = NULL; + struct curl_httppost *last = NULL; - if (!g_hash_table_size(core_status.queued_oops)) { - g_mutex_unlock(&core_status.queued_mtx); - return; - } + fprintf(stderr, "+ Begin submit_loop()\n"); - memset(result_url, 0, 4096); + bt_hash = g_hash_table_new(g_str_hash, g_str_equal); if (testmode) { + fprintf(stderr, "+ The queue contains:\n"); print_queue(); - g_mutex_unlock(&core_status.queued_mtx); - return; + fprintf(stderr, "+ Leaving submit_loop(), testmode\n"); + return NULL; } - g_mutex_lock(&queued_bt_mtx); - queue = queued_backtraces; - queued_backtraces = NULL; - barrier(); - g_mutex_unlock(&queued_bt_mtx); - - pf = px_proxy_factory_new(); - handle = curl_easy_init(); - curl_easy_setopt(handle, CURLOPT_NOBODY, 1); - curl_easy_setopt(handle, CURLOPT_TIMEOUT, 5); - - for (i = 0; i < url_count; i++) { - curl_easy_setopt(handle, CURLOPT_URL, submit_url[i]); - if (pf) - proxies = px_proxy_factory_get_proxies(pf, submit_url[i]); - if (proxies) { - proxy = proxies[0]; - curl_easy_setopt(handle, CURLOPT_PROXY, proxy); - } else { - proxy = NULL; + while (1) { + g_mutex_lock(&bt_mtx); + while (!bt_list) { + if (requeue_list) { + bt_list = requeue_list; + requeue_list = NULL; + fprintf(stderr, "+ submit_loop() requeued old work, awaiting new work\n"); + } else { + fprintf(stderr, "+ submit_loop() queue empty, awaiting new work\n"); + } + g_cond_wait(&bt_work, &bt_mtx); } - if (!curl_easy_perform(handle)) { - submit_queue_with_url(queue, submit_url[i], proxy); - submit = 1; + fprintf(stderr, "+ submit_loop() checking for work\n"); + /* pull out current work and release the mutex */ + work_list = bt_list; + bt_list = NULL; + g_mutex_unlock(&bt_mtx); + + /* net init */ + handle = curl_easy_init(); + pf = px_proxy_factory_new(); + curl_easy_setopt(handle, CURLOPT_NOBODY, 1); + curl_easy_setopt(handle, CURLOPT_TIMEOUT, 5); + + sentcount = 0; + failcount = 0; + + /* try to find a good url/proxy combo */ + for (i = 0; i < url_count; i++) { + curl_easy_setopt(handle, CURLOPT_URL, submit_url[i]); + + /* use a proxy if one is present */ + if (pf) { + proxies = px_proxy_factory_get_proxies(pf, submit_url[i]); + if (proxies) + curl_easy_setopt(handle, CURLOPT_PROXY, proxies[0]); + } + + /* check the connection before POSTing form */ + result = curl_easy_perform(handle); + if (result) + continue; + + /* have a good url/proxy now...send reports there */ + while (work_list) { + oops = work_list; + work_list = oops->next; + oops->next = NULL; + + fprintf(stderr, "+ attempting to POST %s\n", oops->detail_filename); + + /* set up the POST data */ + curl_formadd(&post, &last, + CURLFORM_COPYNAME, "crash", + CURLFORM_COPYCONTENTS, oops->text, CURLFORM_END); + curl_easy_setopt(handle, CURLOPT_HTTPPOST, post); + curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, writefunction); + result = curl_easy_perform(handle); + curl_formfree(post); + + if (!result) { + fprintf(stderr, "+ successfully sent %s\n", oops->detail_filename); + sentcount++; + + newfilename = replace_name(oops->filename, ".processed", ".submitted"); + rename(oops->filename, newfilename); + free(newfilename); + + g_mutex_lock(&core_status.processing_mtx); + remove_name_from_hash(oops->filename, core_status.processing_oops); + g_mutex_unlock(&core_status.processing_mtx); + + g_mutex_lock(&bt_mtx); + g_hash_table_remove(bt_hash, oops->filename); + g_mutex_unlock(&bt_mtx); + FREE_OOPS(oops); + } else { + fprintf(stderr, "+ requeuing %s\n", oops->detail_filename); + failcount++; + + oops->next = requeue_list; + requeue_list = oops; + } + } + for (n = 0; proxies[n]; n++) free(proxies[n]); free(proxies); - break; - } - for (n = 0; proxies[n]; n++) - free(proxies[n]); - free(proxies); - } - - px_proxy_factory_free(pf); - if (submit) { - oops = queue; - while (oops) { - next = oops->next; - FREE_OOPS(oops); - oops = next; + openlog("corewatcher", 0, LOG_KERN); + if (sentcount) + syslog(LOG_WARNING, "Successful sent %d coredump signatures to %s", sentcount, submit_url[i]); + if (failcount) + syslog(LOG_WARNING, "Failed to send %d coredump signatures to %s", failcount, submit_url[i]); + closelog(); } - } else { - g_mutex_lock(&queued_bt_mtx); - queued_backtraces = queue; - g_mutex_unlock(&queued_bt_mtx); + + px_proxy_factory_free(pf); + curl_easy_cleanup(handle); } - curl_easy_cleanup(handle); + fprintf(stderr, "+ End submit_loop()\n"); + + /* curl docs say this is not thread safe...but we never get here*/ curl_global_cleanup(); - g_mutex_unlock(&core_status.queued_mtx); + + g_hash_table_destroy(bt_hash); + + return NULL; } |