summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Pepper <timothy.c.pepper@linux.intel.com>2012-10-03 11:09:31 -0700
committerTim Pepper <timothy.c.pepper@linux.intel.com>2012-10-03 11:50:30 -0700
commit285c61073dbfb39f25f013ede0da33a7c1f1bcec (patch)
tree9bc68bc58a97fd8ddf5f7fcde98f08c034ba22e8
parente798f389f5694e7a9910cdca71c657ccfb87e41c (diff)
downloadcorewatcher-285c61073dbfb39f25f013ede0da33a7c1f1bcec.tar.gz
corewatcher-285c61073dbfb39f25f013ede0da33a7c1f1bcec.tar.bz2
corewatcher-285c61073dbfb39f25f013ede0da33a7c1f1bcec.zip
Separate report submission into its own thread
The primary motivation of this commit is separated submit.c's submission queue handling into a separate thread which sleeps on a condition variable that is asserted in the inotify and timer event loops. Includes simpler data structures/locking for the submission queue. Associated refactoring includes: - pull inotification out into its own file to keep corewatcher.c simpler (subsequent event threads should be similarly separated) - add many stderr fprintf's to allow better tracking of where the code is during runtime when in non-daemon mode - log both failed and successful server submission counts - various bits of added error checking/handling - better document internally the expectations on core file naming, how the file name strings are transformed and stored in the internal lists/arrays/hashtables, and an audit to insure oops->filename is consistently populated and used - fixed a few possible memory leaks - updated some variable and function names to be more self documenting, added inline documentation in places - added design documentation covering the basic states, code that runs for transitions between states, global state data structures and the associated locks Signed-off-by: Tim Pepper <timothy.c.pepper@linux.intel.com>
-rw-r--r--configure.ac2
-rw-r--r--src/Makefile.am1
-rw-r--r--src/coredump.c171
-rw-r--r--src/corewatcher.c112
-rw-r--r--src/corewatcher.h16
-rw-r--r--src/inotification.c132
-rw-r--r--src/submit.c356
7 files changed, 419 insertions, 371 deletions
diff --git a/configure.ac b/configure.ac
index bfa03a6..96c1700 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,5 +1,5 @@
AC_PREREQ([2.68])
-AC_INIT([nitra-corewatcher],[0.9.4],[timothy.c.pepper@linux.intel.com])
+AC_INIT([nitra-corewatcher],[0.9.5],[timothy.c.pepper@linux.intel.com])
AM_INIT_AUTOMAKE([foreign -Wall -Werror])
AC_CONFIG_FILES([Makefile src/Makefile])
AC_CONFIG_SRCDIR([src/corewatcher.c])
diff --git a/src/Makefile.am b/src/Makefile.am
index 602695d..3a449e4 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -11,6 +11,7 @@ corewatcher_SOURCES = \
configfile.c \
coredump.c \
corewatcher.c \
+ inotification.c \
find_file.c \
submit.c
diff --git a/src/coredump.c b/src/coredump.c
index 76d20de..2990cc7 100644
--- a/src/coredump.c
+++ b/src/coredump.c
@@ -22,6 +22,7 @@
* Authors:
* Arjan van de Ven <arjan@linux.intel.com>
* William Douglas <william.douglas@intel.com>
+ * Tim Pepper <timothy.c.pepper@linux.intel.com>
*/
#include <unistd.h>
@@ -47,14 +48,6 @@ int sig = 0;
const char *core_folder = "/var/lib/corewatcher/";
const char *processed_folder = "/var/lib/corewatcher/processed/";
-/*
- * the application must initialize the GMutex's
- * core_status.processing_mtx, core_status.queued_mtx,
- * processing_queue_mtx and gdb_mtx
- * before calling into this file's scan_corefolders()
- * (also since that calls submit_queue() there are dependencies
- * there which need taken care of too)
- */
/* Always pick up the processing_mtx and then the
processing_queue_mtx, reverse for setting down */
/* Always pick up the gdb_mtx and then the
@@ -330,6 +323,8 @@ static struct oops *extract_core(char *fullpath, char *appfile)
int parsing_maps = 0;
struct stat stat_buf;
+ fprintf(stderr, "+ extract_core() called for %s\n", fullpath);
+
if (asprintf(&command, "LANG=C gdb --batch -f %s %s -x /etc/corewatcher/gdb.command 2> /dev/null", appfile, fullpath) == -1)
return NULL;
@@ -358,6 +353,8 @@ static struct oops *extract_core(char *fullpath, char *appfile)
h1 = strdup("Unknown");
file = popen(command, "r");
+ if (!file)
+ fprintf(stderr, "+ gdb failed for %s\n", fullpath);
while (file && !feof(file)) {
size_t size = 0;
@@ -374,6 +371,9 @@ static struct oops *extract_core(char *fullpath, char *appfile)
parsing_maps = 1;
/*continue;*/
}
+ if (strncmp(line, "No shared libraries loaded at this time.", 40) == 0) {
+ break;
+ }
if (!parsing_maps) { /* parsing backtrace */
c2 = c1;
@@ -414,6 +414,12 @@ fixup: /* gdb outputs some 0x1a's which break XML */
pclose(file);
free(command);
+ if (!h1)
+ h1 = strdup("Unknown");
+
+ if (!m1)
+ m1 = strdup(" Unknown");
+
ret = asprintf(&text,
"%s"
"backtrace: |\n"
@@ -440,52 +446,49 @@ fixup: /* gdb outputs some 0x1a's which break XML */
}
/*
- * filename is of the form core_XXXX[.blah]
- * we need to get the pid out as we want
- * output of the form XXXX[.ext]
+ * input filename has the form: core_$APP_$TIMESTAMP[.$PID]
+ * output filename has form of: $APP_$TIMESTAMP[.ext]
*/
char *get_core_filename(char *filename, char *ext)
{
- char *pid = NULL, *c = NULL, *s = NULL, *detail_filename = NULL;
+ char *name = NULL, *dotpid = NULL, *stamp = NULL, *detail_filename = NULL;
if (!filename)
return NULL;
- if (!(s = strstr(filename, "_")))
+ if (!(stamp = strstr(filename, "_")))
return NULL;
- if (!(++s))
- return NULL;
- /* causes valgrind whining because we copy from middle of a string */
- if (!(pid = strdup(s)))
+ if (!(++stamp))
return NULL;
- c = strstr(pid, ".");
+ if (!(name = strdup(stamp)))
+ return NULL;
- if (c)
- *c = '\0';
+ /* strip trailing .PID if present */
+ dotpid = strstr(name, ".");
+ if (dotpid)
+ *dotpid = '\0';
if (ext) {
- /* causes valgrind whining because we copy from middle of a string */
- if ((asprintf(&detail_filename, "%s%s.%s", processed_folder, pid, ext)) == -1) {
- free(pid);
+ if ((asprintf(&detail_filename, "%s%s.%s", processed_folder, name, ext)) == -1) {
+ free(name);
return NULL;
}
} else {
- /* causes valgrind whining because we copy from middle of a string */
- if ((asprintf(&detail_filename, "%s%s", processed_folder, pid)) == -1) {
- free(pid);
+ if ((asprintf(&detail_filename, "%s%s", processed_folder, name)) == -1) {
+ free(name);
return NULL;
}
}
+ free(name);
- free(pid);
return detail_filename;
}
/*
* Write the backtrace from the core file into a text
- * file named after the pid
+ * file named as $APP_$TIMESTAMP.txt
*/
static void write_core_detail_file(char *filename, char *text)
{
@@ -517,37 +520,49 @@ static void write_core_detail_file(char *filename, char *text)
*/
static void remove_from_processing_queue(void)
{
+ fprintf(stderr, "+ removing processing_queue head\n");
free(processing_queue[pq_head]);
processing_queue[pq_head++] = NULL;
- if (pq_head == MAX_PROCESSING_OOPS)
+ if (pq_head == MAX_PROCESSING_OOPS) {
+ fprintf(stderr, "+ wrapping processing_queue head to 0 (bugs here?)\n");
pq_head = 0;
+ }
}
/*
- * Removes file from processing_oops hash based on pid.
- * Extracts pid from the fullpath such that
- * /home/user/core.pid will be tranformed into just the pid.
+ * Removes file from processing_oops hash based on core name,
+ * extracting that core name from a fullpath such as
+ * "/${processed_folder}/core_$APP_$TIMESTAMP.$PID"
+ * in order to get just "$APP_$TIMESTAMP"
*
* Expects the lock on the given hash to be held.
*/
-void remove_pid_from_hash(char *fullpath, GHashTable *ht)
+void remove_name_from_hash(char *fullpath, GHashTable *ht)
{
- char *c1 = NULL, *c2 = NULL;
+ char *name = NULL, *corename = NULL, *shortname = NULL;
- if (!(c1 = strip_directories(fullpath)))
+ if (!(name = strip_directories(fullpath)))
return;
- if (!(c2 = get_core_filename(c1, NULL))) {
- free(c1);
+ if (!(corename = get_core_filename(name, NULL))) {
+ free(name);
return;
}
+ free(name);
- free(c1);
+ if (!(shortname = strip_directories(corename))) {
+ free(corename);
+ return;
+ }
+ free(corename);
- g_hash_table_remove(ht, c2);
+ if (g_hash_table_remove(ht, shortname))
+ fprintf(stderr, "+ core %s removed from processing_oops hash table\n", shortname);
+ else
+ fprintf(stderr, "+ core %s not in processing_oops hash table\n", shortname);
- free(c2);
+ free(shortname);
}
/*
@@ -581,13 +596,12 @@ static struct oops *process_common(char *fullpath)
/*
* Processes .to-process core files.
- * Also creates the pid.txt file and adds
+ * Also creates the $APP_$TIMESTAMP.txt file and adds
* the oops struct to the submit queue
*
* Picks up and sets down the gdb_mtx and
* picks up and sets down the processing_queue_mtx.
* (held at the same time in that order)
- * Also will pick up and sets down the queued_mtx.
*/
static void *process_new(void __unused *vp)
{
@@ -598,28 +612,40 @@ static void *process_new(void __unused *vp)
g_mutex_lock(&gdb_mtx);
g_mutex_lock(&processing_queue_mtx);
+ fprintf(stderr, "+ Entered process_new()\n");
+
if (!(fullpath = processing_queue[pq_head])) {
- /* something went quite wrong */
+ fprintf(stderr, "+ processing_queue corruption?\n");
g_mutex_unlock(&processing_queue_mtx);
g_mutex_unlock(&gdb_mtx);
g_mutex_unlock(&core_status.processing_mtx);
return NULL;
}
- if (!(corefn = strip_directories(fullpath)))
+ if (!(corefn = strip_directories(fullpath))) {
+ fprintf(stderr, "+ No corefile? (%s)\n", fullpath);
goto clean_process_new;
+ }
- if (!(procfn = replace_name(fullpath, ".to-process", ".processed")))
+ if (!(procfn = replace_name(fullpath, ".to-process", ".processed"))) {
+ fprintf(stderr, "+ Problems with filename manipulation for %s\n", corefn);
goto clean_process_new;
+ }
- if (!(oops = process_common(fullpath)))
+ if (!(oops = process_common(fullpath))) {
+ fprintf(stderr, "+ Problems processing %s\n", procfn);
goto clean_process_new;
+ }
- if (!(oops->detail_filename = get_core_filename(corefn, "txt")))
+ if (!(oops->detail_filename = get_core_filename(corefn, "txt"))) {
+ fprintf(stderr, "+ Problems with filename manipulation for %s\n", procfn);
goto clean_process_new;
+ }
- if (rename(fullpath, procfn))
+ if (rename(fullpath, procfn)) {
+ fprintf(stderr, "+ Unable to move %s to %s\n", fullpath, procfn);
goto clean_process_new;
+ }
free(oops->filename);
oops->filename = procfn;
@@ -632,20 +658,18 @@ static void *process_new(void __unused *vp)
write_core_detail_file(corefn, oops->text);
- g_mutex_lock(&core_status.queued_mtx);
queue_backtrace(oops);
- g_mutex_unlock(&core_status.queued_mtx);
- /* don't need to free procfn because was set to oops->filename and that gets free'd */
+ fprintf(stderr, "+ Leaving process_new() with %s queued\n", oops->detail_filename);
+
+ /* mustn't free procfn because it was hung on oops->filename */
free(corefn);
- FREE_OOPS(oops);
return NULL;
clean_process_new:
- remove_pid_from_hash(fullpath, core_status.processing_oops);
+ remove_name_from_hash(fullpath, core_status.processing_oops);
remove_from_processing_queue();
free(procfn);
- procfn = NULL; /* don't know if oops->filename == procfn so be safe */
free(corefn);
FREE_OOPS(oops);
g_mutex_unlock(&processing_queue_mtx);
@@ -660,7 +684,6 @@ clean_process_new:
* Picks up and sets down the gdb_mtx.
* Picks up and sets down the processing_queue_mtx.
* (held at the same time in that order)
- * Also will pick up and sets down the queued_mtx.
*/
static void *process_old(void __unused *vp)
{
@@ -670,37 +693,45 @@ static void *process_old(void __unused *vp)
g_mutex_lock(&gdb_mtx);
g_mutex_lock(&processing_queue_mtx);
+ fprintf(stderr, "+ Entered process_old()\n");
+
if (!(fullpath = processing_queue[pq_head])) {
- /* something went quite wrong */
+ fprintf(stderr, "+ processing_queue corruption?\n");
g_mutex_unlock(&processing_queue_mtx);
g_mutex_unlock(&gdb_mtx);
return NULL;
}
+ fprintf(stderr, "+ Reprocessing %s\n", fullpath);
- if (!(corefn = strip_directories(fullpath)))
+ if (!(corefn = strip_directories(fullpath))) {
+ fprintf(stderr, "+ No corefile? (%s)\n", fullpath);
goto clean_process_old;
+ }
- if (!(oops = process_common(fullpath)))
+ if (!(oops = process_common(fullpath))) {
+ fprintf(stderr, "+ Problems processing %s\n", corefn);
goto clean_process_old;
+ }
- if (!(oops->detail_filename = get_core_filename(corefn, "txt")))
+ if (!(oops->detail_filename = get_core_filename(corefn, "txt"))) {
+ fprintf(stderr, "+ Problems with filename manipulation for %s\n", corefn);
goto clean_process_old;
+ }
remove_from_processing_queue();
g_mutex_unlock(&processing_queue_mtx);
g_mutex_unlock(&gdb_mtx);
- g_mutex_lock(&core_status.queued_mtx);
queue_backtrace(oops);
- g_mutex_unlock(&core_status.queued_mtx);
+
+ fprintf(stderr, "+ Leaving process_old() with %s queued\n", oops->detail_filename);
free(corefn);
- FREE_OOPS(oops);
return NULL;
clean_process_old:
- remove_pid_from_hash(fullpath, core_status.processing_oops);
+ remove_name_from_hash(fullpath, core_status.processing_oops);
remove_from_processing_queue();
free(corefn);
FREE_OOPS(oops);
@@ -710,7 +741,7 @@ clean_process_old:
}
/*
- * Adds corefile (based on pid) to the processing_oops
+ * Adds corefile (based on name) to the processing_oops
* hash table if it is not already there, then
* tries to add to the processing_queue.
*
@@ -739,6 +770,7 @@ static int add_to_processing(char *fullpath)
g_mutex_lock(&core_status.processing_mtx);
if (g_hash_table_lookup(core_status.processing_oops, c2)) {
g_mutex_unlock(&core_status.processing_mtx);
+ fprintf(stderr, "+ ...name %s already in processing_oops hash table\n", c2);
goto clean_add_to_processing;
}
@@ -746,6 +778,7 @@ static int add_to_processing(char *fullpath)
if (processing_queue[pq_tail]) {
g_mutex_unlock(&processing_queue_mtx);
g_mutex_unlock(&core_status.processing_mtx);
+ fprintf(stderr, "+ ...processing_queue full\n");
goto clean_add_to_processing;
}
@@ -811,7 +844,7 @@ static void scan_core_folder(void __unused *unused)
dir = opendir(core_folder);
if (!dir)
return;
- fprintf(stderr, "+ scanning %s...\n", core_folder);
+ fprintf(stderr, "+ Begin scanning %s...\n", core_folder);
while(1) {
free(fullpath);
fullpath = NULL;
@@ -824,7 +857,7 @@ static void scan_core_folder(void __unused *unused)
if (strncmp(entry->d_name, "core_", 5))
continue;
- /* matched core_#### where #### is the pid of the process */
+ /* matched core_#### */
r = asprintf(&fullpath, "%s%s", core_folder, entry->d_name);
if (r == -1) {
fullpath = NULL;
@@ -848,6 +881,7 @@ static void scan_core_folder(void __unused *unused)
}
}
closedir(dir);
+ fprintf(stderr, "+ End scanning %s...\n", core_folder);
}
static void scan_processed_folder(void __unused *unused)
@@ -861,7 +895,7 @@ static void scan_processed_folder(void __unused *unused)
dir = opendir(processed_folder);
if (!dir)
return;
- fprintf(stderr, "+ scanning %s...\n", processed_folder);
+ fprintf(stderr, "+ Begin scanning %s...\n", processed_folder);
while(1) {
free(fullpath);
fullpath = NULL;
@@ -889,6 +923,7 @@ static void scan_processed_folder(void __unused *unused)
reprocess_corefile(fullpath);
}
closedir(dir);
+ fprintf(stderr, "+ End scanning %s...\n", processed_folder);
}
int scan_corefolders(void __unused *unused)
@@ -896,7 +931,5 @@ int scan_corefolders(void __unused *unused)
scan_core_folder(NULL);
scan_processed_folder(NULL);
- submit_queue();
-
return 1;
}
diff --git a/src/corewatcher.c b/src/corewatcher.c
index d7b9fa8..aed3de0 100644
--- a/src/corewatcher.c
+++ b/src/corewatcher.c
@@ -22,6 +22,7 @@
* Authors:
* Arjan van de Ven <arjan@linux.intel.com>
* William Douglas <william.douglas@intel.com>
+ * Tim Pepper <timothy.c.pepper@linux.intel.com>
*/
#include <unistd.h>
@@ -50,15 +51,6 @@
#include "corewatcher.h"
-/*
- * rather than malloc() on each inotify event, preallocate a decent chunk
- * of memory so multiple events can be read in one go, trading a little
- * extra memory for less runtime overhead if/when multiple crashes happen
- * in short order.
- */
-#include <sys/inotify.h>
-#define BUF_LEN 2048
-
static struct option opts[] = {
{ "nodaemon", 0, NULL, 'n' },
{ "debug", 0, NULL, 'd' },
@@ -81,89 +73,6 @@ static void usage(const char *name)
fprintf(stderr, " -h, --help Display this help message\n");
}
-gboolean inotify_source_prepare(__unused GSource *source, gint *timeout_)
-{
- *timeout_ = -1;
- fprintf(stderr, "+ inotification prepare\n");
- return FALSE;
-}
-
-gboolean inotify_source_check(__unused GSource *source)
-{
- int fd, wd;
- char buffer[BUF_LEN];
- size_t len;
-
- fprintf(stderr, "+ inotification check\n");
- /* inotification of crashes */
- fd = inotify_init();
- if (fd < 0) {
- fprintf(stderr, "corewatcher inotify init failed.. exiting\n");
- return EXIT_FAILURE;
- }
- wd = inotify_add_watch(fd, core_folder, IN_CLOSE_WRITE);
- if (wd < 0) {
- fprintf(stderr, "corewatcher inotify add failed.. exiting\n");
- return EXIT_FAILURE;
- }
- fprintf(stderr, "+ awaiting inotification...\n");
- len = read(fd, buffer, BUF_LEN);
- if (len <=0 ) {
- fprintf(stderr, "corewatcher inotify read failed.. exiting\n");
- return FALSE;
- }
- fprintf(stderr, "+ inotification received!\n");
- /* for now simply ignore the actual crash files we've been notified of
- * and let our callback be dispatched to go look for any/all crash
- * files */
-
- /* slight delay to minimize storms of notifications (the inotify
- * read() can return a batch of notifications*/
- sleep(5);
- return TRUE;
-}
-
-gboolean inotify_source_dispatch(__unused GSource *source,
- GSourceFunc callback, gpointer user_data)
-{
- fprintf(stderr, "+ inotify dispatch\n");
- if(callback(user_data)) {
- fprintf(stderr, "+ inotify dispatch success\n");
- return TRUE;
- } else {
- //should not happen as our callback always returns 1
- fprintf(stderr, "+ inotify dispatch failed.\n");
- return FALSE;
- }
-}
-
-void *inotify_loop(void __unused *unused)
-{
- /* inotification of crashes */
- GMainLoop *loop;
- GMainContext *context;
- GSource *source;
- GSourceFuncs InotifySourceFuncs = {
- inotify_source_prepare,
- inotify_source_check,
- inotify_source_dispatch,
- NULL,
- NULL,
- NULL,
- };
-
- context = g_main_context_new();
- loop = g_main_loop_new(context, FALSE);
- loop = g_main_loop_ref(loop);
- source = g_source_new(&InotifySourceFuncs, sizeof(GSource));
- g_source_attach(source, context);
- g_source_set_callback(source, scan_corefolders, NULL, NULL);
- g_main_loop_run(loop);
- g_main_loop_unref(loop);
-
- return NULL;
-}
-
int main(int argc, char**argv)
{
GMainLoop *loop;
@@ -172,11 +81,11 @@ int main(int argc, char**argv)
int j = 0;
DIR *dir = NULL;
GThread *inotify_thread = NULL;
+ GThread *submit_thread = NULL;
g_thread_init (NULL);
core_status.processing_oops = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
- core_status.queued_oops = g_hash_table_new(g_str_hash, g_str_equal);
/*
* Signal the kernel that we're not timing critical
@@ -249,14 +158,11 @@ int main(int argc, char**argv)
* We ignore this advice, since 99.99% of the time this program
* will not use http at all, but the curl code does consume
* memory.
- */
-
-/*
curl_global_init(CURL_GLOBAL_ALL);
-*/
+ */
if (godaemon && daemon(0, 0)) {
- fprintf(stderr, "corewatcher failed to daemonize.. exiting \n");
+ fprintf(stderr, "corewatcher failed to daemonize..exiting\n");
return EXIT_FAILURE;
}
sched_yield();
@@ -264,8 +170,11 @@ int main(int argc, char**argv)
loop = g_main_loop_new(NULL, FALSE);
loop = g_main_loop_ref(loop);
- if (!debug)
- sleep(20);
+ submit_thread = g_thread_new("corewatcher submit", submit_loop, NULL);
+ if (submit_thread == NULL) {
+ fprintf(stderr, "+ Unable to start submit thread...exiting\n");
+ return EXIT_FAILURE;
+ }
scan_corefolders(NULL);
@@ -303,8 +212,9 @@ out:
for (j = 0; j < url_count; j++)
free(submit_url[j]);
+ g_mutex_lock(&core_status.processing_mtx);
g_hash_table_destroy(core_status.processing_oops);
- g_hash_table_destroy(core_status.queued_oops);
+ g_mutex_unlock(&core_status.processing_mtx);
return EXIT_SUCCESS;
}
diff --git a/src/corewatcher.h b/src/corewatcher.h
index 0e28f68..d8e1c10 100644
--- a/src/corewatcher.h
+++ b/src/corewatcher.h
@@ -21,6 +21,7 @@
* Authors:
* Arjan van de Ven <arjan@linux.intel.com>
* William Douglas <william.douglas@intel.com>
+ * Tim Pepper <timothy.c.pepper@linux.intel.com>
*/
@@ -28,11 +29,10 @@
#define __INCLUDE_GUARD_KERNELOOPS_H_
/* borrowed from the kernel */
-#define barrier() __asm__ __volatile__("": : :"memory")
#define __unused __attribute__ ((__unused__))
#define MAX_PROCESSING_OOPS 10
-#define MAX_URLS 9
+#define MAX_URLS 2
#define FREE_OOPS(oops) \
do { \
@@ -59,25 +59,23 @@ struct oops {
queued_mtx -> processing_mtx -> gdb_mtx ->processing_queue_mtx */
struct core_status {
GHashTable *processing_oops;
- GHashTable *queued_oops;
GMutex processing_mtx;
- GMutex queued_mtx;
};
+/* inotification.c */
+extern void *inotify_loop(void * unused);
+
/* submit.c */
-extern GMutex queued_bt_mtx;
extern void queue_backtrace(struct oops *oops);
-extern void submit_queue(void);
extern char *replace_name(char *filename, char *replace, char *new);
+extern void *submit_loop(void * unused);
/* coredump.c */
-extern GMutex processing_queue_mtx;
-extern GMutex gdb_mtx;
extern int move_core(char *fullpath, char *ext);
extern int scan_corefolders(void * unused);
extern char *strip_directories(char *fullpath);
extern char *get_core_filename(char *filename, char *ext);
-extern void remove_pid_from_hash(char *fullpath, GHashTable *ht);
+extern void remove_name_from_hash(char *fullpath, GHashTable *ht);
extern int uid;
extern int sig;
extern const char *core_folder;
diff --git a/src/inotification.c b/src/inotification.c
new file mode 100644
index 0000000..7d788c0
--- /dev/null
+++ b/src/inotification.c
@@ -0,0 +1,132 @@
+#define _GNU_SOURCE
+/*
+ * Copyright 2007, Intel Corporation
+ *
+ * This file is part of corewatcher.org
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ *
+ * Authors:
+ * Arjan van de Ven <arjan@linux.intel.com>
+ * William Douglas <william.douglas@intel.com>
+ * Tim Pepper <timothy.c.pepper@linux.intel.com>
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <glib.h>
+
+#include "corewatcher.h"
+
+/*
+ * rather than malloc() on each inotify event, preallocate a decent chunk
+ * of memory so multiple events can be read in one go, trading a little
+ * extra memory for less runtime overhead if/when multiple crashes happen
+ * in short order.
+ */
+#include <sys/inotify.h>
+#define BUF_LEN 2048
+
+gboolean inotify_source_prepare(__unused GSource *source, gint *timeout_)
+{
+ *timeout_ = 0;
+ fprintf(stderr, "+ inotification prepare\n");
+ return FALSE;
+}
+
+gboolean inotify_source_check(__unused GSource *source)
+{
+ int fd, wd;
+ char buffer[BUF_LEN];
+ size_t len;
+
+ fprintf(stderr, "+ inotification check\n");
+ /* inotification of crashes */
+ fd = inotify_init();
+ if (fd < 0) {
+ fprintf(stderr, "corewatcher inotify init failed...exiting\n");
+ return EXIT_FAILURE;
+ }
+ wd = inotify_add_watch(fd, core_folder, IN_CLOSE_WRITE);
+ if (wd < 0) {
+ fprintf(stderr, "corewatcher inotify add failed...exiting\n");
+ return EXIT_FAILURE;
+ }
+ fprintf(stderr, "+ awaiting inotification...\n");
+ len = read(fd, buffer, BUF_LEN);
+ if (len <=0 ) {
+ fprintf(stderr, "corewatcher inotify read failed...exiting\n");
+ return FALSE;
+ }
+ fprintf(stderr, "+ inotification received!\n");
+ /* for now simply ignore the actual crash files we've been notified of
+ * and let our callback be dispatched to go look for any/all crash
+ * files */
+
+ /* slight delay to minimize storms of notifications (the inotify
+ * read() can return a batch of notifications*/
+ sleep(5);
+ return TRUE;
+}
+
+gboolean inotify_source_dispatch(__unused GSource *source,
+ GSourceFunc callback, gpointer user_data)
+{
+ fprintf(stderr, "+ inotify dispatch\n");
+ if(callback(user_data)) {
+ fprintf(stderr, "+ inotify dispatch success\n");
+ return TRUE;
+ } else {
+ //should not happen as our callback always returns 1
+ fprintf(stderr, "+ inotify dispatch failed\n");
+ return FALSE;
+ }
+}
+
+void *inotify_loop(void __unused *unused)
+{
+ /* inotification of crashes */
+ GMainLoop *loop;
+ GMainContext *context;
+ GSource *source;
+ GSourceFuncs InotifySourceFuncs = {
+ inotify_source_prepare,
+ inotify_source_check,
+ inotify_source_dispatch,
+ NULL,
+ NULL,
+ NULL,
+ };
+
+ context = g_main_context_new();
+ loop = g_main_loop_new(context, FALSE);
+ loop = g_main_loop_ref(loop);
+ source = g_source_new(&InotifySourceFuncs, sizeof(GSource));
+ g_source_attach(source, context);
+ g_source_set_callback(source, scan_corefolders, NULL, NULL);
+
+ fprintf(stderr, "+ inotify loop starting\n");
+ g_main_loop_run(loop);
+ fprintf(stderr, "+ inotify loop finished\n");
+
+ g_main_loop_unref(loop);
+
+ return NULL;
+}
+
diff --git a/src/submit.c b/src/submit.c
index a3c9d0d..dca9f11 100644
--- a/src/submit.c
+++ b/src/submit.c
@@ -22,6 +22,7 @@
* Authors:
* Arjan van de Ven <arjan@linux.intel.com>
* William Douglas <william.douglas@intel.com>
+ * Tim Pepper <timothy.c.pepper@linux.intel.com>
*/
#include <unistd.h>
@@ -37,57 +38,37 @@
#include "corewatcher.h"
-/*
- * the application must initialize the GMutex queued_bt_mtx
- * before calling into this file's functions
- */
/* Always pick up the queued_mtx and then the
queued_bt_mtx, reverse for setting down */
-GMutex queued_bt_mtx;
-static struct oops *queued_backtraces = NULL;
-static char result_url[4096];
+GMutex bt_mtx;
+GCond bt_work;
+GHashTable *bt_hash;
+static struct oops *bt_list = NULL;
/*
- * Creates a duplicate of oops and adds it to
- * the submit queue if the oops isn't already
- * there.
- *
- * Expects the queued_mtx to be held
- * Picks up and sets down the queued_bt_mtx.
+ * Adds an oops to the work queue if the oops
+ * isn't already there.
*/
void queue_backtrace(struct oops *oops)
{
- struct oops *new = NULL;
-
if (!oops || !oops->filename)
return;
- /* first, check if we haven't already submitted the oops */
+ g_mutex_lock(&bt_mtx);
- if (g_hash_table_lookup(core_status.queued_oops, oops->filename))
+ /* if this is already on bt_list / bt_hash, free and done */
+ if (g_hash_table_lookup(bt_hash, oops->filename)) {
+ FREE_OOPS(oops);
+ g_mutex_unlock(&bt_mtx);
return;
+ }
- new = malloc(sizeof(struct oops));
- if (!new)
- return;
- g_mutex_lock(&queued_bt_mtx);
- new->next = queued_backtraces;
- if (oops->application)
- new->application = strdup(oops->application);
- else
- new->application = NULL;
- if (oops->text)
- new->text = strdup(oops->text);
- else
- new->text = NULL;
- new->filename = strdup(oops->filename);
- if (oops->detail_filename)
- new->detail_filename = strdup(oops->detail_filename);
- else
- new->detail_filename = NULL;
- queued_backtraces = new;
- g_mutex_unlock(&queued_bt_mtx);
- g_hash_table_insert(core_status.queued_oops, new->filename, new->filename);
+ /* otherwise add to bt_list / bt_hash, signal work */
+ oops->next = bt_list;
+ bt_list = oops;
+ g_hash_table_insert(bt_hash, oops->filename, oops->filename);
+ g_cond_signal(&bt_work);
+ g_mutex_unlock(&bt_mtx);
}
/*
@@ -95,19 +76,15 @@ void queue_backtrace(struct oops *oops)
* be submitted.
*
* Picks up and sets down the processing_mtx.
- * Picks up and sets down the queued_bt_mtx.
- * Expects the queued_mtx to be held.
+ * Picks up and sets down the bt_mtx.
*/
static void print_queue(void)
{
- struct oops *oops = NULL, *next = NULL, *queue = NULL;
+ struct oops *oops = NULL, *next = NULL;
int count = 0;
- g_mutex_lock(&queued_bt_mtx);
- queue = queued_backtraces;
- queued_backtraces = NULL;
- barrier();
- oops = queue;
+ g_mutex_lock(&bt_mtx);
+ oops = bt_list;
while (oops) {
fprintf(stderr, "+ Submit text is:\n---[start of oops]---\n%s\n---[end of oops]---\n", oops->text);
next = oops->next;
@@ -115,44 +92,41 @@ static void print_queue(void)
oops = next;
count++;
}
- g_mutex_unlock(&queued_bt_mtx);
+ g_hash_table_remove_all(bt_hash);
+ g_mutex_unlock(&bt_mtx);
+
g_mutex_lock(&core_status.processing_mtx);
g_hash_table_remove_all(core_status.processing_oops);
g_mutex_unlock(&core_status.processing_mtx);
-
- g_hash_table_remove_all(core_status.queued_oops);
}
-static void write_logfile(int count, char *wsubmit_url)
+static size_t writefunction(void *ptr, size_t size, size_t nmemb, void __attribute((unused)) *stream)
{
- openlog("corewatcher", 0, LOG_KERN);
- syslog(LOG_WARNING, "Submitted %i coredump signatures to %s", count, wsubmit_url);
- closelog();
-}
+ char *httppost_ret = NULL;
+ char *errstr1 = NULL;
+ char *errstr2 = NULL;
+ int ret = 0;
-static size_t writefunction( void *ptr, size_t size, size_t nmemb, void __attribute((unused)) *stream)
-{
- char *c = NULL, *c1 = NULL, *c2 = NULL;
- c = malloc(size * nmemb + 1);
- if (!c)
+ httppost_ret = malloc(size * nmemb + 1);
+ if (!httppost_ret)
return -1;
- memset(c, 0, size * nmemb + 1);
- memcpy(c, ptr, size * nmemb);
- printf("received %s \n", c);
- c1 = strstr(c, "201 ");
- if (c1) {
- c1 += 4;
- if (c1 >= c + strlen(c)) {
- free(c);
- return -1;
- }
- c2 = strchr(c1, '\n');
- if (c2) *c2 = 0;
- strncpy(result_url, c1, 4095);
- }
- free(c);
- return size * nmemb;
+ memset(httppost_ret, 0, size * nmemb + 1);
+ memcpy(httppost_ret, ptr, size * nmemb);
+
+ fprintf(stderr, "+ received:\n");
+ fprintf(stderr, "%s", httppost_ret);
+ fprintf(stderr, "\n\n");
+
+ errstr1 = strstr(httppost_ret, "the server encountered an error");
+ errstr2 = strstr(httppost_ret, "ScannerError at /crash_submit/");
+ if (errstr1 || errstr2)
+ ret = -1;
+ else
+ ret = size * nmemb;
+
+ free(httppost_ret);
+ return ret;
}
/*
@@ -195,143 +169,143 @@ char *replace_name(char *filename, char *replace, char *new)
}
/*
- * Attempts to send the oops queue to the submission url wsubmit_url,
- * will use proxy if configured.
+ * Worker thread for submitting backtraces
*
+ * Picks up and sets down the bt_mtx.
* Picks up and sets down the processing_mtx.
- * Expects queued_mtx to be held.
*/
-static void submit_queue_with_url(struct oops *queue, char *wsubmit_url, char *proxy)
+void *submit_loop(void __unused *unused)
{
- int result = 0;
+ int i = 0, n = 0, sentcount, failcount;
struct oops *oops = NULL;
- CURL *handle = NULL;
- int count = 0;
-
- handle = curl_easy_init();
- curl_easy_setopt(handle, CURLOPT_URL, wsubmit_url);
- if (proxy)
- curl_easy_setopt(handle, CURLOPT_PROXY, proxy);
-
- oops = queue;
- while (oops) {
- struct curl_httppost *post = NULL;
- struct curl_httppost *last = NULL;
-
- /* set up the POST data */
- curl_formadd(&post, &last,
- CURLFORM_COPYNAME, "crash",
- CURLFORM_COPYCONTENTS, oops->text, CURLFORM_END);
-
- curl_easy_setopt(handle, CURLOPT_HTTPPOST, post);
- curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, writefunction);
- result = curl_easy_perform(handle);
- curl_formfree(post);
-
- if (!result) {
- char *nf = NULL;
- nf = replace_name(oops->filename, ".processed", ".submitted");
- rename(oops->filename, nf);
- g_mutex_lock(&core_status.processing_mtx);
- remove_pid_from_hash(oops->filename, core_status.processing_oops);
- g_mutex_unlock(&core_status.processing_mtx);
- free(nf);
-
- g_hash_table_remove(core_status.queued_oops, oops->filename);
- count++;
- } else {
- g_hash_table_remove(core_status.queued_oops, oops->filename);
- queue_backtrace(oops);
- }
- oops = oops->next;
- }
-
- curl_easy_cleanup(handle);
-
- if (count && !testmode)
- write_logfile(count, wsubmit_url);
-}
-
-/*
- * Entry function for submitting oops data.
- *
- * Picks up and sets down the queued_mtx.
- * Picks up and sets down the queued_bt_mtx.
- */
-void submit_queue(void)
-{
- int i = 0, n = 0, submit = 0;
- struct oops *queue = NULL, *oops = NULL, *next = NULL;
- CURL *handle = NULL;
+ struct oops *work_list = NULL;
+ struct oops *requeue_list = NULL;
+ char *newfilename = NULL;
pxProxyFactory *pf = NULL;
char **proxies = NULL;
- char *proxy = NULL;
-
- g_mutex_lock(&core_status.queued_mtx);
+ int result = 0;
+ CURL *handle = NULL;
+ struct curl_httppost *post = NULL;
+ struct curl_httppost *last = NULL;
- if (!g_hash_table_size(core_status.queued_oops)) {
- g_mutex_unlock(&core_status.queued_mtx);
- return;
- }
+ fprintf(stderr, "+ Begin submit_loop()\n");
- memset(result_url, 0, 4096);
+ bt_hash = g_hash_table_new(g_str_hash, g_str_equal);
if (testmode) {
+ fprintf(stderr, "+ The queue contains:\n");
print_queue();
- g_mutex_unlock(&core_status.queued_mtx);
- return;
+ fprintf(stderr, "+ Leaving submit_loop(), testmode\n");
+ return NULL;
}
- g_mutex_lock(&queued_bt_mtx);
- queue = queued_backtraces;
- queued_backtraces = NULL;
- barrier();
- g_mutex_unlock(&queued_bt_mtx);
-
- pf = px_proxy_factory_new();
- handle = curl_easy_init();
- curl_easy_setopt(handle, CURLOPT_NOBODY, 1);
- curl_easy_setopt(handle, CURLOPT_TIMEOUT, 5);
-
- for (i = 0; i < url_count; i++) {
- curl_easy_setopt(handle, CURLOPT_URL, submit_url[i]);
- if (pf)
- proxies = px_proxy_factory_get_proxies(pf, submit_url[i]);
- if (proxies) {
- proxy = proxies[0];
- curl_easy_setopt(handle, CURLOPT_PROXY, proxy);
- } else {
- proxy = NULL;
+ while (1) {
+ g_mutex_lock(&bt_mtx);
+ while (!bt_list) {
+ if (requeue_list) {
+ bt_list = requeue_list;
+ requeue_list = NULL;
+ fprintf(stderr, "+ submit_loop() requeued old work, awaiting new work\n");
+ } else {
+ fprintf(stderr, "+ submit_loop() queue empty, awaiting new work\n");
+ }
+ g_cond_wait(&bt_work, &bt_mtx);
}
- if (!curl_easy_perform(handle)) {
- submit_queue_with_url(queue, submit_url[i], proxy);
- submit = 1;
+ fprintf(stderr, "+ submit_loop() checking for work\n");
+ /* pull out current work and release the mutex */
+ work_list = bt_list;
+ bt_list = NULL;
+ g_mutex_unlock(&bt_mtx);
+
+ /* net init */
+ handle = curl_easy_init();
+ pf = px_proxy_factory_new();
+ curl_easy_setopt(handle, CURLOPT_NOBODY, 1);
+ curl_easy_setopt(handle, CURLOPT_TIMEOUT, 5);
+
+ sentcount = 0;
+ failcount = 0;
+
+ /* try to find a good url/proxy combo */
+ for (i = 0; i < url_count; i++) {
+ curl_easy_setopt(handle, CURLOPT_URL, submit_url[i]);
+
+ /* use a proxy if one is present */
+ if (pf) {
+ proxies = px_proxy_factory_get_proxies(pf, submit_url[i]);
+ if (proxies)
+ curl_easy_setopt(handle, CURLOPT_PROXY, proxies[0]);
+ }
+
+ /* check the connection before POSTing form */
+ result = curl_easy_perform(handle);
+ if (result)
+ continue;
+
+ /* have a good url/proxy now...send reports there */
+ while (work_list) {
+ oops = work_list;
+ work_list = oops->next;
+ oops->next = NULL;
+
+ fprintf(stderr, "+ attempting to POST %s\n", oops->detail_filename);
+
+ /* set up the POST data */
+ curl_formadd(&post, &last,
+ CURLFORM_COPYNAME, "crash",
+ CURLFORM_COPYCONTENTS, oops->text, CURLFORM_END);
+ curl_easy_setopt(handle, CURLOPT_HTTPPOST, post);
+ curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, writefunction);
+ result = curl_easy_perform(handle);
+ curl_formfree(post);
+
+ if (!result) {
+ fprintf(stderr, "+ successfully sent %s\n", oops->detail_filename);
+ sentcount++;
+
+ newfilename = replace_name(oops->filename, ".processed", ".submitted");
+ rename(oops->filename, newfilename);
+ free(newfilename);
+
+ g_mutex_lock(&core_status.processing_mtx);
+ remove_name_from_hash(oops->filename, core_status.processing_oops);
+ g_mutex_unlock(&core_status.processing_mtx);
+
+ g_mutex_lock(&bt_mtx);
+ g_hash_table_remove(bt_hash, oops->filename);
+ g_mutex_unlock(&bt_mtx);
+ FREE_OOPS(oops);
+ } else {
+ fprintf(stderr, "+ requeuing %s\n", oops->detail_filename);
+ failcount++;
+
+ oops->next = requeue_list;
+ requeue_list = oops;
+ }
+ }
+
for (n = 0; proxies[n]; n++)
free(proxies[n]);
free(proxies);
- break;
- }
- for (n = 0; proxies[n]; n++)
- free(proxies[n]);
- free(proxies);
- }
-
- px_proxy_factory_free(pf);
- if (submit) {
- oops = queue;
- while (oops) {
- next = oops->next;
- FREE_OOPS(oops);
- oops = next;
+ openlog("corewatcher", 0, LOG_KERN);
+ if (sentcount)
+ syslog(LOG_WARNING, "Successful sent %d coredump signatures to %s", sentcount, submit_url[i]);
+ if (failcount)
+ syslog(LOG_WARNING, "Failed to send %d coredump signatures to %s", failcount, submit_url[i]);
+ closelog();
}
- } else {
- g_mutex_lock(&queued_bt_mtx);
- queued_backtraces = queue;
- g_mutex_unlock(&queued_bt_mtx);
+
+ px_proxy_factory_free(pf);
+ curl_easy_cleanup(handle);
}
- curl_easy_cleanup(handle);
+ fprintf(stderr, "+ End submit_loop()\n");
+
+ /* curl docs say this is not thread safe...but we never get here*/
curl_global_cleanup();
- g_mutex_unlock(&core_status.queued_mtx);
+
+ g_hash_table_destroy(bt_hash);
+
+ return NULL;
}