summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/perf_event.h12
-rw-r--r--kernel/perf_event.c255
-rw-r--r--tools/perf/Documentation/perf-record.txt4
-rw-r--r--tools/perf/builtin-record.c76
-rw-r--r--tools/perf/util/parse-events.c16
-rw-r--r--tools/perf/util/trace-event-parse.c2
6 files changed, 252 insertions, 113 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6e96cc8225d..bf896d0b2e9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -589,6 +589,14 @@ enum perf_group_flag {
PERF_GROUP_SOFTWARE = 0x1,
};
+#define SWEVENT_HLIST_BITS 8
+#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
+
+struct swevent_hlist {
+ struct hlist_head heads[SWEVENT_HLIST_SIZE];
+ struct rcu_head rcu_head;
+};
+
/**
* struct perf_event - performance event kernel representation:
*/
@@ -597,6 +605,7 @@ struct perf_event {
struct list_head group_entry;
struct list_head event_entry;
struct list_head sibling_list;
+ struct hlist_node hlist_entry;
int nr_siblings;
int group_flags;
struct perf_event *group_leader;
@@ -744,6 +753,9 @@ struct perf_cpu_context {
int active_oncpu;
int max_pertask;
int exclusive;
+ struct swevent_hlist *swevent_hlist;
+ struct mutex hlist_mutex;
+ int hlist_refcount;
/*
* Recursion avoidance:
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index fcf42dcd608..07b7a435bf0 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -16,6 +16,7 @@
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
+#include <linux/hash.h>
#include <linux/sysfs.h>
#include <linux/dcache.h>
#include <linux/percpu.h>
@@ -3966,36 +3967,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
perf_swevent_overflow(event, 0, nmi, data, regs);
}
-static int perf_swevent_is_counting(struct perf_event *event)
-{
- /*
- * The event is active, we're good!
- */
- if (event->state == PERF_EVENT_STATE_ACTIVE)
- return 1;
-
- /*
- * The event is off/error, not counting.
- */
- if (event->state != PERF_EVENT_STATE_INACTIVE)
- return 0;
-
- /*
- * The event is inactive, if the context is active
- * we're part of a group that didn't make it on the 'pmu',
- * not counting.
- */
- if (event->ctx->is_active)
- return 0;
-
- /*
- * We're inactive and the context is too, this means the
- * task is scheduled out, we're counting events that happen
- * to us, like migration events.
- */
- return 1;
-}
-
static int perf_tp_event_match(struct perf_event *event,
struct perf_sample_data *data);
@@ -4019,12 +3990,6 @@ static int perf_swevent_match(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
- if (event->cpu != -1 && event->cpu != smp_processor_id())
- return 0;
-
- if (!perf_swevent_is_counting(event))
- return 0;
-
if (event->attr.type != type)
return 0;
@@ -4041,18 +4006,53 @@ static int perf_swevent_match(struct perf_event *event,
return 1;
}
-static void perf_swevent_ctx_event(struct perf_event_context *ctx,
- enum perf_type_id type,
- u32 event_id, u64 nr, int nmi,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+static inline u64 swevent_hash(u64 type, u32 event_id)
{
+ u64 val = event_id | (type << 32);
+
+ return hash_64(val, SWEVENT_HLIST_BITS);
+}
+
+static struct hlist_head *
+find_swevent_head(struct perf_cpu_context *ctx, u64 type, u32 event_id)
+{
+ u64 hash;
+ struct swevent_hlist *hlist;
+
+ hash = swevent_hash(type, event_id);
+
+ hlist = rcu_dereference(ctx->swevent_hlist);
+ if (!hlist)
+ return NULL;
+
+ return &hlist->heads[hash];
+}
+
+static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
+ u64 nr, int nmi,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct perf_cpu_context *cpuctx;
struct perf_event *event;
+ struct hlist_node *node;
+ struct hlist_head *head;
- list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+ cpuctx = &__get_cpu_var(perf_cpu_context);
+
+ rcu_read_lock();
+
+ head = find_swevent_head(cpuctx, type, event_id);
+
+ if (!head)
+ goto end;
+
+ hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
if (perf_swevent_match(event, type, event_id, data, regs))
perf_swevent_add(event, nr, nmi, data, regs);
}
+end:
+ rcu_read_unlock();
}
int perf_swevent_get_recursion_context(void)
@@ -4090,27 +4090,6 @@ void perf_swevent_put_recursion_context(int rctx)
}
EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
-static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
- u64 nr, int nmi,
- struct perf_sample_data *data,
- struct pt_regs *regs)
-{
- struct perf_cpu_context *cpuctx;
- struct perf_event_context *ctx;
-
- cpuctx = &__get_cpu_var(perf_cpu_context);
- rcu_read_lock();
- perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
- nr, nmi, data, regs);
- /*
- * doesn't really matter which of the child contexts the
- * events ends up in.
- */
- ctx = rcu_dereference(current->perf_event_ctxp);
- if (ctx)
- perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
- rcu_read_unlock();
-}
void __perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr)
@@ -4136,16 +4115,28 @@ static void perf_swevent_read(struct perf_event *event)
static int perf_swevent_enable(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
+ struct perf_cpu_context *cpuctx;
+ struct hlist_head *head;
+
+ cpuctx = &__get_cpu_var(perf_cpu_context);
if (hwc->sample_period) {
hwc->last_period = hwc->sample_period;
perf_swevent_set_period(event);
}
+
+ head = find_swevent_head(cpuctx, event->attr.type, event->attr.config);
+ if (WARN_ON_ONCE(!head))
+ return -EINVAL;
+
+ hlist_add_head_rcu(&event->hlist_entry, head);
+
return 0;
}
static void perf_swevent_disable(struct perf_event *event)
{
+ hlist_del_rcu(&event->hlist_entry);
}
static const struct pmu perf_ops_generic = {
@@ -4173,15 +4164,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
perf_sample_data_init(&data, 0);
data.period = event->hw.last_period;
regs = get_irq_regs();
- /*
- * In case we exclude kernel IPs or are somehow not in interrupt
- * context, provide the next best thing, the user IP.
- */
- if ((event->attr.exclude_kernel || !regs) &&
- !event->attr.exclude_user)
- regs = task_pt_regs(current);
- if (regs) {
+ if (regs && !perf_exclude_event(event, regs)) {
if (!(event->attr.exclude_idle && current->pid == 0))
if (perf_event_overflow(event, 0, &data, regs))
ret = HRTIMER_NORESTART;
@@ -4329,6 +4313,105 @@ static const struct pmu perf_ops_task_clock = {
.read = task_clock_perf_event_read,
};
+static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
+{
+ struct swevent_hlist *hlist;
+
+ hlist = container_of(rcu_head, struct swevent_hlist, rcu_head);
+ kfree(hlist);
+}
+
+static void swevent_hlist_release(struct perf_cpu_context *cpuctx)
+{
+ struct swevent_hlist *hlist;
+
+ if (!cpuctx->swevent_hlist)
+ return;
+
+ hlist = cpuctx->swevent_hlist;
+ rcu_assign_pointer(cpuctx->swevent_hlist, NULL);
+ call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
+}
+
+static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
+{
+ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+
+ mutex_lock(&cpuctx->hlist_mutex);
+
+ if (!--cpuctx->hlist_refcount)
+ swevent_hlist_release(cpuctx);
+
+ mutex_unlock(&cpuctx->hlist_mutex);
+}
+
+static void swevent_hlist_put(struct perf_event *event)
+{
+ int cpu;
+
+ if (event->cpu != -1) {
+ swevent_hlist_put_cpu(event, event->cpu);
+ return;
+ }
+
+ for_each_possible_cpu(cpu)
+ swevent_hlist_put_cpu(event, cpu);
+}
+
+static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
+{
+ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ int err = 0;
+
+ mutex_lock(&cpuctx->hlist_mutex);
+
+ if (!cpuctx->swevent_hlist && cpu_online(cpu)) {
+ struct swevent_hlist *hlist;
+
+ hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
+ if (!hlist) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
+ }
+ cpuctx->hlist_refcount++;
+ exit:
+ mutex_unlock(&cpuctx->hlist_mutex);
+
+ return err;
+}
+
+static int swevent_hlist_get(struct perf_event *event)
+{
+ int err;
+ int cpu, failed_cpu;
+
+ if (event->cpu != -1)
+ return swevent_hlist_get_cpu(event, event->cpu);
+
+ get_online_cpus();
+ for_each_possible_cpu(cpu) {
+ err = swevent_hlist_get_cpu(event, cpu);
+ if (err) {
+ failed_cpu = cpu;
+ goto fail;
+ }
+ }
+ put_online_cpus();
+
+ return 0;
+ fail:
+ for_each_possible_cpu(cpu) {
+ if (cpu == failed_cpu)
+ break;
+ swevent_hlist_put_cpu(event, cpu);
+ }
+
+ put_online_cpus();
+ return err;
+}
+
#ifdef CONFIG_EVENT_TRACING
void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
@@ -4362,10 +4445,13 @@ static int perf_tp_event_match(struct perf_event *event,
static void tp_perf_event_destroy(struct perf_event *event)
{
perf_trace_disable(event->attr.config);
+ swevent_hlist_put(event);
}
static const struct pmu *tp_perf_event_init(struct perf_event *event)
{
+ int err;
+
/*
* Raw tracepoint data is a severe data leak, only allow root to
* have these.
@@ -4379,6 +4465,11 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
return NULL;
event->destroy = tp_perf_event_destroy;
+ err = swevent_hlist_get(event);
+ if (err) {
+ perf_trace_disable(event->attr.config);
+ return ERR_PTR(err);
+ }
return &perf_ops_generic;
}
@@ -4479,6 +4570,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
WARN_ON(event->parent);
atomic_dec(&perf_swevent_enabled[event_id]);
+ swevent_hlist_put(event);
}
static const struct pmu *sw_perf_event_init(struct perf_event *event)
@@ -4517,6 +4609,12 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
case PERF_COUNT_SW_ALIGNMENT_FAULTS:
case PERF_COUNT_SW_EMULATION_FAULTS:
if (!event->parent) {
+ int err;
+
+ err = swevent_hlist_get(event);
+ if (err)
+ return ERR_PTR(err);
+
atomic_inc(&perf_swevent_enabled[event_id]);
event->destroy = sw_perf_event_destroy;
}
@@ -5389,6 +5487,7 @@ static void __init perf_event_init_all_cpus(void)
for_each_possible_cpu(cpu) {
cpuctx = &per_cpu(perf_cpu_context, cpu);
+ mutex_init(&cpuctx->hlist_mutex);
__perf_event_init_context(&cpuctx->ctx, NULL);
}
}
@@ -5402,6 +5501,16 @@ static void __cpuinit perf_event_init_cpu(int cpu)
spin_lock(&perf_resource_lock);
cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
spin_unlock(&perf_resource_lock);
+
+ mutex_lock(&cpuctx->hlist_mutex);
+ if (cpuctx->hlist_refcount > 0) {
+ struct swevent_hlist *hlist;
+
+ hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
+ WARN_ON_ONCE(!hlist);
+ rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
+ }
+ mutex_unlock(&cpuctx->hlist_mutex);
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -5421,6 +5530,10 @@ static void perf_event_exit_cpu(int cpu)
struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
struct perf_event_context *ctx = &cpuctx->ctx;
+ mutex_lock(&cpuctx->hlist_mutex);
+ swevent_hlist_release(cpuctx);
+ mutex_unlock(&cpuctx->hlist_mutex);
+
mutex_lock(&ctx->mutex);
smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
mutex_unlock(&ctx->mutex);
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index fc46c0b40f6..020d871c793 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -58,7 +58,7 @@ OPTIONS
-f::
--force::
- Overwrite existing data file.
+ Overwrite existing data file. (deprecated)
-c::
--count=::
@@ -101,7 +101,7 @@ OPTIONS
-R::
--raw-samples::
-Collect raw sample records from all opened counters (typically for tracepoint counters).
+Collect raw sample records from all opened counters (default for tracepoint counters).
SEE ALSO
--------
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0bde31bc8e2..a1b99eeac3c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -26,13 +26,20 @@
#include <unistd.h>
#include <sched.h>
+enum write_mode_t {
+ WRITE_FORCE,
+ WRITE_APPEND
+};
+
static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
+static unsigned int user_interval = UINT_MAX;
static long default_interval = 0;
static int nr_cpus = 0;
static unsigned int page_size;
static unsigned int mmap_pages = 128;
+static unsigned int user_freq = UINT_MAX;
static int freq = 1000;
static int output;
static int pipe_output = 0;
@@ -48,8 +55,7 @@ static pid_t *all_tids = NULL;
static int thread_num = 0;
static pid_t child_pid = -1;
static bool inherit = true;
-static bool force = false;
-static bool append_file = false;
+static enum write_mode_t write_mode = WRITE_FORCE;
static bool call_graph = false;
static bool inherit_stat = false;
static bool no_samples = false;
@@ -257,10 +263,19 @@ static void create_counter(int counter, int cpu)
if (nr_counters > 1)
attr->sample_type |= PERF_SAMPLE_ID;
- if (freq) {
- attr->sample_type |= PERF_SAMPLE_PERIOD;
- attr->freq = 1;
- attr->sample_freq = freq;
+ /*
+ * We default some events to a 1 default interval. But keep
+ * it a weak assumption overridable by the user.
+ */
+ if (!attr->sample_period || (user_freq != UINT_MAX &&
+ user_interval != UINT_MAX)) {
+ if (freq) {
+ attr->sample_type |= PERF_SAMPLE_PERIOD;
+ attr->freq = 1;
+ attr->sample_freq = freq;
+ } else {
+ attr->sample_period = default_interval;
+ }
}
if (no_samples)
@@ -467,26 +482,19 @@ static int __cmd_record(int argc, const char **argv)
if (!strcmp(output_name, "-"))
pipe_output = 1;
else if (!stat(output_name, &st) && st.st_size) {
- if (!force) {
- if (!append_file) {
- pr_err("Error, output file %s exists, use -A "
- "to append or -f to overwrite.\n",
- output_name);
- exit(-1);
- }
- } else {
+ if (write_mode == WRITE_FORCE) {
char oldname[PATH_MAX];
snprintf(oldname, sizeof(oldname), "%s.old",
output_name);
unlink(oldname);
rename(output_name, oldname);
}
- } else {
- append_file = false;
+ } else if (write_mode == WRITE_APPEND) {
+ write_mode = WRITE_FORCE;
}
flags = O_CREAT|O_RDWR;
- if (append_file)
+ if (write_mode == WRITE_APPEND)
file_new = 0;
else
flags |= O_TRUNC;
@@ -500,7 +508,8 @@ static int __cmd_record(int argc, const char **argv)
exit(-1);
}
- session = perf_session__new(output_name, O_WRONLY, force);
+ session = perf_session__new(output_name, O_WRONLY,
+ write_mode == WRITE_FORCE);
if (session == NULL) {
pr_err("Not enough memory for reading perf file header\n");
return -1;
@@ -721,6 +730,8 @@ static const char * const record_usage[] = {
NULL
};
+static bool force, append_file;
+
static const struct option options[] = {
OPT_CALLBACK('e', "event", NULL, "event",
"event selector. use 'perf list' to list available events",
@@ -742,14 +753,14 @@ static const struct option options[] = {
OPT_INTEGER('C', "profile_cpu", &profile_cpu,
"CPU to profile on"),
OPT_BOOLEAN('f', "force", &force,
- "overwrite existing data file"),
- OPT_LONG('c', "count", &default_interval,
+ "overwrite existing data file (deprecated)"),
+ OPT_LONG('c', "count", &user_interval,
"event period to sample"),
OPT_STRING('o', "output", &output_name, "file",
"output file name"),
OPT_BOOLEAN('i', "inherit", &inherit,
"child tasks inherit counters"),
- OPT_INTEGER('F', "freq", &freq,
+ OPT_INTEGER('F', "freq", &user_freq,
"profile at this frequency"),
OPT_INTEGER('m', "mmap-pages", &mmap_pages,
"number of mmap data pages"),
@@ -770,7 +781,6 @@ static const struct option options[] = {
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
- int counter;
int i,j;
argc = parse_options(argc, argv, options, record_usage,
@@ -779,6 +789,16 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
!system_wide && profile_cpu == -1)
usage_with_options(record_usage, options);
+ if (force && append_file) {
+ fprintf(stderr, "Can't overwrite and append at the same time."
+ " You need to choose between -f and -A");
+ usage_with_options(record_usage, options);
+ } else if (append_file) {
+ write_mode = WRITE_APPEND;
+ } else {
+ write_mode = WRITE_FORCE;
+ }
+
symbol__init();
if (!nr_counters) {
@@ -818,6 +838,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
if (!event_array)
return -ENOMEM;
+ if (user_interval != UINT_MAX)
+ default_interval = user_interval;
+ if (user_freq != UINT_MAX)
+ freq = user_freq;
+
/*
* User specified count overrides default frequency.
*/
@@ -830,12 +855,5 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
exit(EXIT_FAILURE);
}
- for (counter = 0; counter < nr_counters; counter++) {
- if (attrs[counter].sample_period)
- continue;
-
- attrs[counter].sample_period = default_interval;
- }
-
return __cmd_record(argc, argv);
}
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 435781e0c20..3b4ec679756 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -410,7 +410,6 @@ static enum event_result
parse_single_tracepoint_event(char *sys_name,
const char *evt_name,
unsigned int evt_length,
- char *flags,
struct perf_event_attr *attr,
const char **strp)
{
@@ -419,13 +418,11 @@ parse_single_tracepoint_event(char *sys_name,
u64 id;
int fd;
- if (flags) {
- if (!strncmp(flags, "record", strlen(flags))) {
- attr->sample_type |= PERF_SAMPLE_RAW;
- attr->sample_type |= PERF_SAMPLE_TIME;
- attr->sample_type |= PERF_SAMPLE_CPU;
- }
- }
+ attr->sample_type |= PERF_SAMPLE_RAW;
+ attr->sample_type |= PERF_SAMPLE_TIME;
+ attr->sample_type |= PERF_SAMPLE_CPU;
+
+ attr->sample_period = 1;
snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
sys_name, evt_name);
@@ -533,8 +530,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
flags);
} else
return parse_single_tracepoint_event(sys_name, evt_name,
- evt_length, flags,
- attr, strp);
+ evt_length, attr, strp);
}
static enum event_result
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 17d6d66ed76..d6ef414075a 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -761,7 +761,7 @@ static int field_is_string(struct format_field *field)
static int field_is_dynamic(struct format_field *field)
{
- if (!strcmp(field->type, "__data_loc"))
+ if (!strncmp(field->type, "__data_loc", 10))
return 1;
return 0;