summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/arm/include/asm/perf_event.h4
-rw-r--r--arch/frv/include/asm/perf_event.h2
-rw-r--r--arch/hexagon/include/asm/perf_event.h2
-rw-r--r--arch/powerpc/include/asm/perf_event_server.h2
-rw-r--r--arch/powerpc/kernel/perf_event.c6
-rw-r--r--arch/s390/include/asm/perf_event.h1
-rw-r--r--arch/x86/include/asm/perf_event.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c82
-rw-r--r--arch/x86/kernel/cpu/perf_event.h8
-rw-r--r--include/linux/perf_event.h11
-rw-r--r--kernel/events/core.c53
-rw-r--r--kernel/events/hw_breakpoint.c7
-rw-r--r--tools/perf/builtin-test.c177
13 files changed, 328 insertions, 29 deletions
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 99cfe360798..7523340afb8 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,10 +12,6 @@
#ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__
-/* ARM performance counters start from 1 (in the cp15 accesses) so use the
- * same indexes here for consistency. */
-#define PERF_EVENT_INDEX_OFFSET 1
-
/* ARM perf PMU IDs for use by internal perf clients. */
enum arm_perf_pmu_ids {
ARM_PERF_PMU_ID_XSCALE1 = 0,
diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h
index a69e0155d14..c52ea5546b5 100644
--- a/arch/frv/include/asm/perf_event.h
+++ b/arch/frv/include/asm/perf_event.h
@@ -12,6 +12,4 @@
#ifndef _ASM_PERF_EVENT_H
#define _ASM_PERF_EVENT_H
-#define PERF_EVENT_INDEX_OFFSET 0
-
#endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h
index 6c2910f9118..8b8526b491c 100644
--- a/arch/hexagon/include/asm/perf_event.h
+++ b/arch/hexagon/include/asm/perf_event.h
@@ -19,6 +19,4 @@
#ifndef _ASM_PERF_EVENT_H
#define _ASM_PERF_EVENT_H
-#define PERF_EVENT_INDEX_OFFSET 0
-
#endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 8f1df1208d2..1a8093fa8f7 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -61,8 +61,6 @@ struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs);
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-#define PERF_EVENT_INDEX_OFFSET 1
-
/*
* Only override the default definitions in include/linux/perf_event.h
* if we have hardware PMU support.
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 10a140f82cb..d614ab57ccc 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1187,6 +1187,11 @@ static int power_pmu_event_init(struct perf_event *event)
return err;
}
+static int power_pmu_event_idx(struct perf_event *event)
+{
+ return event->hw.idx;
+}
+
struct pmu power_pmu = {
.pmu_enable = power_pmu_enable,
.pmu_disable = power_pmu_disable,
@@ -1199,6 +1204,7 @@ struct pmu power_pmu = {
.start_txn = power_pmu_start_txn,
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
+ .event_idx = power_pmu_event_idx,
};
/*
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index a75f168d271..4eb444edbe4 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -6,4 +6,3 @@
/* Empty, just to avoid compiling error */
-#define PERF_EVENT_INDEX_OFFSET 0
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 096c975e099..9b922c13625 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void);
#ifdef CONFIG_PERF_EVENTS
extern void perf_events_lapic_init(void);
-#define PERF_EVENT_INDEX_OFFSET 0
-
/*
* Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
* This flag is otherwise unused and ABI specified to be 0, so nobody should
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5adce1040b1..f8bddb5b060 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
+#include <linux/device.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -31,6 +32,7 @@
#include <asm/compat.h>
#include <asm/smp.h>
#include <asm/alternative.h>
+#include <asm/timer.h>
#include "perf_event.h"
@@ -1210,6 +1212,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
break;
case CPU_STARTING:
+ if (x86_pmu.attr_rdpmc)
+ set_in_cr4(X86_CR4_PCE);
if (x86_pmu.cpu_starting)
x86_pmu.cpu_starting(cpu);
break;
@@ -1319,6 +1323,8 @@ static int __init init_hw_perf_events(void)
}
}
+ x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -1542,10 +1548,71 @@ static int x86_pmu_event_init(struct perf_event *event)
return err;
}
+static int x86_pmu_event_idx(struct perf_event *event)
+{
+ int idx = event->hw.idx;
+
+ if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
+ idx -= X86_PMC_IDX_FIXED;
+ idx |= 1 << 30;
+ }
+
+ return idx + 1;
+}
+
+static ssize_t get_attr_rdpmc(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
+}
+
+static void change_rdpmc(void *info)
+{
+ bool enable = !!(unsigned long)info;
+
+ if (enable)
+ set_in_cr4(X86_CR4_PCE);
+ else
+ clear_in_cr4(X86_CR4_PCE);
+}
+
+static ssize_t set_attr_rdpmc(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long val = simple_strtoul(buf, NULL, 0);
+
+ if (!!val != !!x86_pmu.attr_rdpmc) {
+ x86_pmu.attr_rdpmc = !!val;
+ smp_call_function(change_rdpmc, (void *)val, 1);
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
+
+static struct attribute *x86_pmu_attrs[] = {
+ &dev_attr_rdpmc.attr,
+ NULL,
+};
+
+static struct attribute_group x86_pmu_attr_group = {
+ .attrs = x86_pmu_attrs,
+};
+
+static const struct attribute_group *x86_pmu_attr_groups[] = {
+ &x86_pmu_attr_group,
+ NULL,
+};
+
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
+ .attr_groups = x86_pmu_attr_groups,
+
.event_init = x86_pmu_event_init,
.add = x86_pmu_add,
@@ -1557,8 +1624,23 @@ static struct pmu pmu = {
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
+
+ .event_idx = x86_pmu_event_idx,
};
+void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ return;
+
+ if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+ return;
+
+ userpg->time_mult = this_cpu_read(cyc2ns);
+ userpg->time_shift = CYC2NS_SCALE_FACTOR;
+ userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+}
+
/*
* callchain support
*/
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8944062f46e..513d617b93c 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -307,6 +307,14 @@ struct x86_pmu {
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
+ /*
+ * sysfs attrs
+ */
+ int attr_rdpmc;
+
+ /*
+ * CPU Hotplug hooks
+ */
int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 08855613ceb..0b91db2522c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -291,12 +291,14 @@ struct perf_event_mmap_page {
__s64 offset; /* add to hardware event value */
__u64 time_enabled; /* time event active */
__u64 time_running; /* time event on cpu */
+ __u32 time_mult, time_shift;
+ __u64 time_offset;
/*
* Hole for extension of the self monitor capabilities
*/
- __u64 __reserved[123]; /* align to 1k */
+ __u64 __reserved[121]; /* align to 1k */
/*
* Control data for the mmap() data buffer.
@@ -615,6 +617,7 @@ struct pmu {
struct list_head entry;
struct device *dev;
+ const struct attribute_group **attr_groups;
char *name;
int type;
@@ -680,6 +683,12 @@ struct pmu {
* for each successful ->add() during the transaction.
*/
void (*cancel_txn) (struct pmu *pmu); /* optional */
+
+ /*
+ * Will return the value for perf_event_mmap_page::index for this event,
+ * if no implementation is provided it will default to: event->hw.idx + 1.
+ */
+ int (*event_idx) (struct perf_event *event); /*optional */
};
/**
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 32b48c88971..de859fb4038 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3208,10 +3208,6 @@ int perf_event_task_disable(void)
return 0;
}
-#ifndef PERF_EVENT_INDEX_OFFSET
-# define PERF_EVENT_INDEX_OFFSET 0
-#endif
-
static int perf_event_index(struct perf_event *event)
{
if (event->hw.state & PERF_HES_STOPPED)
@@ -3220,21 +3216,26 @@ static int perf_event_index(struct perf_event *event)
if (event->state != PERF_EVENT_STATE_ACTIVE)
return 0;
- return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
+ return event->pmu->event_idx(event);
}
static void calc_timer_values(struct perf_event *event,
+ u64 *now,
u64 *enabled,
u64 *running)
{
- u64 now, ctx_time;
+ u64 ctx_time;
- now = perf_clock();
- ctx_time = event->shadow_ctx_time + now;
+ *now = perf_clock();
+ ctx_time = event->shadow_ctx_time + *now;
*enabled = ctx_time - event->tstamp_enabled;
*running = ctx_time - event->tstamp_running;
}
+void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+}
+
/*
* Callers need to ensure there can be no nesting of this function, otherwise
* the seqlock logic goes bad. We can not serialize this because the arch
@@ -3244,7 +3245,7 @@ void perf_event_update_userpage(struct perf_event *event)
{
struct perf_event_mmap_page *userpg;
struct ring_buffer *rb;
- u64 enabled, running;
+ u64 enabled, running, now;
rcu_read_lock();
/*
@@ -3256,7 +3257,7 @@ void perf_event_update_userpage(struct perf_event *event)
* because of locking issue as we can be called in
* NMI context
*/
- calc_timer_values(event, &enabled, &running);
+ calc_timer_values(event, &now, &enabled, &running);
rb = rcu_dereference(event->rb);
if (!rb)
goto unlock;
@@ -3272,7 +3273,7 @@ void perf_event_update_userpage(struct perf_event *event)
barrier();
userpg->index = perf_event_index(event);
userpg->offset = perf_event_count(event);
- if (event->state == PERF_EVENT_STATE_ACTIVE)
+ if (userpg->index)
userpg->offset -= local64_read(&event->hw.prev_count);
userpg->time_enabled = enabled +
@@ -3281,6 +3282,8 @@ void perf_event_update_userpage(struct perf_event *event)
userpg->time_running = running +
atomic64_read(&event->child_total_time_running);
+ perf_update_user_clock(userpg, now);
+
barrier();
++userpg->lock;
preempt_enable();
@@ -3538,6 +3541,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
event->mmap_user = get_current_user();
vma->vm_mm->pinned_vm += event->mmap_locked;
+ perf_event_update_userpage(event);
+
unlock:
if (!ret)
atomic_inc(&event->mmap_count);
@@ -3769,7 +3774,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
static void perf_output_read(struct perf_output_handle *handle,
struct perf_event *event)
{
- u64 enabled = 0, running = 0;
+ u64 enabled = 0, running = 0, now;
u64 read_format = event->attr.read_format;
/*
@@ -3782,7 +3787,7 @@ static void perf_output_read(struct perf_output_handle *handle,
* NMI context
*/
if (read_format & PERF_FORMAT_TOTAL_TIMES)
- calc_timer_values(event, &enabled, &running);
+ calc_timer_values(event, &now, &enabled, &running);
if (event->attr.read_format & PERF_FORMAT_GROUP)
perf_output_read_group(handle, event, enabled, running);
@@ -4994,6 +4999,11 @@ static int perf_swevent_init(struct perf_event *event)
return 0;
}
+static int perf_swevent_event_idx(struct perf_event *event)
+{
+ return 0;
+}
+
static struct pmu perf_swevent = {
.task_ctx_nr = perf_sw_context,
@@ -5003,6 +5013,8 @@ static struct pmu perf_swevent = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
+
+ .event_idx = perf_swevent_event_idx,
};
#ifdef CONFIG_EVENT_TRACING
@@ -5089,6 +5101,8 @@ static struct pmu perf_tracepoint = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
+
+ .event_idx = perf_swevent_event_idx,
};
static inline void perf_tp_register(void)
@@ -5308,6 +5322,8 @@ static struct pmu perf_cpu_clock = {
.start = cpu_clock_event_start,
.stop = cpu_clock_event_stop,
.read = cpu_clock_event_read,
+
+ .event_idx = perf_swevent_event_idx,
};
/*
@@ -5380,6 +5396,8 @@ static struct pmu perf_task_clock = {
.start = task_clock_event_start,
.stop = task_clock_event_stop,
.read = task_clock_event_read,
+
+ .event_idx = perf_swevent_event_idx,
};
static void perf_pmu_nop_void(struct pmu *pmu)
@@ -5407,6 +5425,11 @@ static void perf_pmu_cancel_txn(struct pmu *pmu)
perf_pmu_enable(pmu);
}
+static int perf_event_idx_default(struct perf_event *event)
+{
+ return event->hw.idx + 1;
+}
+
/*
* Ensures all contexts with the same task_ctx_nr have the same
* pmu_cpu_context too.
@@ -5493,6 +5516,7 @@ static int pmu_dev_alloc(struct pmu *pmu)
if (!pmu->dev)
goto out;
+ pmu->dev->groups = pmu->attr_groups;
device_initialize(pmu->dev);
ret = dev_set_name(pmu->dev, "%s", pmu->name);
if (ret)
@@ -5596,6 +5620,9 @@ got_cpu_context:
pmu->pmu_disable = perf_pmu_nop_void;
}
+ if (!pmu->event_idx)
+ pmu->event_idx = perf_event_idx_default;
+
list_add_rcu(&pmu->entry, &pmus);
ret = 0;
unlock:
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index b7971d6f38b..b0309f76d77 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -613,6 +613,11 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags)
bp->hw.state = PERF_HES_STOPPED;
}
+static int hw_breakpoint_event_idx(struct perf_event *bp)
+{
+ return 0;
+}
+
static struct pmu perf_breakpoint = {
.task_ctx_nr = perf_sw_context, /* could eventually get its own */
@@ -622,6 +627,8 @@ static struct pmu perf_breakpoint = {
.start = hw_breakpoint_start,
.stop = hw_breakpoint_stop,
.read = hw_breakpoint_pmu_read,
+
+ .event_idx = hw_breakpoint_event_idx,
};
int __init init_hw_breakpoint(void)
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 3ce709e9746..70c4eb2bdf7 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -15,6 +15,8 @@
#include "util/thread_map.h"
#include "../../include/linux/hw_breakpoint.h"
+#include <sys/mman.h>
+
static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym)
{
bool *visited = symbol__priv(sym);
@@ -1296,6 +1298,173 @@ out:
return (err < 0 || errs > 0) ? -1 : 0;
}
+
+#if defined(__x86_64__) || defined(__i386__)
+
+#define barrier() asm volatile("" ::: "memory")
+
+static u64 rdpmc(unsigned int counter)
+{
+ unsigned int low, high;
+
+ asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+ return low | ((u64)high) << 32;
+}
+
+static u64 rdtsc(void)
+{
+ unsigned int low, high;
+
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+ return low | ((u64)high) << 32;
+}
+
+static u64 mmap_read_self(void *addr)
+{
+ struct perf_event_mmap_page *pc = addr;
+ u32 seq, idx, time_mult = 0, time_shift = 0;
+ u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
+
+ do {
+ seq = pc->lock;
+ barrier();
+
+ enabled = pc->time_enabled;
+ running = pc->time_running;
+
+ if (enabled != running) {
+ cyc = rdtsc();
+ time_mult = pc->time_mult;
+ time_shift = pc->time_shift;
+ time_offset = pc->time_offset;
+ }
+
+ idx = pc->index;
+ count = pc->offset;
+ if (idx)
+ count += rdpmc(idx - 1);
+
+ barrier();
+ } while (pc->lock != seq);
+
+ if (enabled != running) {
+ u64 quot, rem;
+
+ quot = (cyc >> time_shift);
+ rem = cyc & ((1 << time_shift) - 1);
+ delta = time_offset + quot * time_mult +
+ ((rem * time_mult) >> time_shift);
+
+ enabled += delta;
+ if (idx)
+ running += delta;
+
+ quot = count / running;
+ rem = count % running;
+ count = quot * enabled + (rem * enabled) / running;
+ }
+
+ return count;
+}
+
+/*
+ * If the RDPMC instruction faults then signal this back to the test parent task:
+ */
+static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used)
+{
+ exit(-1);
+}
+
+static int __test__rdpmc(void)
+{
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ volatile int tmp = 0;
+ u64 i, loops = 1000;
+ int n;
+ int fd;
+ void *addr;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .exclude_kernel = 1,
+ };
+ u64 delta_sum = 0;
+ struct sigaction sa;
+
+ sigfillset(&sa.sa_mask);
+ sa.sa_sigaction = segfault_handler;
+ sigaction(SIGSEGV, &sa, NULL);
+
+ fprintf(stderr, "\n\n");
+
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (fd < 0) {
+ die("Error: sys_perf_event_open() syscall returned "
+ "with %d (%s)\n", fd, strerror(errno));
+ }
+
+ addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (addr == (void *)(-1)) {
+ die("Error: mmap() syscall returned "
+ "with (%s)\n", strerror(errno));
+ }
+
+ for (n = 0; n < 6; n++) {
+ u64 stamp, now, delta;
+
+ stamp = mmap_read_self(addr);
+
+ for (i = 0; i < loops; i++)
+ tmp++;
+
+ now = mmap_read_self(addr);
+ loops *= 10;
+
+ delta = now - stamp;
+ fprintf(stderr, "%14d: %14Lu\n", n, (long long)delta);
+
+ delta_sum += delta;
+ }
+
+ munmap(addr, page_size);
+ close(fd);
+
+ fprintf(stderr, " ");
+
+ if (!delta_sum)
+ return -1;
+
+ return 0;
+}
+
+static int test__rdpmc(void)
+{
+ int status = 0;
+ int wret = 0;
+ int ret;
+ int pid;
+
+ pid = fork();
+ if (pid < 0)
+ return -1;
+
+ if (!pid) {
+ ret = __test__rdpmc();
+
+ exit(ret);
+ }
+
+ wret = waitpid(pid, &status, 0);
+ if (wret < 0 || status)
+ return -1;
+
+ return 0;
+}
+
+#endif
+
static struct test {
const char *desc;
int (*func)(void);
@@ -1320,6 +1489,12 @@ static struct test {
.desc = "parse events tests",
.func = test__parse_events,
},
+#if defined(__x86_64__) || defined(__i386__)
+ {
+ .desc = "x86 rdpmc test",
+ .func = test__rdpmc,
+ },
+#endif
{
.desc = "Validate PERF_RECORD_* events & perf_sample fields",
.func = test__PERF_RECORD,
@@ -1412,7 +1587,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
if (symbol__init() < 0)
return -1;
- setup_pager();
-
return __cmd_test(argc, argv);
}