diff options
author | Ingo Molnar <mingo@elte.hu> | 2012-03-12 20:46:35 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2012-03-12 20:47:05 +0100 |
commit | bea95c152dee1791dd02cbc708afbb115bb00f9a (patch) | |
tree | af9994c42c5fdd81ba3dadd7b812e2fa85273353 /tools | |
parent | f9b4eeb809c6d031cc9561cc34dd691701cb2c2a (diff) | |
parent | 24bff2dc0f77b1f186b7bdf30060caf3df191a68 (diff) | |
download | linux-3.10-bea95c152dee1791dd02cbc708afbb115bb00f9a.tar.gz linux-3.10-bea95c152dee1791dd02cbc708afbb115bb00f9a.tar.bz2 linux-3.10-bea95c152dee1791dd02cbc708afbb115bb00f9a.zip |
Merge branch 'perf/hw-branch-sampling' into perf/core
Merge reason: The 'perf record -b' hardware branch sampling feature is ready for upstream.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/perf/Documentation/perf-record.txt | 30 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-report.txt | 10 | ||||
-rw-r--r-- | tools/perf/builtin-record.c | 95 | ||||
-rw-r--r-- | tools/perf/builtin-report.c | 178 | ||||
-rw-r--r-- | tools/perf/perf.h | 18 | ||||
-rw-r--r-- | tools/perf/util/event.h | 1 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 14 | ||||
-rw-r--r-- | tools/perf/util/header.c | 207 | ||||
-rw-r--r-- | tools/perf/util/header.h | 2 | ||||
-rw-r--r-- | tools/perf/util/hist.c | 122 | ||||
-rw-r--r-- | tools/perf/util/hist.h | 11 | ||||
-rw-r--r-- | tools/perf/util/session.c | 77 | ||||
-rw-r--r-- | tools/perf/util/session.h | 4 | ||||
-rw-r--r-- | tools/perf/util/sort.c | 287 | ||||
-rw-r--r-- | tools/perf/util/sort.h | 11 | ||||
-rw-r--r-- | tools/perf/util/symbol.h | 20 | ||||
-rw-r--r-- | tools/perf/util/ui/browsers/hists.c | 102 |
17 files changed, 1023 insertions, 166 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index a5766b4b012..a1386b2fff0 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -152,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha corresponding events, i.e., they always refer to events defined earlier on the command line. +-b:: +--branch-any:: +Enable taken branch stack sampling. Any type of taken branch may be sampled. +This is a shortcut for --branch-filter any. See --branch-filter for more infos. + +-j:: +--branch-filter:: +Enable taken branch stack sampling. Each sample captures a series of consecutive +taken branches. The number of branches captured with each sample depends on the +underlying hardware, the type of branches of interest, and the executed code. +It is possible to select the types of branches captured by enabling filters. The +following filters are defined: + + - any: any type of branches + - any_call: any function call or system call + - any_ret: any function return or system call return + - any_ind: any indirect branch + - u: only when the branch target is at the user level + - k: only when the branch target is in the kernel + - hv: only when the target is at the hypervisor level + ++ +The option requires at least one branch type among any, any_call, any_ret, ind_call. +The privilege levels may be ommitted, in which case, the privilege levels of the associated +event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege +levels are subject to permissions. When sampling on multiple events, branch stack sampling +is enabled for all the sampling events. The sampled branch type is the same for all events. +The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k +Note that this feature may not be available on all processors. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 9b430e98712..87feeee8b90 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -153,6 +153,16 @@ OPTIONS information which may be very large and thus may clutter the display. It currently includes: cpu and numa topology of the host system. +-b:: +--branch-stack:: + Use the addresses of sampled taken branches instead of the instruction + address to build the histograms. To generate meaningful output, the + perf.data file must have been obtained using perf record -b or + perf record --branch-filter xxx where xxx is a branch filter option. + perf report is able to auto-detect whether a perf.data file contains + branch stacks and it will automatically switch to the branch view mode, + unless --no-branch-stack is used. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-annotate[1] diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 75d230fef20..be4e1eee782 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) if (!have_tracepoints(&evsel_list->entries)) perf_header__clear_feat(&session->header, HEADER_TRACE_INFO); + if (!rec->opts.branch_stack) + perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); + if (!rec->file_new) { err = perf_session__read_header(session, output); if (err < 0) @@ -638,6 +641,90 @@ out_delete_session: return err; } +#define BRANCH_OPT(n, m) \ + { .name = n, .mode = (m) } + +#define BRANCH_END { .name = NULL } + +struct branch_mode { + const char *name; + int mode; +}; + +static const struct branch_mode branch_modes[] = { + BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER), + BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL), + BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV), + BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY), + BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), + BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), + BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), + BRANCH_END +}; + +static int +parse_branch_stack(const struct option *opt, const char *str, int unset) +{ +#define ONLY_PLM \ + (PERF_SAMPLE_BRANCH_USER |\ + PERF_SAMPLE_BRANCH_KERNEL |\ + PERF_SAMPLE_BRANCH_HV) + + uint64_t *mode = (uint64_t *)opt->value; + const struct branch_mode *br; + char *s, *os = NULL, *p; + int ret = -1; + + if (unset) + return 0; + + /* + * cannot set it twice, -b + --branch-filter for instance + */ + if (*mode) + return -1; + + /* str may be NULL in case no arg is passed to -b */ + if (str) { + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + for (br = branch_modes; br->name; br++) { + if (!strcasecmp(s, br->name)) + break; + } + if (!br->name) { + ui__warning("unknown branch filter %s," + " check man page\n", s); + goto error; + } + + *mode |= br->mode; + + if (!p) + break; + + s = p + 1; + } + } + ret = 0; + + /* default to any branch */ + if ((*mode & ~ONLY_PLM) == 0) { + *mode = PERF_SAMPLE_BRANCH_ANY; + } +error: + free(os); + return ret; +} + static const char * const record_usage[] = { "perf record [<options>] [<command>]", "perf record [<options>] -- <command> [<options>]", @@ -727,6 +814,14 @@ const struct option record_options[] = { "monitor event in cgroup name only", parse_cgroups), OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), + + OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, + "branch any", "sample any taken branches", + parse_branch_stack), + + OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, + "branch filter mask", "branch stack filter modes", + parse_branch_stack), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 25d34d483e4..8e91c6eba18 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -53,6 +53,82 @@ struct perf_report { DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; +static int perf_report__add_branch_hist_entry(struct perf_tool *tool, + struct addr_location *al, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct perf_report *rep = container_of(tool, struct perf_report, tool); + struct symbol *parent = NULL; + int err = 0; + unsigned i; + struct hist_entry *he; + struct branch_info *bi, *bx; + + if ((sort__has_parent || symbol_conf.use_callchain) + && sample->callchain) { + err = machine__resolve_callchain(machine, evsel, al->thread, + sample->callchain, &parent); + if (err) + return err; + } + + bi = machine__resolve_bstack(machine, al->thread, + sample->branch_stack); + if (!bi) + return -ENOMEM; + + for (i = 0; i < sample->branch_stack->nr; i++) { + if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) + continue; + /* + * The report shows the percentage of total branches captured + * and not events sampled. Thus we use a pseudo period of 1. + */ + he = __hists__add_branch_entry(&evsel->hists, al, parent, + &bi[i], 1); + if (he) { + struct annotation *notes; + err = -ENOMEM; + bx = he->branch_info; + if (bx->from.sym && use_browser > 0) { + notes = symbol__annotation(bx->from.sym); + if (!notes->src + && symbol__alloc_hist(bx->from.sym) < 0) + goto out; + + err = symbol__inc_addr_samples(bx->from.sym, + bx->from.map, + evsel->idx, + bx->from.al_addr); + if (err) + goto out; + } + + if (bx->to.sym && use_browser > 0) { + notes = symbol__annotation(bx->to.sym); + if (!notes->src + && symbol__alloc_hist(bx->to.sym) < 0) + goto out; + + err = symbol__inc_addr_samples(bx->to.sym, + bx->to.map, + evsel->idx, + bx->to.al_addr); + if (err) + goto out; + } + evsel->hists.stats.total_period += 1; + hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + err = 0; + } else + return -ENOMEM; + } +out: + return err; +} + static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, struct addr_location *al, struct perf_sample *sample, @@ -126,14 +202,21 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) return 0; - if (al.map != NULL) - al.map->dso->hit = 1; + if (sort__branch_mode == 1) { + if (perf_report__add_branch_hist_entry(tool, &al, sample, + evsel, machine)) { + pr_debug("problem adding lbr entry, skipping event\n"); + return -1; + } + } else { + if (al.map != NULL) + al.map->dso->hit = 1; - if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { - pr_debug("problem incrementing symbol period, skipping event\n"); - return -1; + if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { + pr_debug("problem incrementing symbol period, skipping event\n"); + return -1; + } } - return 0; } @@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep) } } + if (sort__branch_mode == 1) { + if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) { + fprintf(stderr, "selected -b but no branch data." + " Did you call perf record without" + " -b?\n"); + return -1; + } + } + return 0; } @@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep) { int ret = -EINVAL; u64 nr_samples; - struct perf_session *session; + struct perf_session *session = rep->session; struct perf_evsel *pos; struct map *kernel_map; struct kmap *kernel_kmap; @@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep) signal(SIGINT, sig_handler); - session = perf_session__new(rep->input_name, O_RDONLY, - rep->force, false, &rep->tool); - if (session == NULL) - return -ENOMEM; - - rep->session = session; - if (rep->cpu_list) { ret = perf_session__cpu_bitmap(session, rep->cpu_list, rep->cpu_bitmap); @@ -427,9 +512,19 @@ setup: return 0; } +static int +parse_branch_mode(const struct option *opt __used, const char *str __used, int unset) +{ + sort__branch_mode = !unset; + return 0; +} + int cmd_report(int argc, const char **argv, const char *prefix __used) { + struct perf_session *session; struct stat st; + bool has_br_stack = false; + int ret = -1; char callchain_default_opt[] = "fractal,0.5,callee"; const char * const report_usage[] = { "perf report [<options>]", @@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) OPT_BOOLEAN(0, "stdio", &report.use_stdio, "Use the stdio interface"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol, parent"), + "sort by key(s): pid, comm, dso, symbol, parent, dso_to," + " dso_from, symbol_to, symbol_from, mispredict"), OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_STRING('p', "parent", &parent_pattern, "regex", @@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), + OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", + "use branch records for histogram filling", parse_branch_mode), OPT_END() }; @@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) else report.input_name = "perf.data"; } + session = perf_session__new(report.input_name, O_RDONLY, + report.force, false, &report.tool); + if (session == NULL) + return -ENOMEM; - if (strcmp(report.input_name, "-") != 0) + report.session = session; + + has_br_stack = perf_header__has_feat(&session->header, + HEADER_BRANCH_STACK); + + if (sort__branch_mode == -1 && has_br_stack) + sort__branch_mode = 1; + + /* sort__branch_mode could be 0 if --no-branch-stack */ + if (sort__branch_mode == 1) { + /* + * if no sort_order is provided, then specify + * branch-mode specific order + */ + if (sort_order == default_sort_order) + sort_order = "comm,dso_from,symbol_from," + "dso_to,symbol_to"; + + } + + if (strcmp(report.input_name, "-") != 0) { setup_browser(true); - else + } else { use_browser = 0; + } /* * Only in the newt browser we are doing integrated annotation, @@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) } if (symbol__init() < 0) - return -1; + goto error; setup_sorting(report_usage, options); if (parent_pattern != default_parent_pattern) { if (sort_dimension__add("parent") < 0) - return -1; + goto error; /* * Only show the parent fields if we explicitly @@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) if (argc) usage_with_options(report_usage, options); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); - sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); - return __cmd_report(&report); + if (sort__branch_mode == 1) { + sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout); + sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout); + sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); + sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); + } else { + sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); + sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); + } + + ret = __cmd_report(&report); +error: + perf_session__delete(session); + return ret; } diff --git a/tools/perf/perf.h b/tools/perf/perf.h index f0227e93665..eec392e4806 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -179,6 +179,23 @@ struct ip_callchain { u64 ips[0]; }; +struct branch_flags { + u64 mispred:1; + u64 predicted:1; + u64 reserved:62; +}; + +struct branch_entry { + u64 from; + u64 to; + struct branch_flags flags; +}; + +struct branch_stack { + u64 nr; + struct branch_entry entries[0]; +}; + extern bool perf_host, perf_guest; extern const char perf_version_string[]; @@ -205,6 +222,7 @@ struct perf_record_opts { unsigned int freq; unsigned int mmap_pages; unsigned int user_freq; + int branch_stack; u64 default_interval; u64 user_interval; const char *cpu_list; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index cbdeaad9c5e..1b197280c62 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -81,6 +81,7 @@ struct perf_sample { u32 raw_size; void *raw_data; struct ip_callchain *callchain; + struct branch_stack *branch_stack; }; #define BUILD_ID_SIZE 20 diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 302d49a9f98..f421f7cbc0d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -126,6 +126,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) attr->watermark = 0; attr->wakeup_events = 1; } + if (opts->branch_stack) { + attr->sample_type |= PERF_SAMPLE_BRANCH_STACK; + attr->branch_sample_type = opts->branch_stack; + } attr->mmap = track; attr->comm = track; @@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, data->raw_data = (void *) pdata; } + if (type & PERF_SAMPLE_BRANCH_STACK) { + u64 sz; + + data->branch_stack = (struct branch_stack *)array; + array++; /* nr */ + + sz = data->branch_stack->nr * sizeof(struct branch_entry); + sz /= sizeof(u64); + array += sz; + } return 0; } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 9f867d96c6a..0d9b6da86a3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1023,6 +1023,12 @@ write_it: return do_write_string(fd, buffer); } +static int write_branch_stack(int fd __used, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + return 0; +} + static void print_hostname(struct perf_header *ph, int fd, FILE *fp) { char *str = do_read_string(fd, ph); @@ -1144,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) uint64_t id; void *buf = NULL; char *str; - u32 nre, sz, nr, i, j, msz; - int ret; + u32 nre, sz, nr, i, j; + ssize_t ret; + size_t msz; /* number of events */ ret = read(fd, &nre, sizeof(nre)); @@ -1162,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) if (ph->needs_swap) sz = bswap_32(sz); - /* - * ensure it is at least to our ABI rev - */ - if (sz < (u32)sizeof(attr)) - goto error; - memset(&attr, 0, sizeof(attr)); - /* read entire region to sync up to next field */ + /* buffer to hold on file attr struct */ buf = malloc(sz); if (!buf) goto error; msz = sizeof(attr); - if (sz < msz) + if (sz < (ssize_t)msz) msz = sz; for (i = 0 ; i < nre; i++) { + /* + * must read entire on-file attr struct to + * sync up with layout. + */ ret = read(fd, buf, sz); if (ret != (ssize_t)sz) goto error; @@ -1316,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) free(str); } +static void print_branch_stack(struct perf_header *ph __used, int fd __used, + FILE *fp) +{ + fprintf(fp, "# contains samples with branch stack\n"); +} + static int __event_process_build_id(struct build_id_event *bev, char *filename, struct perf_session *session) @@ -1520,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPA(HEADER_CMDLINE, cmdline), FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), + FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), }; struct header_print_data { @@ -1804,35 +1816,101 @@ out_free: return err; } -static int check_magic_endian(u64 *magic, struct perf_file_header *header, - struct perf_header *ph) +static const int attr_file_abi_sizes[] = { + [0] = PERF_ATTR_SIZE_VER0, + [1] = PERF_ATTR_SIZE_VER1, + 0, +}; + +/* + * In the legacy file format, the magic number is not used to encode endianness. + * hdr_sz was used to encode endianness. But given that hdr_sz can vary based + * on ABI revisions, we need to try all combinations for all endianness to + * detect the endianness. + */ +static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph) { - int ret; + uint64_t ref_size, attr_size; + int i; - /* check for legacy format */ - ret = memcmp(magic, __perf_magic1, sizeof(*magic)); - if (ret == 0) { - pr_debug("legacy perf.data format\n"); - if (!header) - return -1; + for (i = 0 ; attr_file_abi_sizes[i]; i++) { + ref_size = attr_file_abi_sizes[i] + + sizeof(struct perf_file_section); + if (hdr_sz != ref_size) { + attr_size = bswap_64(hdr_sz); + if (attr_size != ref_size) + continue; - if (header->attr_size != sizeof(struct perf_file_attr)) { - u64 attr_size = bswap_64(header->attr_size); + ph->needs_swap = true; + } + pr_debug("ABI%d perf.data file detected, need_swap=%d\n", + i, + ph->needs_swap); + return 0; + } + /* could not determine endianness */ + return -1; +} - if (attr_size != sizeof(struct perf_file_attr)) - return -1; +#define PERF_PIPE_HDR_VER0 16 + +static const size_t attr_pipe_abi_sizes[] = { + [0] = PERF_PIPE_HDR_VER0, + 0, +}; + +/* + * In the legacy pipe format, there is an implicit assumption that endiannesss + * between host recording the samples, and host parsing the samples is the + * same. This is not always the case given that the pipe output may always be + * redirected into a file and analyzed on a different machine with possibly a + * different endianness and perf_event ABI revsions in the perf tool itself. + */ +static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph) +{ + u64 attr_size; + int i; + + for (i = 0 ; attr_pipe_abi_sizes[i]; i++) { + if (hdr_sz != attr_pipe_abi_sizes[i]) { + attr_size = bswap_64(hdr_sz); + if (attr_size != hdr_sz) + continue; ph->needs_swap = true; } + pr_debug("Pipe ABI%d perf.data file detected\n", i); return 0; } + return -1; +} + +static int check_magic_endian(u64 magic, uint64_t hdr_sz, + bool is_pipe, struct perf_header *ph) +{ + int ret; + + /* check for legacy format */ + ret = memcmp(&magic, __perf_magic1, sizeof(magic)); + if (ret == 0) { + pr_debug("legacy perf.data format\n"); + if (is_pipe) + return try_all_pipe_abis(hdr_sz, ph); + + return try_all_file_abis(hdr_sz, ph); + } + /* + * the new magic number serves two purposes: + * - unique number to identify actual perf.data files + * - encode endianness of file + */ - /* check magic number with same endianness */ - if (*magic == __perf_magic2) + /* check magic number with one endianness */ + if (magic == __perf_magic2) return 0; - /* check magic number but opposite endianness */ - if (*magic != __perf_magic2_sw) + /* check magic number with opposite endianness */ + if (magic != __perf_magic2_sw) return -1; ph->needs_swap = true; @@ -1851,8 +1929,11 @@ int perf_file_header__read(struct perf_file_header *header, if (ret <= 0) return -1; - if (check_magic_endian(&header->magic, header, ph) < 0) + if (check_magic_endian(header->magic, + header->attr_size, false, ph) < 0) { + pr_debug("magic/endian check failed\n"); return -1; + } if (ph->needs_swap) { mem_bswap_64(header, offsetof(struct perf_file_header, @@ -1939,21 +2020,17 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, if (ret <= 0) return -1; - if (check_magic_endian(&header->magic, NULL, ph) < 0) + if (check_magic_endian(header->magic, header->size, true, ph) < 0) { + pr_debug("endian/magic failed\n"); return -1; + } + + if (ph->needs_swap) + header->size = bswap_64(header->size); if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) return -1; - if (header->size != sizeof(*header)) { - u64 size = bswap_64(header->size); - - if (size != sizeof(*header)) - return -1; - - ph->needs_swap = true; - } - return 0; } @@ -1973,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd) return 0; } +static int read_attr(int fd, struct perf_header *ph, + struct perf_file_attr *f_attr) +{ + struct perf_event_attr *attr = &f_attr->attr; + size_t sz, left; + size_t our_sz = sizeof(f_attr->attr); + int ret; + + memset(f_attr, 0, sizeof(*f_attr)); + + /* read minimal guaranteed structure */ + ret = readn(fd, attr, PERF_ATTR_SIZE_VER0); + if (ret <= 0) { + pr_debug("cannot read %d bytes of header attr\n", + PERF_ATTR_SIZE_VER0); + return -1; + } + + /* on file perf_event_attr size */ + sz = attr->size; + + if (ph->needs_swap) + sz = bswap_32(sz); + + if (sz == 0) { + /* assume ABI0 */ + sz = PERF_ATTR_SIZE_VER0; + } else if (sz > our_sz) { + pr_debug("file uses a more recent and unsupported ABI" + " (%zu bytes extra)\n", sz - our_sz); + return -1; + } + /* what we have not yet read and that we know about */ + left = sz - PERF_ATTR_SIZE_VER0; + if (left) { + void *ptr = attr; + ptr += PERF_ATTR_SIZE_VER0; + + ret = readn(fd, ptr, left); + } + /* read perf_file_section, ids are read in caller */ + ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids)); + + return ret <= 0 ? -1 : 0; +} + int perf_session__read_header(struct perf_session *session, int fd) { struct perf_header *header = &session->header; @@ -1988,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd) if (session->fd_pipe) return perf_header__read_pipe(session, fd); - if (perf_file_header__read(&f_header, header, fd) < 0) { - pr_debug("incompatible file format\n"); + if (perf_file_header__read(&f_header, header, fd) < 0) return -EINVAL; - } - nr_attrs = f_header.attrs.size / sizeof(f_attr); + nr_attrs = f_header.attrs.size / f_header.attr_size; lseek(fd, f_header.attrs.offset, SEEK_SET); for (i = 0; i < nr_attrs; i++) { struct perf_evsel *evsel; off_t tmp; - if (readn(fd, &f_attr, sizeof(f_attr)) <= 0) + if (read_attr(fd, header, &f_attr) < 0) goto out_errno; if (header->needs_swap) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index e68f617d082..21a6be09c12 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -27,7 +27,7 @@ enum { HEADER_EVENT_DESC, HEADER_CPU_TOPOLOGY, HEADER_NUMA_TOPOLOGY, - + HEADER_BRANCH_STACK, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6f505d1abac..8380c3db1c9 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists) hists__set_col_len(hists, col, 0); } +static void hists__set_unres_dso_col_len(struct hists *hists, int dso) +{ + const unsigned int unresolved_col_width = BITS_PER_LONG / 4; + + if (hists__col_len(hists, dso) < unresolved_col_width && + !symbol_conf.col_width_list_str && !symbol_conf.field_sep && + !symbol_conf.dso_list) + hists__set_col_len(hists, dso, unresolved_col_width); +} + static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) { + const unsigned int unresolved_col_width = BITS_PER_LONG / 4; u16 len; if (h->ms.sym) - hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen); - else { - const unsigned int unresolved_col_width = BITS_PER_LONG / 4; - - if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width && - !symbol_conf.col_width_list_str && !symbol_conf.field_sep && - !symbol_conf.dso_list) - hists__set_col_len(hists, HISTC_DSO, - unresolved_col_width); - } + hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4); + else + hists__set_unres_dso_col_len(hists, HISTC_DSO); len = thread__comm_len(h->thread); if (hists__new_col_len(hists, HISTC_COMM, len)) @@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) len = dso__name_len(h->ms.map->dso); hists__new_col_len(hists, HISTC_DSO, len); } + + if (h->branch_info) { + int symlen; + /* + * +4 accounts for '[x] ' priv level info + * +2 account of 0x prefix on raw addresses + */ + if (h->branch_info->from.sym) { + symlen = (int)h->branch_info->from.sym->namelen + 4; + hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); + + symlen = dso__name_len(h->branch_info->from.map->dso); + hists__new_col_len(hists, HISTC_DSO_FROM, symlen); + } else { + symlen = unresolved_col_width + 4 + 2; + hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); + hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM); + } + + if (h->branch_info->to.sym) { + symlen = (int)h->branch_info->to.sym->namelen + 4; + hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); + + symlen = dso__name_len(h->branch_info->to.map->dso); + hists__new_col_len(hists, HISTC_DSO_TO, symlen); + } else { + symlen = unresolved_col_width + 4 + 2; + hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); + hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); + } + } } static void hist_entry__add_cpumode_period(struct hist_entry *he, @@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent) return 0; } -struct hist_entry *__hists__add_entry(struct hists *hists, +static struct hist_entry *add_hist_entry(struct hists *hists, + struct hist_entry *entry, struct addr_location *al, - struct symbol *sym_parent, u64 period) + u64 period) { struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; - struct hist_entry entry = { - .thread = al->thread, - .ms = { - .map = al->map, - .sym = al->sym, - }, - .cpu = al->cpu, - .ip = al->addr, - .level = al->level, - .period = period, - .parent = sym_parent, - .filtered = symbol__parent_filter(sym_parent), - }; int cmp; pthread_mutex_lock(&hists->lock); @@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, parent = *p; he = rb_entry(parent, struct hist_entry, rb_node_in); - cmp = hist_entry__cmp(&entry, he); + cmp = hist_entry__cmp(entry, he); if (!cmp) { he->period += period; @@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, p = &(*p)->rb_right; } - he = hist_entry__new(&entry); + he = hist_entry__new(entry); if (!he) goto out_unlock; @@ -252,6 +275,51 @@ out_unlock: return he; } +struct hist_entry *__hists__add_branch_entry(struct hists *self, + struct addr_location *al, + struct symbol *sym_parent, + struct branch_info *bi, + u64 period) +{ + struct hist_entry entry = { + .thread = al->thread, + .ms = { + .map = bi->to.map, + .sym = bi->to.sym, + }, + .cpu = al->cpu, + .ip = bi->to.addr, + .level = al->level, + .period = period, + .parent = sym_parent, + .filtered = symbol__parent_filter(sym_parent), + .branch_info = bi, + }; + + return add_hist_entry(self, &entry, al, period); +} + +struct hist_entry *__hists__add_entry(struct hists *self, + struct addr_location *al, + struct symbol *sym_parent, u64 period) +{ + struct hist_entry entry = { + .thread = al->thread, + .ms = { + .map = al->map, + .sym = al->sym, + }, + .cpu = al->cpu, + .ip = al->addr, + .level = al->level, + .period = period, + .parent = sym_parent, + .filtered = symbol__parent_filter(sym_parent), + }; + + return add_hist_entry(self, &entry, al, period); +} + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 48e5acd1e86..9413f3e31fe 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -42,6 +42,11 @@ enum hist_column { HISTC_COMM, HISTC_PARENT, HISTC_CPU, + HISTC_MISPREDICT, + HISTC_SYMBOL_FROM, + HISTC_SYMBOL_TO, + HISTC_DSO_FROM, + HISTC_DSO_TO, HISTC_NR_COLS, /* Last entry */ }; @@ -74,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, struct hists *hists); void hist_entry__free(struct hist_entry *); +struct hist_entry *__hists__add_branch_entry(struct hists *self, + struct addr_location *al, + struct symbol *sym_parent, + struct branch_info *bi, + u64 period); + void hists__output_resort(struct hists *self); void hists__output_resort_threaded(struct hists *hists); void hists__collapse_resort(struct hists *self); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 9f833cf9c6a..002ebbf59f4 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force) self->fd = STDIN_FILENO; if (perf_session__read_header(self, self->fd) < 0) - pr_err("incompatible file format"); + pr_err("incompatible file format (rerun with -v to learn more)"); return 0; } @@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force) } if (perf_session__read_header(self, self->fd) < 0) { - pr_err("incompatible file format"); + pr_err("incompatible file format (rerun with -v to learn more)"); goto out_close; } @@ -229,6 +229,64 @@ static bool symbol__match_parent_regex(struct symbol *sym) return 0; } +static const u8 cpumodes[] = { + PERF_RECORD_MISC_USER, + PERF_RECORD_MISC_KERNEL, + PERF_RECORD_MISC_GUEST_USER, + PERF_RECORD_MISC_GUEST_KERNEL +}; +#define NCPUMODES (sizeof(cpumodes)/sizeof(u8)) + +static void ip__resolve_ams(struct machine *self, struct thread *thread, + struct addr_map_symbol *ams, + u64 ip) +{ + struct addr_location al; + size_t i; + u8 m; + + memset(&al, 0, sizeof(al)); + + for (i = 0; i < NCPUMODES; i++) { + m = cpumodes[i]; + /* + * We cannot use the header.misc hint to determine whether a + * branch stack address is user, kernel, guest, hypervisor. + * Branches may straddle the kernel/user/hypervisor boundaries. + * Thus, we have to try consecutively until we find a match + * or else, the symbol is unknown + */ + thread__find_addr_location(thread, self, m, MAP__FUNCTION, + ip, &al, NULL); + if (al.sym) + goto found; + } +found: + ams->addr = ip; + ams->al_addr = al.addr; + ams->sym = al.sym; + ams->map = al.map; +} + +struct branch_info *machine__resolve_bstack(struct machine *self, + struct thread *thr, + struct branch_stack *bs) +{ + struct branch_info *bi; + unsigned int i; + + bi = calloc(bs->nr, sizeof(struct branch_info)); + if (!bi) + return NULL; + + for (i = 0; i < bs->nr; i++) { + ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to); + ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from); + bi[i].flags = bs->entries[i].flags; + } + return bi; +} + int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, struct thread *thread, struct ip_callchain *chain, @@ -697,6 +755,18 @@ static void callchain__printf(struct perf_sample *sample) i, sample->callchain->ips[i]); } +static void branch_stack__printf(struct perf_sample *sample) +{ + uint64_t i; + + printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); + + for (i = 0; i < sample->branch_stack->nr; i++) + printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", + i, sample->branch_stack->entries[i].from, + sample->branch_stack->entries[i].to); +} + static void perf_session__print_tstamp(struct perf_session *session, union perf_event *event, struct perf_sample *sample) @@ -744,6 +814,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event, if (session->sample_type & PERF_SAMPLE_CALLCHAIN) callchain__printf(sample); + + if (session->sample_type & PERF_SAMPLE_BRANCH_STACK) + branch_stack__printf(sample); } static struct machine * diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index c8d90178e7d..7a5434c0056 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel struct ip_callchain *chain, struct symbol **parent); +struct branch_info *machine__resolve_bstack(struct machine *self, + struct thread *thread, + struct branch_stack *bs); + bool perf_session__has_traces(struct perf_session *self, const char *msg); void mem_bswap_64(void *src, int byte_size); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 16da30d8d76..88dbcf6f957 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -8,6 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol"; const char *sort_order = default_sort_order; int sort__need_collapse = 0; int sort__has_parent = 0; +int sort__branch_mode = -1; /* -1 = means not set */ enum sort_type sort__first_dimension; @@ -94,6 +95,26 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); } +static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) +{ + struct dso *dso_l = map_l ? map_l->dso : NULL; + struct dso *dso_r = map_r ? map_r->dso : NULL; + const char *dso_name_l, *dso_name_r; + + if (!dso_l || !dso_r) + return cmp_null(dso_l, dso_r); + + if (verbose) { + dso_name_l = dso_l->long_name; + dso_name_r = dso_r->long_name; + } else { + dso_name_l = dso_l->short_name; + dso_name_r = dso_r->short_name; + } + + return strcmp(dso_name_l, dso_name_r); +} + struct sort_entry sort_comm = { .se_header = "Command", .se_cmp = sort__comm_cmp, @@ -107,36 +128,74 @@ struct sort_entry sort_comm = { static int64_t sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) { - struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL; - struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL; - const char *dso_name_l, *dso_name_r; + return _sort__dso_cmp(left->ms.map, right->ms.map); +} - if (!dso_l || !dso_r) - return cmp_null(dso_l, dso_r); - if (verbose) { - dso_name_l = dso_l->long_name; - dso_name_r = dso_r->long_name; - } else { - dso_name_l = dso_l->short_name; - dso_name_r = dso_r->short_name; +static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r, + u64 ip_l, u64 ip_r) +{ + if (!sym_l || !sym_r) + return cmp_null(sym_l, sym_r); + + if (sym_l == sym_r) + return 0; + + if (sym_l) + ip_l = sym_l->start; + if (sym_r) + ip_r = sym_r->start; + + return (int64_t)(ip_r - ip_l); +} + +static int _hist_entry__dso_snprintf(struct map *map, char *bf, + size_t size, unsigned int width) +{ + if (map && map->dso) { + const char *dso_name = !verbose ? map->dso->short_name : + map->dso->long_name; + return repsep_snprintf(bf, size, "%-*s", width, dso_name); } - return strcmp(dso_name_l, dso_name_r); + return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); } static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, size_t size, unsigned int width) { - if (self->ms.map && self->ms.map->dso) { - const char *dso_name = !verbose ? self->ms.map->dso->short_name : - self->ms.map->dso->long_name; - return repsep_snprintf(bf, size, "%-*s", width, dso_name); + return _hist_entry__dso_snprintf(self->ms.map, bf, size, width); +} + +static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, + u64 ip, char level, char *bf, size_t size, + unsigned int width __used) +{ + size_t ret = 0; + + if (verbose) { + char o = map ? dso__symtab_origin(map->dso) : '!'; + ret += repsep_snprintf(bf, size, "%-#*llx %c ", + BITS_PER_LONG / 4, ip, o); } - return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); + ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); + if (sym) + ret += repsep_snprintf(bf + ret, size - ret, "%-*s", + width - ret, + sym->name); + else { + size_t len = BITS_PER_LONG / 4; + ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", + len, ip); + ret += repsep_snprintf(bf + ret, size - ret, "%-*s", + width - ret, ""); + } + + return ret; } + struct sort_entry sort_dso = { .se_header = "Shared Object", .se_cmp = sort__dso_cmp, @@ -144,8 +203,14 @@ struct sort_entry sort_dso = { .se_width_idx = HISTC_DSO, }; -/* --sort symbol */ +static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, + self->level, bf, size, width); +} +/* --sort symbol */ static int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -163,31 +228,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) ip_l = left->ms.sym->start; ip_r = right->ms.sym->start; - return (int64_t)(ip_r - ip_l); -} - -static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width __used) -{ - size_t ret = 0; - - if (verbose) { - char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; - ret += repsep_snprintf(bf, size, "%-#*llx %c ", - BITS_PER_LONG / 4, self->ip, o); - } - - if (!sort_dso.elide) - ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level); - - if (self->ms.sym) - ret += repsep_snprintf(bf + ret, size - ret, "%s", - self->ms.sym->name); - else - ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx", - BITS_PER_LONG / 4, self->ip); - - return ret; + return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r); } struct sort_entry sort_sym = { @@ -246,19 +287,155 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +static int64_t +sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return _sort__dso_cmp(left->branch_info->from.map, + right->branch_info->from.map); +} + +static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + return _hist_entry__dso_snprintf(self->branch_info->from.map, + bf, size, width); +} + +struct sort_entry sort_dso_from = { + .se_header = "Source Shared Object", + .se_cmp = sort__dso_from_cmp, + .se_snprintf = hist_entry__dso_from_snprintf, + .se_width_idx = HISTC_DSO_FROM, +}; + +static int64_t +sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return _sort__dso_cmp(left->branch_info->to.map, + right->branch_info->to.map); +} + +static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + return _hist_entry__dso_snprintf(self->branch_info->to.map, + bf, size, width); +} + +static int64_t +sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct addr_map_symbol *from_l = &left->branch_info->from; + struct addr_map_symbol *from_r = &right->branch_info->from; + + if (!from_l->sym && !from_r->sym) + return right->level - left->level; + + return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr, + from_r->addr); +} + +static int64_t +sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct addr_map_symbol *to_l = &left->branch_info->to; + struct addr_map_symbol *to_r = &right->branch_info->to; + + if (!to_l->sym && !to_r->sym) + return right->level - left->level; + + return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr); +} + +static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + struct addr_map_symbol *from = &self->branch_info->from; + return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, + self->level, bf, size, width); + +} + +static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + struct addr_map_symbol *to = &self->branch_info->to; + return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, + self->level, bf, size, width); + +} + +struct sort_entry sort_dso_to = { + .se_header = "Target Shared Object", + .se_cmp = sort__dso_to_cmp, + .se_snprintf = hist_entry__dso_to_snprintf, + .se_width_idx = HISTC_DSO_TO, +}; + +struct sort_entry sort_sym_from = { + .se_header = "Source Symbol", + .se_cmp = sort__sym_from_cmp, + .se_snprintf = hist_entry__sym_from_snprintf, + .se_width_idx = HISTC_SYMBOL_FROM, +}; + +struct sort_entry sort_sym_to = { + .se_header = "Target Symbol", + .se_cmp = sort__sym_to_cmp, + .se_snprintf = hist_entry__sym_to_snprintf, + .se_width_idx = HISTC_SYMBOL_TO, +}; + +static int64_t +sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right) +{ + const unsigned char mp = left->branch_info->flags.mispred != + right->branch_info->flags.mispred; + const unsigned char p = left->branch_info->flags.predicted != + right->branch_info->flags.predicted; + + return mp || p; +} + +static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width){ + static const char *out = "N/A"; + + if (self->branch_info->flags.predicted) + out = "N"; + else if (self->branch_info->flags.mispred) + out = "Y"; + + return repsep_snprintf(bf, size, "%-*s", width, out); +} + +struct sort_entry sort_mispredict = { + .se_header = "Branch Mispredicted", + .se_cmp = sort__mispredict_cmp, + .se_snprintf = hist_entry__mispredict_snprintf, + .se_width_idx = HISTC_MISPREDICT, +}; + struct sort_dimension { const char *name; struct sort_entry *entry; int taken; }; +#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } + static struct sort_dimension sort_dimensions[] = { - { .name = "pid", .entry = &sort_thread, }, - { .name = "comm", .entry = &sort_comm, }, - { .name = "dso", .entry = &sort_dso, }, - { .name = "symbol", .entry = &sort_sym, }, - { .name = "parent", .entry = &sort_parent, }, - { .name = "cpu", .entry = &sort_cpu, }, + DIM(SORT_PID, "pid", sort_thread), + DIM(SORT_COMM, "comm", sort_comm), + DIM(SORT_DSO, "dso", sort_dso), + DIM(SORT_DSO_FROM, "dso_from", sort_dso_from), + DIM(SORT_DSO_TO, "dso_to", sort_dso_to), + DIM(SORT_SYM, "symbol", sort_sym), + DIM(SORT_SYM_TO, "symbol_from", sort_sym_from), + DIM(SORT_SYM_FROM, "symbol_to", sort_sym_to), + DIM(SORT_PARENT, "parent", sort_parent), + DIM(SORT_CPU, "cpu", sort_cpu), + DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), }; int sort_dimension__add(const char *tok) @@ -270,7 +447,6 @@ int sort_dimension__add(const char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sd->entry == &sort_parent) { int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); if (ret) { @@ -302,6 +478,16 @@ int sort_dimension__add(const char *tok) sort__first_dimension = SORT_PARENT; else if (!strcmp(sd->name, "cpu")) sort__first_dimension = SORT_CPU; + else if (!strcmp(sd->name, "symbol_from")) + sort__first_dimension = SORT_SYM_FROM; + else if (!strcmp(sd->name, "symbol_to")) + sort__first_dimension = SORT_SYM_TO; + else if (!strcmp(sd->name, "dso_from")) + sort__first_dimension = SORT_DSO_FROM; + else if (!strcmp(sd->name, "dso_to")) + sort__first_dimension = SORT_DSO_TO; + else if (!strcmp(sd->name, "mispredict")) + sort__first_dimension = SORT_MISPREDICT; } list_add_tail(&sd->entry->list, &hist_entry__sort_list); @@ -309,7 +495,6 @@ int sort_dimension__add(const char *tok) return 0; } - return -ESRCH; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 3f67ae39575..472aa5a63a5 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -31,11 +31,16 @@ extern const char *parent_pattern; extern const char default_sort_order[]; extern int sort__need_collapse; extern int sort__has_parent; +extern int sort__branch_mode; extern char *field_sep; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; extern struct sort_entry sort_sym; extern struct sort_entry sort_parent; +extern struct sort_entry sort_dso_from; +extern struct sort_entry sort_dso_to; +extern struct sort_entry sort_sym_from; +extern struct sort_entry sort_sym_to; extern enum sort_type sort__first_dimension; /** @@ -72,6 +77,7 @@ struct hist_entry { struct hist_entry *pair; struct rb_root sorted_chain; }; + struct branch_info *branch_info; struct callchain_root callchain[0]; }; @@ -82,6 +88,11 @@ enum sort_type { SORT_SYM, SORT_PARENT, SORT_CPU, + SORT_DSO_FROM, + SORT_DSO_TO, + SORT_SYM_FROM, + SORT_SYM_TO, + SORT_MISPREDICT, }; /* diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2a683d4fc91..ac49ef208a5 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -5,6 +5,7 @@ #include <stdbool.h> #include <stdint.h> #include "map.h" +#include "../perf.h" #include <linux/list.h> #include <linux/rbtree.h> #include <stdio.h> @@ -96,7 +97,11 @@ struct symbol_conf { *col_width_list_str; struct strlist *dso_list, *comm_list, - *sym_list; + *sym_list, + *dso_from_list, + *dso_to_list, + *sym_from_list, + *sym_to_list; const char *symfs; }; @@ -120,6 +125,19 @@ struct map_symbol { bool has_children; }; +struct addr_map_symbol { + struct map *map; + struct symbol *sym; + u64 addr; + u64 al_addr; +}; + +struct branch_info { + struct addr_map_symbol from; + struct addr_map_symbol to; + struct branch_flags flags; +}; + struct addr_location { struct thread *thread; struct map *map; diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index bfba0490c09..de8ece8bcce 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -805,8 +805,11 @@ static struct hist_browser *hist_browser__new(struct hists *hists) self->hists = hists; self->b.refresh = hist_browser__refresh; self->b.seek = ui_browser__hists_seek; - self->b.use_navkeypressed = true, - self->has_symbols = sort_sym.list.next != NULL; + self->b.use_navkeypressed = true; + if (sort__branch_mode == 1) + self->has_symbols = sort_sym_from.list.next != NULL; + else + self->has_symbols = sort_sym.list.next != NULL; } return self; @@ -853,6 +856,16 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, return printed; } +static inline void free_popup_options(char **options, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + free(options[i]); + options[i] = NULL; + } +} + static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, const char *helpline, const char *ev_name, bool left_exits, @@ -861,7 +874,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, { struct hists *self = &evsel->hists; struct hist_browser *browser = hist_browser__new(self); + struct branch_info *bi; struct pstack *fstack; + char *options[16]; + int nr_options = 0; int key = -1; if (browser == NULL) @@ -873,13 +889,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, ui_helpline__push(helpline); + memset(options, 0, sizeof(options)); + while (1) { const struct thread *thread = NULL; const struct dso *dso = NULL; - char *options[16]; - int nr_options = 0, choice = 0, i, + int choice = 0, annotate = -2, zoom_dso = -2, zoom_thread = -2, - browse_map = -2; + annotate_f = -2, annotate_t = -2, browse_map = -2; + + nr_options = 0; key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); @@ -887,7 +906,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, thread = hist_browser__selected_thread(browser); dso = browser->selection->map ? browser->selection->map->dso : NULL; } - switch (key) { case K_TAB: case K_UNTAB: @@ -902,7 +920,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (!browser->has_symbols) { ui_browser__warning(&browser->b, delay_secs * 2, "Annotation is only available for symbolic views, " - "include \"sym\" in --sort to use it."); + "include \"sym*\" in --sort to use it."); continue; } @@ -972,12 +990,34 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (!browser->has_symbols) goto add_exit_option; - if (browser->selection != NULL && - browser->selection->sym != NULL && - !browser->selection->map->dso->annotate_warned && - asprintf(&options[nr_options], "Annotate %s", - browser->selection->sym->name) > 0) - annotate = nr_options++; + if (sort__branch_mode == 1) { + bi = browser->he_selection->branch_info; + if (browser->selection != NULL && + bi && + bi->from.sym != NULL && + !bi->from.map->dso->annotate_warned && + asprintf(&options[nr_options], "Annotate %s", + bi->from.sym->name) > 0) + annotate_f = nr_options++; + + if (browser->selection != NULL && + bi && + bi->to.sym != NULL && + !bi->to.map->dso->annotate_warned && + (bi->to.sym != bi->from.sym || + bi->to.map->dso != bi->from.map->dso) && + asprintf(&options[nr_options], "Annotate %s", + bi->to.sym->name) > 0) + annotate_t = nr_options++; + } else { + + if (browser->selection != NULL && + browser->selection->sym != NULL && + !browser->selection->map->dso->annotate_warned && + asprintf(&options[nr_options], "Annotate %s", + browser->selection->sym->name) > 0) + annotate = nr_options++; + } if (thread != NULL && asprintf(&options[nr_options], "Zoom %s %s(%d) thread", @@ -998,25 +1038,39 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, browse_map = nr_options++; add_exit_option: options[nr_options++] = (char *)"Exit"; - +retry_popup_menu: choice = ui__popup_menu(nr_options, options); - for (i = 0; i < nr_options - 1; ++i) - free(options[i]); - if (choice == nr_options - 1) break; - if (choice == -1) + if (choice == -1) { + free_popup_options(options, nr_options - 1); continue; + } - if (choice == annotate) { + if (choice == annotate || choice == annotate_t || choice == annotate_f) { struct hist_entry *he; int err; do_annotate: he = hist_browser__selected_entry(browser); if (he == NULL) continue; + + /* + * we stash the branch_info symbol + map into the + * the ms so we don't have to rewrite all the annotation + * code to use branch_info. + * in branch mode, the ms struct is not used + */ + if (choice == annotate_f) { + he->ms.sym = he->branch_info->from.sym; + he->ms.map = he->branch_info->from.map; + } else if (choice == annotate_t) { + he->ms.sym = he->branch_info->to.sym; + he->ms.map = he->branch_info->to.map; + } + /* * Don't let this be freed, say, by hists__decay_entry. */ @@ -1024,9 +1078,18 @@ do_annotate: err = hist_entry__tui_annotate(he, evsel->idx, timer, arg, delay_secs); he->used = false; + /* + * offer option to annotate the other branch source or target + * (if they exists) when returning from annotate + */ + if ((err == 'q' || err == CTRL('c')) + && annotate_t != -2 && annotate_f != -2) + goto retry_popup_menu; + ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); if (err) ui_browser__handle_resize(&browser->b); + } else if (choice == browse_map) map__browse(browser->selection->map); else if (choice == zoom_dso) { @@ -1072,6 +1135,7 @@ out_free_stack: pstack__delete(fstack); out: hist_browser__delete(browser); + free_popup_options(options, nr_options - 1); return key; } |