summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPeter Collingbourne <pcc@google.com>2017-11-02 16:53:21 -0700
committerPeter Collingbourne <pcc@google.com>2017-11-08 14:15:02 -0800
commitc4d37a78cc8aeeb68ae09d37152114fac24c0cea (patch)
treed4f00fac3db75a0fda16db13eb56c85d218ee0b4 /src
parentd72e3be82c832601e1d8f6fba580d2edf8be13da (diff)
downloadglog-c4d37a78cc8aeeb68ae09d37152114fac24c0cea.tar.gz
glog-c4d37a78cc8aeeb68ae09d37152114fac24c0cea.tar.bz2
glog-c4d37a78cc8aeeb68ae09d37152114fac24c0cea.zip
Compute base addresses from program headers while reading /proc/self/maps.
We previously had logic to compute the base address from program headers as part of symbolization. The problem is that we need a correct base address earlier in order to adjust a PC into the image's address space, as these addresses can appear in unsymbolized output. There was previously an assumption that only the mapping that was lowest in the address space did not need to be adjusted. This assumption is not guaranteed (for example, the kernel may choose to map an ET_DYN lowest) and in fact turned out to be wrong in binaries linked with lld because the first mapping is read-only. The solution is to move the program header reading logic into the code that reads /proc/self/maps. There is a change in semantics for clients that install a callback using the InstallSymbolizeOpenObjectFileCallback function. Any such clients will need to return a correct base address from the callback by reading program headers using code similar to that in the function OpenObjectFileContainingPcAndGetStartAddress.
Diffstat (limited to 'src')
-rw-r--r--src/symbolize.cc111
1 files changed, 59 insertions, 52 deletions
diff --git a/src/symbolize.cc b/src/symbolize.cc
index 953f1db..98a754f 100644
--- a/src/symbolize.cc
+++ b/src/symbolize.cc
@@ -56,6 +56,8 @@
#if defined(HAVE_SYMBOLIZE)
+#include <string.h>
+
#include <limits>
#include "symbolize.h"
@@ -325,41 +327,17 @@ FindSymbol(uint64_t pc, const int fd, char *out, int out_size,
// both regular and dynamic symbol tables if necessary. On success,
// write the symbol name to "out" and return true. Otherwise, return
// false.
-static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
- char *out, int out_size,
- uint64_t map_base_address) {
+static bool GetSymbolFromObjectFile(const int fd,
+ uint64_t pc,
+ char* out,
+ int out_size,
+ uint64_t base_address) {
// Read the ELF header.
ElfW(Ehdr) elf_header;
if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
return false;
}
- uint64_t symbol_offset = 0;
- if (elf_header.e_type == ET_DYN) { // DSO needs offset adjustment.
- ElfW(Phdr) phdr;
- // We need to find the PT_LOAD segment corresponding to the read-execute
- // file mapping in order to correctly perform the offset adjustment.
- for (unsigned i = 0; i != elf_header.e_phnum; ++i) {
- if (!ReadFromOffsetExact(fd, &phdr, sizeof(phdr),
- elf_header.e_phoff + i * sizeof(phdr)))
- return false;
- if (phdr.p_type == PT_LOAD &&
- (phdr.p_flags & (PF_R | PF_X)) == (PF_R | PF_X)) {
- // Find the mapped address corresponding to virtual address zero. We do
- // this by first adding p_offset. This gives us the mapped address of
- // the start of the segment, or in other words the mapped address
- // corresponding to the virtual address of the segment. (Note that this
- // is distinct from the start address, as p_offset is not guaranteed to
- // be page aligned.) We then subtract p_vaddr, which takes us to virtual
- // address zero.
- symbol_offset = map_base_address + phdr.p_offset - phdr.p_vaddr;
- break;
- }
- }
- if (symbol_offset == 0)
- return false;
- }
-
ElfW(Shdr) symtab, strtab;
// Consult a regular symbol table first.
@@ -369,8 +347,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
symtab.sh_link * sizeof(symtab))) {
return false;
}
- if (FindSymbol(pc, fd, out, out_size, symbol_offset,
- &strtab, &symtab)) {
+ if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) {
return true; // Found the symbol in a regular symbol table.
}
}
@@ -382,8 +359,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
symtab.sh_link * sizeof(symtab))) {
return false;
}
- if (FindSymbol(pc, fd, out, out_size, symbol_offset,
- &strtab, &symtab)) {
+ if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) {
return true; // Found the symbol in a dynamic symbol table.
}
}
@@ -532,7 +508,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
int out_file_name_size) {
int object_fd;
- // Open /proc/self/maps.
int maps_fd;
NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY));
FileDescriptor wrapped_maps_fd(maps_fd);
@@ -540,6 +515,13 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
return -1;
}
+ int mem_fd;
+ NO_INTR(mem_fd = open("/proc/self/mem", O_RDONLY));
+ FileDescriptor wrapped_mem_fd(mem_fd);
+ if (wrapped_mem_fd.get() < 0) {
+ return -1;
+ }
+
// Iterate over maps and look for the map containing the pc. Then
// look into the symbol tables inside.
char buf[1024]; // Big enough for line of sane /proc/self/maps
@@ -575,11 +557,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
}
++cursor; // Skip ' '.
- // Check start and end addresses.
- if (!(start_address <= pc && pc < end_address)) {
- continue; // We skip this map. PC isn't in this map.
- }
-
// Read flags. Skip flags until we encounter a space or eol.
const char * const flags_start = cursor;
while (cursor < eol && *cursor != ' ') {
@@ -590,6 +567,49 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
return -1; // Malformed line.
}
+ // Determine the base address by reading ELF headers in process memory.
+ ElfW(Ehdr) ehdr;
+ // Skip non-readable maps.
+ if (flags_start[0] == 'r' &&
+ ReadFromOffsetExact(mem_fd, &ehdr, sizeof(ElfW(Ehdr)), start_address) &&
+ memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0) {
+ switch (ehdr.e_type) {
+ case ET_EXEC:
+ base_address = 0;
+ break;
+ case ET_DYN:
+ // Find the segment containing file offset 0. This will correspond
+ // to the ELF header that we just read. Normally this will have
+ // virtual address 0, but this is not guaranteed. We must subtract
+ // the virtual address from the address where the ELF header was
+ // mapped to get the base address.
+ //
+ // If we fail to find a segment for file offset 0, use the address
+ // of the ELF header as the base address.
+ base_address = start_address;
+ for (unsigned i = 0; i != ehdr.e_phnum; ++i) {
+ ElfW(Phdr) phdr;
+ if (ReadFromOffsetExact(
+ mem_fd, &phdr, sizeof(phdr),
+ start_address + ehdr.e_phoff + i * sizeof(phdr)) &&
+ phdr.p_type == PT_LOAD && phdr.p_offset == 0) {
+ base_address = start_address - phdr.p_vaddr;
+ break;
+ }
+ }
+ break;
+ default:
+ // ET_REL or ET_CORE. These aren't directly executable, so they don't
+ // affect the base address.
+ break;
+ }
+ }
+
+ // Check start and end addresses.
+ if (!(start_address <= pc && pc < end_address)) {
+ continue; // We skip this map. PC isn't in this map.
+ }
+
// Check flags. We are only interested in "r*x" maps.
if (flags_start[0] != 'r' || flags_start[2] != 'x') {
continue; // We skip this map.
@@ -604,19 +624,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
}
++cursor; // Skip ' '.
- // Don't subtract 'start_address' from the first entry:
- // * If a binary is compiled w/o -pie, then the first entry in
- // process maps is likely the binary itself (all dynamic libs
- // are mapped higher in address space). For such a binary,
- // instruction offset in binary coincides with the actual
- // instruction address in virtual memory (as code section
- // is mapped to a fixed memory range).
- // * If a binary is compiled with -pie, all the modules are
- // mapped high at address space (in particular, higher than
- // shadow memory of the tool), so the module can't be the
- // first entry.
- base_address = ((num_maps == 1) ? 0U : start_address) - file_offset;
-
// Skip to file name. "cursor" now points to dev. We need to
// skip at least two spaces for dev and inode.
int num_spaces = 0;