diff options
-rw-r--r-- | Makefile.am | 13 | ||||
-rw-r--r-- | src/symbolize.cc | 111 |
2 files changed, 71 insertions, 53 deletions
diff --git a/Makefile.am b/Makefile.am index 3756465..d578a40 100644 --- a/Makefile.am +++ b/Makefile.am @@ -63,7 +63,10 @@ TEST_LIBS = $(GTEST_LIBS) $(GMOCK_LIBS) $(GFLAGS_LIBS) ## TESTS_ENVIRONMENT sets environment variables for when you run unittest, ## but it only seems to take effect for *binary* unittests (argh!) TESTS = -TESTS_ENVIRONMENT = +# Set a small stack size so that (at least on Linux) PIEs are mapped at a lower +# address than DSOs. This is used by symbolize_pie_unittest to check that we can +# successfully symbolize PIEs loaded at low addresses. +TESTS_ENVIRONMENT = ulimit -s 8192; check_SCRIPTS = # Every time you add a unittest to check_SCRIPTS, add it here too noinst_SCRIPTS = @@ -145,6 +148,14 @@ symbolize_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(TEST_CFLAGS) symbolize_unittest_LDFLAGS = $(PTHREAD_CFLAGS) symbolize_unittest_LDADD = libglog.la $(COMMON_LIBS) $(TEST_LIBS) +TESTS += symbolize_pie_unittest +symbolize_pie_unittest_SOURCES = $(gloginclude_HEADERS) \ + src/symbolize_unittest.cc +nodist_symbolize_pie_unittest_SOURCES = $(nodist_gloginclude_HEADERS) +symbolize_pie_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(TEST_CFLAGS) -fPIE +symbolize_pie_unittest_LDFLAGS = $(PTHREAD_CFLAGS) -pie +symbolize_pie_unittest_LDADD = libglog.la $(COMMON_LIBS) $(TEST_LIBS) + TESTS += stl_logging_unittest stl_logging_unittest_SOURCES = $(gloginclude_HEADERS) \ src/stl_logging_unittest.cc diff --git a/src/symbolize.cc b/src/symbolize.cc index 953f1db..98a754f 100644 --- a/src/symbolize.cc +++ b/src/symbolize.cc @@ -56,6 +56,8 @@ #if defined(HAVE_SYMBOLIZE) +#include <string.h> + #include <limits> #include "symbolize.h" @@ -325,41 +327,17 @@ FindSymbol(uint64_t pc, const int fd, char *out, int out_size, // both regular and dynamic symbol tables if necessary. On success, // write the symbol name to "out" and return true. Otherwise, return // false. -static bool GetSymbolFromObjectFile(const int fd, uint64_t pc, - char *out, int out_size, - uint64_t map_base_address) { +static bool GetSymbolFromObjectFile(const int fd, + uint64_t pc, + char* out, + int out_size, + uint64_t base_address) { // Read the ELF header. ElfW(Ehdr) elf_header; if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { return false; } - uint64_t symbol_offset = 0; - if (elf_header.e_type == ET_DYN) { // DSO needs offset adjustment. - ElfW(Phdr) phdr; - // We need to find the PT_LOAD segment corresponding to the read-execute - // file mapping in order to correctly perform the offset adjustment. - for (unsigned i = 0; i != elf_header.e_phnum; ++i) { - if (!ReadFromOffsetExact(fd, &phdr, sizeof(phdr), - elf_header.e_phoff + i * sizeof(phdr))) - return false; - if (phdr.p_type == PT_LOAD && - (phdr.p_flags & (PF_R | PF_X)) == (PF_R | PF_X)) { - // Find the mapped address corresponding to virtual address zero. We do - // this by first adding p_offset. This gives us the mapped address of - // the start of the segment, or in other words the mapped address - // corresponding to the virtual address of the segment. (Note that this - // is distinct from the start address, as p_offset is not guaranteed to - // be page aligned.) We then subtract p_vaddr, which takes us to virtual - // address zero. - symbol_offset = map_base_address + phdr.p_offset - phdr.p_vaddr; - break; - } - } - if (symbol_offset == 0) - return false; - } - ElfW(Shdr) symtab, strtab; // Consult a regular symbol table first. @@ -369,8 +347,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t pc, symtab.sh_link * sizeof(symtab))) { return false; } - if (FindSymbol(pc, fd, out, out_size, symbol_offset, - &strtab, &symtab)) { + if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) { return true; // Found the symbol in a regular symbol table. } } @@ -382,8 +359,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t pc, symtab.sh_link * sizeof(symtab))) { return false; } - if (FindSymbol(pc, fd, out, out_size, symbol_offset, - &strtab, &symtab)) { + if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) { return true; // Found the symbol in a dynamic symbol table. } } @@ -532,7 +508,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, int out_file_name_size) { int object_fd; - // Open /proc/self/maps. int maps_fd; NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY)); FileDescriptor wrapped_maps_fd(maps_fd); @@ -540,6 +515,13 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, return -1; } + int mem_fd; + NO_INTR(mem_fd = open("/proc/self/mem", O_RDONLY)); + FileDescriptor wrapped_mem_fd(mem_fd); + if (wrapped_mem_fd.get() < 0) { + return -1; + } + // Iterate over maps and look for the map containing the pc. Then // look into the symbol tables inside. char buf[1024]; // Big enough for line of sane /proc/self/maps @@ -575,11 +557,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, } ++cursor; // Skip ' '. - // Check start and end addresses. - if (!(start_address <= pc && pc < end_address)) { - continue; // We skip this map. PC isn't in this map. - } - // Read flags. Skip flags until we encounter a space or eol. const char * const flags_start = cursor; while (cursor < eol && *cursor != ' ') { @@ -590,6 +567,49 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, return -1; // Malformed line. } + // Determine the base address by reading ELF headers in process memory. + ElfW(Ehdr) ehdr; + // Skip non-readable maps. + if (flags_start[0] == 'r' && + ReadFromOffsetExact(mem_fd, &ehdr, sizeof(ElfW(Ehdr)), start_address) && + memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0) { + switch (ehdr.e_type) { + case ET_EXEC: + base_address = 0; + break; + case ET_DYN: + // Find the segment containing file offset 0. This will correspond + // to the ELF header that we just read. Normally this will have + // virtual address 0, but this is not guaranteed. We must subtract + // the virtual address from the address where the ELF header was + // mapped to get the base address. + // + // If we fail to find a segment for file offset 0, use the address + // of the ELF header as the base address. + base_address = start_address; + for (unsigned i = 0; i != ehdr.e_phnum; ++i) { + ElfW(Phdr) phdr; + if (ReadFromOffsetExact( + mem_fd, &phdr, sizeof(phdr), + start_address + ehdr.e_phoff + i * sizeof(phdr)) && + phdr.p_type == PT_LOAD && phdr.p_offset == 0) { + base_address = start_address - phdr.p_vaddr; + break; + } + } + break; + default: + // ET_REL or ET_CORE. These aren't directly executable, so they don't + // affect the base address. + break; + } + } + + // Check start and end addresses. + if (!(start_address <= pc && pc < end_address)) { + continue; // We skip this map. PC isn't in this map. + } + // Check flags. We are only interested in "r*x" maps. if (flags_start[0] != 'r' || flags_start[2] != 'x') { continue; // We skip this map. @@ -604,19 +624,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, } ++cursor; // Skip ' '. - // Don't subtract 'start_address' from the first entry: - // * If a binary is compiled w/o -pie, then the first entry in - // process maps is likely the binary itself (all dynamic libs - // are mapped higher in address space). For such a binary, - // instruction offset in binary coincides with the actual - // instruction address in virtual memory (as code section - // is mapped to a fixed memory range). - // * If a binary is compiled with -pie, all the modules are - // mapped high at address space (in particular, higher than - // shadow memory of the tool), so the module can't be the - // first entry. - base_address = ((num_maps == 1) ? 0U : start_address) - file_offset; - // Skip to file name. "cursor" now points to dev. We need to // skip at least two spaces for dev and inode. int num_spaces = 0; |