diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 7aae0432..f387edba 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -45,130 +45,34 @@ #include #include #include -#include +#include #include #include -#include #include -#include +#include +#include #include "common/linux/dump_symbols.h" #include "common/linux/file_id.h" #include "common/linux/guid_creator.h" +#include "common/linux/module.h" +#include "common/linux/stabs_reader.h" +#include "processor/scoped_ptr.h" // This namespace contains helper functions. namespace { -struct SourceFileInfo; - -// Infomation of a line. -struct LineInfo { - // Offset from start of the function. - // Load from stab symbol. - ElfW(Off) rva_to_func; - // Offset from base of the loading binary. - ElfW(Off) rva_to_base; - // Size of the line. - // It is the difference of the starting address of the line and starting - // address of the next N_SLINE, N_FUN or N_SO. - uint32_t size; - // Line number. - uint32_t line_num; - // The source file this line belongs to. - SourceFileInfo *file; -}; - -typedef std::list LineInfoList; - -// Information of a function. -struct FuncInfo { - // Name of the function. - std::string name; - // Offset from the base of the loading address. - ElfW(Off) rva_to_base; - // Virtual address of the function. - // Load from stab symbol. - ElfW(Addr) addr; - // Size of the function. - // It is the difference of the starting address of the function and starting - // address of the next N_FUN or N_SO. - uint32_t size; - // Total size of stack parameters. - uint32_t stack_param_size; - // Line information array. - LineInfoList line_info; -}; - -typedef std::list FuncInfoList; - -// Information of a source file. -struct SourceFileInfo { - // Name of the source file. - const char *name; - // Starting address of the source file. - ElfW(Addr) addr; - // Id of the source file. - int source_id; - // Functions information. 
- FuncInfoList func_info; -}; - -// A simple std::list of pointers to SourceFileInfo structures, that -// owns the structures pointed to: destroying the list destroys them, -// as well. -class SourceFileInfoList : public std::list { - public: - ~SourceFileInfoList() { - for (iterator it = this->begin(); it != this->end(); it++) - delete *it; - } -}; - -typedef std::map NameToFileMap; - -// Information of a symbol table. -// This is the root of all types of symbol. -struct SymbolInfo { - // The main files used in this module. This does not include header - // files; it includes only files that were provided as the primary - // source file for the compilation unit. In STABS, these are files - // named in 'N_SO' entries. - SourceFileInfoList main_files; - - // Map from file names to source file structures. Note that this - // map's keys are compared as pointers, not strings, so if the same - // name appears at two different addresses in stabstr, the map will - // treat that as two different names. If the linker didn't unify - // names in .stabstr (which it does), this would result in duplicate - // FILE lines, which is benign. - NameToFileMap name_to_file; - - // An array of some addresses at which a file boundary occurs. - // - // The STABS information describing a compilation unit gives the - // unit's start address, but not its ending address or size. Those - // must be inferred by finding the start address of the next file. - // For the last compilation unit, or when one compilation unit ends - // before the next one starts, STABS includes an N_SO entry whose - // filename is the empty string; such an entry's address serves - // simply to mark the end of the preceding compilation unit. Rather - // than create FuncInfoList for such entries, we record their - // addresses here. These are not necessarily sorted. - std::vector file_boundaries; - - // The current source file, for line number information. This is - // persistent across functions. 
- SourceFileInfo *current_source_file; -}; +using google_breakpad::Module; +using std::vector; // Stab section name. static const char *kStabName = ".stab"; // Demangle using abi call. // Older GCC may not support it. -static std::string Demangle(const char *mangled) { +static std::string Demangle(const std::string &mangled) { int status = 0; - char *demangled = abi::__cxa_demangle(mangled, NULL, NULL, &status); + char *demangled = abi::__cxa_demangle(mangled.c_str(), NULL, NULL, &status); if (status == 0 && demangled != NULL) { std::string str(demangled); free(demangled); @@ -228,278 +132,212 @@ static const ElfW(Shdr) *FindSectionByName(const char *name, return NULL; } -// Return the SourceFileInfo for the file named NAME in SYMBOLS, as -// recorden in the name_to_file map. If none exists, create a new -// one. -// -// If the file is a main file, it is the caller's responsibility to -// set its address and add it to the list of main files. -// -// When creating a new file, this function does not make a copy of -// NAME; NAME must stay alive for as long as the symbol table does. -static SourceFileInfo *FindSourceFileInfo(SymbolInfo *symbols, - const char *name) { - SourceFileInfo **map_entry = &symbols->name_to_file[name]; - SourceFileInfo *file; - if (*map_entry) - file = *map_entry; - else { - file = new SourceFileInfo; - file->name = name; - file->source_id = -1; - file->addr = 0; - *map_entry = file; - } - return file; -} +// Our handler class for STABS data. 
+class DumpStabsHandler: public google_breakpad::StabsHandler { + public: + DumpStabsHandler(Module *module) : + module_(module), + comp_unit_base_address_(0), + current_function_(NULL), + current_source_file_(NULL), + current_source_file_name_(NULL) { } -static int LoadLineInfo(struct nlist *list, - struct nlist *list_end, - SymbolInfo *symbols, - struct SourceFileInfo *source_file_info, - struct FuncInfo *func_info, - const ElfW(Shdr) *stabstr_section) { - struct nlist *cur_list = list; - // The name of the file any subsequent lines would belong to. - const char *last_source_name = symbols->current_source_file->name; - do { - // Skip non line information. - while (cur_list < list_end && cur_list->n_type != N_SLINE) { - // Only exit when got another function, or source file. - if (cur_list->n_type == N_FUN || cur_list->n_type == N_SO) - return cur_list - list; - // N_SOL means source lines following it will be from another - // source file. But don't actually create a file entry yet; - // wait until we see executable code attributed to the file. - if (cur_list->n_type == N_SOL - && cur_list->n_un.n_strx > 0) - last_source_name = reinterpret_cast(cur_list->n_un.n_strx - + stabstr_section->sh_offset); - ++cur_list; - } - struct LineInfo line; - while (cur_list < list_end && cur_list->n_type == N_SLINE) { - // If this line is attributed to a new file, create its entry now. - if (last_source_name != symbols->current_source_file->name) { - symbols->current_source_file - = FindSourceFileInfo(symbols, last_source_name); - } - line.file = symbols->current_source_file; - line.rva_to_func = cur_list->n_value; - // n_desc is a signed short - line.line_num = (unsigned short)cur_list->n_desc; - // We will compute these later. For now, pacify compiler warnings. 
- line.size = 0; - line.rva_to_base = 0; - func_info->line_info.push_back(line); - ++cur_list; - } - } while (list < list_end); + bool StartCompilationUnit(const char *name, uint64_t address, + const char *build_directory); + bool EndCompilationUnit(uint64_t address); + bool StartFunction(const std::string &name, uint64_t address); + bool EndFunction(uint64_t address); + bool Line(uint64_t address, const char *name, int number); - return cur_list - list; -} + // Do any final processing necessary to make module_ contain all the + // data provided by the STABS reader. + // + // Because STABS does not provide reliable size information for + // functions and lines, we need to make a pass over the data after + // processing all the STABS to compute those sizes. We take care of + // that here. + void Finalize(); -static int LoadFuncSymbols(struct nlist *list, - struct nlist *list_end, - SymbolInfo *symbols, - struct SourceFileInfo *source_file_info, - const ElfW(Shdr) *stabstr_section) { - struct nlist *cur_list = list; - assert(cur_list->n_type == N_SO); - ++cur_list; - source_file_info->func_info.clear(); - while (cur_list < list_end) { - // Go until the function symbol. - while (cur_list < list_end && cur_list->n_type != N_FUN) { - if (cur_list->n_type == N_SO) { - return cur_list - list; - } - ++cur_list; - continue; - } - if (cur_list->n_type == N_FUN) { - struct FuncInfo func_info; - // The STABS data for an N_FUN entry is the function's (mangled) - // name, followed by a colon, followed by type information. We - // want to retain the name only. - const char *stabs_name - = reinterpret_cast(cur_list->n_un.n_strx + - stabstr_section->sh_offset); - const char *name_end = strchr(stabs_name, ':'); - if (! 
name_end) - name_end = stabs_name + strlen(stabs_name); - func_info.name = std::string(stabs_name, name_end - stabs_name); - func_info.addr = cur_list->n_value; - func_info.rva_to_base = 0; - func_info.size = 0; - func_info.stack_param_size = 0; - cur_list++; + private: - // Line info. - cur_list += LoadLineInfo(cur_list, - list_end, - symbols, - source_file_info, - &func_info, - stabstr_section); + // An arbitrary, but very large, size to use for functions whose + // size we can't compute properly. + static const uint64_t kFallbackSize = 0x10000000; - // Functions in this module should have address bigger than the module - // startring address. - // There maybe a lot of duplicated entry for a function in the symbol, - // only one of them can met this. - if (func_info.addr >= source_file_info->addr) { - source_file_info->func_info.push_back(func_info); - } - } - } - return cur_list - list; -} + // The module we're contributing debugging info to. + Module *module_; -// Compute size and rva information based on symbols loaded from stab section. -static bool ComputeSizeAndRVA(ElfW(Addr) loading_addr, - struct SymbolInfo *symbols) { - SourceFileInfoList::iterator file_it; - FuncInfoList::iterator func_it; - LineInfoList::iterator line_it; + // The functions we've generated so far. We don't add these to + // module_ as we parse them. Instead, we wait until we've computed + // their ending address, and their lines' ending addresses. + // + // We could just stick them in module_ from the outset, but if + // module_ already contains data gathered from other debugging + // formats, that would complicate the size computation. + vector functions_; - // A table of all the addresses at which files and functions start - // or end. We build this from the file boundary list and our lists - // of files and functions, sort it, and then use it to find the ends - // of functions and source lines for which we have no size - // information. 
- std::vector boundaries = symbols->file_boundaries; - for (file_it = symbols->main_files.begin(); - file_it != symbols->main_files.end(); file_it++) { - boundaries.push_back((*file_it)->addr); - for (func_it = (*file_it)->func_info.begin(); - func_it != (*file_it)->func_info.end(); func_it++) - boundaries.push_back(func_it->addr); - } - std::sort(boundaries.begin(), boundaries.end()); + // Boundary addresses. STABS doesn't necessarily supply sizes for + // functions and lines, so we need to compute them ourselves by + // finding the next object. + vector boundaries_; - int no_next_addr_count = 0; - for (file_it = symbols->main_files.begin(); - file_it != symbols->main_files.end(); file_it++) { - for (func_it = (*file_it)->func_info.begin(); - func_it != (*file_it)->func_info.end(); func_it++) { - struct FuncInfo &func_info = *func_it; - assert(func_info.addr >= loading_addr); - func_info.rva_to_base = func_info.addr - loading_addr; - func_info.size = 0; - std::vector::iterator boundary - = std::upper_bound(boundaries.begin(), boundaries.end(), - func_info.addr); - ElfW(Addr) next_addr = (boundary == boundaries.end()) ? 0 : *boundary; - // I've noticed functions with an address bigger than any other functions - // and source files modules, this is probably the last function in the - // module, due to limitions of Linux stab symbol, it is impossible to get - // the exact size of this kind of function, thus we give it a default - // very big value. This should be safe since this is the last function. - // But it is a ugly hack..... - // The following code can reproduce the case: - // template - // void Foo(T value) { - // } - // - // int main(void) { - // Foo(10); - // Foo(std::string("hello")); - // return 0; - // } - // TODO(liuli): Find a better solution. 
- static const int kDefaultSize = 0x10000000; - if (next_addr != 0) { - func_info.size = next_addr - func_info.addr; - } else { - if (no_next_addr_count > 1) { - fprintf(stderr, "Got more than one function without the following "); - fprintf(stderr, "symbol. Ignore this function.\n"); - fprintf(stderr, "The dumped symbol may not correct.\n"); - assert(!"This should not happen!\n"); - func_info.size = 0; - continue; - } + // The base address of the current compilation unit. We use this to + // recognize functions we should omit from the symbol file. (If you + // know the details of why we omit these, please patch this + // comment.) + Module::Address comp_unit_base_address_; - no_next_addr_count++; - func_info.size = kDefaultSize; - } - // Compute line size. - for (line_it = func_info.line_info.begin(); - line_it != func_info.line_info.end(); line_it++) { - struct LineInfo &line_info = *line_it; - LineInfoList::iterator next_line_it = line_it; - next_line_it++; - line_info.size = 0; - if (next_line_it != func_info.line_info.end()) { - line_info.size = - next_line_it->rva_to_func - line_info.rva_to_func; - } else { - // The last line in the function. - // If we can find a function or source file symbol immediately - // following the line, we can get the size of the line by computing - // the difference of the next address to the starting address of this - // line. - // Otherwise, we need to set a default big enough value. This occurs - // mostly because the this function is the last one in the module. - if (next_addr != 0) { - ElfW(Off) next_addr_offset = next_addr - func_info.addr; - line_info.size = next_addr_offset - line_info.rva_to_func; - } else { - line_info.size = kDefaultSize; - } - } - line_info.rva_to_base = line_info.rva_to_func + func_info.rva_to_base; - } // for each line. - } // for each function. - } // for each source file. + // The function we're currently contributing lines to. 
+ Module::Function *current_function_; + + // The last Module::File we got a line number in. + Module::File *current_source_file_; + + // The pointer in the .stabstr section of the name that + // current_source_file_ is built from. This allows us to quickly + // recognize when the current line is in the same file as the + // previous one (which it usually is). + const char *current_source_file_name_; +}; + +bool DumpStabsHandler::StartCompilationUnit(const char *name, uint64_t address, + const char *build_directory) { + assert(! comp_unit_base_address_); + current_source_file_name_ = name; + current_source_file_ = module_->FindFile(name); + comp_unit_base_address_ = address; + boundaries_.push_back(static_cast(address)); return true; } +bool DumpStabsHandler::EndCompilationUnit(uint64_t address) { + assert(comp_unit_base_address_); + comp_unit_base_address_ = 0; + current_source_file_ = NULL; + current_source_file_name_ = NULL; + if (address) + boundaries_.push_back(static_cast(address)); + return true; +} + +bool DumpStabsHandler::StartFunction(const std::string &name, + uint64_t address) { + assert(! current_function_); + Module::Function *f = new Module::Function; + f->name_ = Demangle(name); + f->address_ = address; + f->size_ = 0; // We compute this in DumpStabsHandler::Finalize(). + f->parameter_size_ = 0; // We don't provide this information. + current_function_ = f; + boundaries_.push_back(static_cast(address)); + return true; +} + +bool DumpStabsHandler::EndFunction(uint64_t address) { + assert(current_function_); + // Functions in this compilation unit should have address bigger + // than the compilation unit's starting address. There may be a lot + // of duplicated entries for functions in the STABS data; only one + // entry can meet this requirement. + // + // (I don't really understand the above comment; just bringing it + // along from the previous code, and leaving the behavior unchanged. + // If you know the whole story, please patch this comment. 
--jimb) + if (current_function_->address_ >= comp_unit_base_address_) + functions_.push_back(current_function_); + else + delete current_function_; + current_function_ = NULL; + if (address) + boundaries_.push_back(static_cast(address)); + return true; +} + +bool DumpStabsHandler::Line(uint64_t address, const char *name, int number) { + assert(current_function_); + assert(current_source_file_); + if (name != current_source_file_name_) { + current_source_file_ = module_->FindFile(name); + current_source_file_name_ = name; + } + Module::Line line; + line.address_ = address; + line.size_ = 0; // We compute this in DumpStabsHandler::Finalize(). + line.file_ = current_source_file_; + line.number_ = number; + current_function_->lines_.push_back(line); + return true; +} + +void DumpStabsHandler::Finalize() { + // Sort our boundary list, so we can search it quickly. + sort(boundaries_.begin(), boundaries_.end()); + // Sort all functions by address, just for neatness. + sort(functions_.begin(), functions_.end(), + Module::Function::CompareByAddress); + for (vector::iterator func_it = functions_.begin(); + func_it != functions_.end(); + func_it++) { + Module::Function *f = *func_it; + // Compute the function f's size. + vector::iterator boundary + = std::upper_bound(boundaries_.begin(), boundaries_.end(), f->address_); + if (boundary != boundaries_.end()) + f->size_ = *boundary - f->address_; + else + // If this is the last function in the module, and the STABS + // reader was unable to give us its ending address, then assign + // it a bogus, very large value. This will happen at most once + // per module: since we've added all functions' addresses to the + // boundary table, only one can be the last. + f->size_ = kFallbackSize; + + // Compute sizes for each of the function f's lines --- if it has any. + if (! 
f->lines_.empty()) { + stable_sort(f->lines_.begin(), f->lines_.end(), + Module::Line::CompareByAddress); + vector::iterator last_line = f->lines_.end() - 1; + for (vector::iterator line_it = f->lines_.begin(); + line_it != last_line; line_it++) + line_it[0].size_ = line_it[1].address_ - line_it[0].address_; + // Compute the size of the last line from f's end address. + last_line->size_ = (f->address_ + f->size_) - last_line->address_; + } + } + // Now that everything has a size, add our functions to the module, and + // dispose of our private list. + module_->AddFunctions(functions_.begin(), functions_.end()); + functions_.clear(); +} + static bool LoadSymbols(const ElfW(Shdr) *stab_section, const ElfW(Shdr) *stabstr_section, - ElfW(Addr) loading_addr, - struct SymbolInfo *symbols) { + Module *module) { if (stab_section == NULL || stabstr_section == NULL) return false; - struct nlist *lists = - reinterpret_cast(stab_section->sh_offset); - int nstab = stab_section->sh_size / sizeof(struct nlist); - // First pass, load all symbols from the object file. - for (int i = 0; i < nstab;) { - int step = 1; - struct nlist *cur_list = lists + i; - if (cur_list->n_type == N_SO) { - if (cur_list->n_un.n_strx) { - const char *name = reinterpret_cast(cur_list->n_un.n_strx - + stabstr_section->sh_offset); - struct SourceFileInfo *source_file_info - = FindSourceFileInfo(symbols, name); - // Add it to the list; use ADDR to tell whether we've already done so. - if (! source_file_info->addr) - symbols->main_files.push_back(source_file_info); - source_file_info->addr = cur_list->n_value; - symbols->current_source_file = source_file_info; - step = LoadFuncSymbols(cur_list, lists + nstab, symbols, - source_file_info, stabstr_section); - } else { - // N_SO entries with no name mark file boundary addresses. - symbols->file_boundaries.push_back(cur_list->n_value); - } - } - i += step; - } - - // Second pass, compute the size of functions and lines. 
- return ComputeSizeAndRVA(loading_addr, symbols); + // A callback object to handle data from the STABS reader. + DumpStabsHandler handler(module); + // Find the addresses of the STABS data, and create a STABS reader object. + uint8_t *stabs = reinterpret_cast(stab_section->sh_offset); + uint8_t *stabstr = reinterpret_cast(stabstr_section->sh_offset); + google_breakpad::StabsReader reader(stabs, stab_section->sh_size, + stabstr, stabstr_section->sh_size, + &handler); + // Read the STABS data, and do post-processing. + if (! reader.Process()) + return false; + handler.Finalize(); + return true; } -static bool LoadSymbols(ElfW(Ehdr) *elf_header, struct SymbolInfo *symbols) { +static bool LoadSymbols(ElfW(Ehdr) *elf_header, Module *module) { // Translate all offsets in section headers into address. FixAddress(elf_header); ElfW(Addr) loading_addr = GetLoadingAddress( reinterpret_cast(elf_header->e_phoff), elf_header->e_phnum); + module->SetLoadAddress(loading_addr); const ElfW(Shdr) *sections = reinterpret_cast(elf_header->e_shoff); @@ -513,153 +351,7 @@ static bool LoadSymbols(ElfW(Ehdr) *elf_header, struct SymbolInfo *symbols) { const ElfW(Shdr) *stabstr_section = stab_section->sh_link + sections; // Load symbols. 
- return LoadSymbols(stab_section, stabstr_section, loading_addr, symbols); -} - -static bool WriteModuleInfo(FILE *file, - ElfW(Half) arch, - const std::string &obj_file) { - const char *arch_name = NULL; - if (arch == EM_386) - arch_name = "x86"; - else if (arch == EM_X86_64) - arch_name = "x86_64"; - else - return false; - - uint8_t identifier[google_breakpad::kMDGUIDSize]; - google_breakpad::FileID file_id(obj_file.c_str()); - if (file_id.ElfFileIdentifier(identifier)) { - char identifier_str[40]; - file_id.ConvertIdentifierToString(identifier, - identifier_str, sizeof(identifier_str)); - char id_no_dash[40]; - int id_no_dash_len = 0; - memset(id_no_dash, 0, sizeof(id_no_dash)); - for (int i = 0; identifier_str[i] != '\0'; ++i) - if (identifier_str[i] != '-') - id_no_dash[id_no_dash_len++] = identifier_str[i]; - // Add an extra "0" by the end. - id_no_dash[id_no_dash_len++] = '0'; - std::string filename = obj_file; - size_t slash_pos = obj_file.find_last_of("/"); - if (slash_pos != std::string::npos) - filename = obj_file.substr(slash_pos + 1); - return 0 <= fprintf(file, "MODULE Linux %s %s %s\n", arch_name, - id_no_dash, filename.c_str()); - } - return false; -} - -// Set *INCLUDED_FILES to the list of included files in SYMBOLS, -// ordered appropriately for output. Included files should appear in -// the order in which they are first referenced by source line info. -// Assign these files source id numbers starting with NEXT_SOURCE_ID. -// -// Note that the name_to_file map may contain #included files that are -// unreferenced; these are the result of LoadFuncSymbols omitting -// functions from the list whose addresses fall outside the address -// range of the file that contains them. 
-static void CollectIncludedFiles(const struct SymbolInfo &symbols, - std::vector *included_files, - int next_source_id) { - for (SourceFileInfoList::const_iterator file_it = symbols.main_files.begin(); - file_it != symbols.main_files.end(); file_it++) { - for (FuncInfoList::const_iterator func_it = (*file_it)->func_info.begin(); - func_it != (*file_it)->func_info.end(); func_it++) { - for (LineInfoList::const_iterator line_it = func_it->line_info.begin(); - line_it != func_it->line_info.end(); line_it++) { - SourceFileInfo *file = line_it->file; - if (file->source_id == -1) { - file->source_id = next_source_id++; - // Here we use the source id as a mark, ensuring that each - // file appears in the list only once. - included_files->push_back(file); - } - } - } - } -} - -// Write 'FILE' lines for all source files in SYMBOLS to FILE. We -// assign source id numbers to files here. -static bool WriteSourceFileInfo(FILE *file, struct SymbolInfo &symbols) { - int next_source_id = 0; - // Assign source id numbers to main files, and write them out to the file. - for (SourceFileInfoList::iterator file_it = symbols.main_files.begin(); - file_it != symbols.main_files.end(); file_it++) { - SourceFileInfo *file_info = *file_it; - assert(file_info->addr); - // We only output 'FILE' lines for main files if their names - // contain '.'. The extensionless C++ header files are #included, - // not main files, so it wouldn't affect them. If you know the - // story, please patch this comment. - if (strchr(file_info->name, '.')) { - file_info->source_id = next_source_id++; - if (0 > fprintf(file, "FILE %d %s\n", - file_info->source_id, file_info->name)) - return false; - } - } - // Compute the list of included files, and write them out. - // Can't use SourceFileInfoList here, because that owns the files it - // points to. 
- std::vector included_files; - std::vector::const_iterator file_it; - CollectIncludedFiles(symbols, &included_files, next_source_id); - for (file_it = included_files.begin(); file_it != included_files.end(); - file_it++) { - if (0 > fprintf(file, "FILE %d %s\n", - (*file_it)->source_id, (*file_it)->name)) - return false; - } - return true; -} - -static bool WriteOneFunction(FILE *file, - const struct FuncInfo &func_info) { - std::string func_name = Demangle(func_info.name.c_str()); - - if (func_info.size <= 0) - return true; - - if (0 <= fprintf(file, "FUNC %lx %lx %d %s\n", - (unsigned long) func_info.rva_to_base, - (unsigned long) func_info.size, - func_info.stack_param_size, - func_name.c_str())) { - for (LineInfoList::const_iterator it = func_info.line_info.begin(); - it != func_info.line_info.end(); it++) { - const struct LineInfo &line_info = *it; - if (0 > fprintf(file, "%lx %lx %d %d\n", - (unsigned long) line_info.rva_to_base, - (unsigned long) line_info.size, - line_info.line_num, - line_info.file->source_id)) - return false; - } - return true; - } - return false; -} - -static bool WriteFunctionInfo(FILE *file, const struct SymbolInfo &symbols) { - for (SourceFileInfoList::const_iterator it = symbols.main_files.begin(); - it != symbols.main_files.end(); it++) { - const struct SourceFileInfo &file_info = **it; - for (FuncInfoList::const_iterator fiIt = file_info.func_info.begin(); - fiIt != file_info.func_info.end(); fiIt++) { - const struct FuncInfo &func_info = *fiIt; - if (!WriteOneFunction(file, func_info)) - return false; - } - } - return true; -} - -static bool DumpStabSymbols(FILE *file, struct SymbolInfo &symbols) { - return WriteSourceFileInfo(file, symbols) && - WriteFunctionInfo(file, symbols); + return LoadSymbols(stab_section, stabstr_section, module); } // @@ -714,6 +406,48 @@ class MmapWrapper { size_t size_; }; +// Return the breakpad symbol file identifier for the architecture of +// ELF_HEADER. 
+const char *ElfArchitecture(const ElfW(Ehdr) *elf_header) { + ElfW(Half) arch = elf_header->e_machine; + if (arch == EM_386) + return "x86"; + else if (arch == EM_X86_64) + return "x86_64"; + else + return NULL; +} + +// Format the Elf file identifier in IDENTIFIER as a UUID with the +// dashes removed. +std::string FormatIdentifier(unsigned char identifier[16]) { + char identifier_str[40]; + google_breakpad::FileID::ConvertIdentifierToString( + identifier, + identifier_str, + sizeof(identifier_str)); + std::string id_no_dash; + for (int i = 0; identifier_str[i] != '\0'; ++i) + if (identifier_str[i] != '-') + id_no_dash += identifier_str[i]; + // Add an extra "0" by the end. PDB files on Windows have an 'age' + // number appended to the end of the file identifier; this isn't + // really used or necessary on other platforms, but let's preserve + // the pattern. + id_no_dash += '0'; + return id_no_dash; +} + +// Return the non-directory portion of FILENAME: the portion after the +// last slash, or the whole filename if there are no slashes. +std::string BaseFileName(const std::string &filename) { + // Lots of copies! basename's behavior is less than ideal. + char *c_filename = strdup(filename.c_str()); + std::string base = basename(c_filename); + free(c_filename); + return base; +} + } // namespace namespace google_breakpad { @@ -735,16 +469,27 @@ bool DumpSymbols::WriteSymbolFile(const std::string &obj_file, ElfW(Ehdr) *elf_header = reinterpret_cast(obj_base); if (!IsValidElf(elf_header)) return false; - struct SymbolInfo symbols; - if (!LoadSymbols(elf_header, &symbols)) - return false; - // Write to symbol file. - if (WriteModuleInfo(sym_file, elf_header->e_machine, obj_file) && - DumpStabSymbols(sym_file, symbols)) - return true; + unsigned char identifier[16]; + google_breakpad::FileID file_id(obj_file.c_str()); + if (! file_id.ElfFileIdentifier(identifier)) + return false; - return false; + const char *architecture = ElfArchitecture(elf_header); + if (! 
architecture) + return false; + + std::string name = BaseFileName(obj_file); + std::string os = "Linux"; + std::string id = FormatIdentifier(identifier); + + Module module(name, os, architecture, id); + if (!LoadSymbols(elf_header, &module)) + return false; + if (!module.Write(sym_file)) + return false; + + return true; } } // namespace google_breakpad diff --git a/src/common/linux/file_id.cc b/src/common/linux/file_id.cc index e8fd3650..34c9e508 100644 --- a/src/common/linux/file_id.cc +++ b/src/common/linux/file_id.cc @@ -52,7 +52,7 @@ FileID::FileID(const char* path) { } bool FileID::ElfFileIdentifier(uint8_t identifier[kMDGUIDSize]) { - const size_t mapped_len = 4096; // Page size (matches WriteMappings()) + const ssize_t mapped_len = 4096; // Page size (matches WriteMappings()) int fd = open(path_, O_RDONLY); if (fd < 0) return false; @@ -95,7 +95,7 @@ void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], *data3 = htons(*data3); int buffer_idx = 0; - for (int idx = 0; + for (unsigned int idx = 0; (buffer_idx < buffer_length) && (idx < kMDGUIDSize); ++idx) { int hi = (identifier_swapped[idx] >> 4) & 0x0F; diff --git a/src/common/linux/module.cc b/src/common/linux/module.cc new file mode 100644 index 00000000..69bec9cd --- /dev/null +++ b/src/common/linux/module.cc @@ -0,0 +1,167 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <errno.h>
+#include <string.h>
+#include "common/linux/module.h"
+
+namespace google_breakpad {
+
+Module::Module(const string &name, const string &os,
+               const string &architecture, const string &id) :
+    name_(name),
+    os_(os),
+    architecture_(architecture),
+    id_(id),
+    load_address_(0) { }
+
+Module::~Module() {
+  for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it)
+    delete it->second;
+  for (vector<Function *>::iterator it = functions_.begin();
+       it != functions_.end(); ++it)
+    delete *it;
+}
+
+void Module::SetLoadAddress(Address address) {
+  load_address_ = address;
+}
+
+void Module::AddFunction(Function *function) {
+  functions_.push_back(function);
+}
+
+void Module::AddFunctions(vector<Function *>::iterator begin,
+                          vector<Function *>::iterator end) {
+  functions_.insert(functions_.end(), begin, end);
+}
+
+Module::File *Module::FindFile(const string &name) {
+  // A tricky bit here.  The key of each map entry needs to be a
+  // pointer to the entry's File's name string.  This means that we
+  // can't do the initial lookup with any operation that would create
+  // an empty entry for us if the name isn't found (like, say,
+  // operator[] or insert do), because such a created entry's key will
+  // be a pointer to the string passed as our argument.  Since the key
+  // of a map's value type is const, we can't fix it up once we've
+  // created our file.  lower_bound does the lookup without doing an
+  // insertion, and returns a good hint iterator to pass to insert.
+  // Our "destiny" is where we belong, whether we're there or not now.
+  FileByNameMap::iterator destiny = files_.lower_bound(&name);
+  if (destiny == files_.end()
+      || *destiny->first != name) {  // Repeated string comparison, boo hoo.
+    File *file = new File;
+    file->name_ = name;
+    file->source_id_ = -1;
+    destiny = files_.insert(destiny,
+                            FileByNameMap::value_type(&file->name_, file));
+  }
+  return destiny->second;
+}
+
+Module::File *Module::FindFile(const char *name) {
+  string name_string = name;
+  return FindFile(name_string);
+}
+
+void Module::AssignSourceIds() {
+  // First, give every source file an id of -1.
+  for (FileByNameMap::iterator file_it = files_.begin();
+       file_it != files_.end(); ++file_it)
+    file_it->second->source_id_ = -1;
+
+  // Next, mark all files actually cited by our functions' line number
+  // info, by setting each one's source id to zero.
+  for (vector<Function *>::const_iterator func_it = functions_.begin();
+       func_it != functions_.end(); ++func_it) {
+    Function *func = *func_it;
+    for (vector<Line>::iterator line_it = func->lines_.begin();
+         line_it != func->lines_.end(); ++line_it)
+      line_it->file_->source_id_ = 0;
+  }
+
+  // Finally, assign source ids to those files that have been marked.
+  // We could have just assigned source id numbers while traversing
+  // the line numbers, but doing it this way numbers the files in
+  // lexicographical order by name, which is neat.
+  int next_source_id = 0;
+  for (FileByNameMap::iterator file_it = files_.begin();
+       file_it != files_.end(); ++file_it)
+    if (! file_it->second->source_id_)
+      file_it->second->source_id_ = next_source_id++;
+}
+
+bool Module::ReportError() {
+  fprintf(stderr, "error writing symbol file: %s\n",
+          strerror (errno));
+  return false;
+}
+
+bool Module::Write(FILE *stream) {
+  if (0 > fprintf(stream, "MODULE %s %s %s %s\n",
+                  os_.c_str(), architecture_.c_str(), id_.c_str(),
+                  name_.c_str()))
+    return ReportError();
+
+  // Write out files.  Unused files keep a source id of -1 and are omitted.
+  AssignSourceIds();
+  for (FileByNameMap::iterator file_it = files_.begin();
+       file_it != files_.end(); ++file_it) {
+    File *file = file_it->second;
+    if (file->source_id_ >= 0) {
+      if (0 > fprintf(stream, "FILE %d %s\n",
+                      file->source_id_, file->name_.c_str()))
+        return ReportError();
+    }
+  }
+
+  // Write out functions and their lines.  Address, size and parameter size
+  // are hex and printed at their full 64-bit width (Address is u_int64_t).
+  for (vector<Function *>::const_iterator func_it = functions_.begin();
+       func_it != functions_.end(); ++func_it) {
+    Function *func = *func_it;
+    if (0 > fprintf(stream, "FUNC %llx %llx %llx %s\n",
+                    (unsigned long long) (func->address_ - load_address_),
+                    (unsigned long long) func->size_,
+                    (unsigned long long) func->parameter_size_,
+                    func->name_.c_str()))
+      return ReportError();
+    for (vector<Line>::iterator line_it = func->lines_.begin();
+         line_it != func->lines_.end(); ++line_it)
+      if (0 > fprintf(stream, "%llx %llx %d %d\n",
+                      (unsigned long long) (line_it->address_ - load_address_),
+                      (unsigned long long) line_it->size_,
+                      line_it->number_,
+                      line_it->file_->source_id_))
+        return ReportError();
+  }
+
+  return true;
+}
+
+} // namespace google_breakpad
diff --git a/src/common/linux/module.h b/src/common/linux/module.h
new file mode 100644
index 00000000..b91c0f90
--- /dev/null
+++ b/src/common/linux/module.h
@@ -0,0 +1,193 @@
+// Copyright (c) 2009, Google Inc.             -*- mode: c++ -*-
+// All rights reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+// module.h: defines google_breakpad::Module, for writing breakpad symbol files
+
+#ifndef COMMON_LINUX_MODULE_H__
+#define COMMON_LINUX_MODULE_H__
+
+#include <stdio.h>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "google_breakpad/common/breakpad_types.h"
+
+namespace google_breakpad {
+
+using std::string;
+using std::vector;
+using std::map;
+
+// A Module represents the contents of a module, and supports methods
+// for adding information produced by parsing STABS or DWARF data
+// --- possibly both from the same file --- and then writing out the
+// unified contents as a Breakpad-format symbol file.
+class Module {
+ public:
+  // The type of addresses and sizes in a symbol table.
+  typedef u_int64_t Address;
+  struct File;
+  struct Function;
+  struct Line;
+
+  // Addresses appearing in File, Function, and Line structures are
+  // absolute, not relative to the module's load address.  That
+  // is, if the module were loaded at its nominal load address, the
+  // addresses would be correct.
+
+  // A source file.
+  struct File {
+    // The name of the source file.
+    string name_;
+
+    // The file's source id.  The Write member function clears this
+    // field and assigns source ids afresh, so any value placed here
+    // before calling Write will be lost.
+    int source_id_;
+  };
+
+  // A source line.  Defined before Function, which holds a vector of these.
+  struct Line {
+    // For sorting by address.  (Not style-guide compliant, but it's
+    // stupid not to put this in the struct.)
+    static bool CompareByAddress(const Module::Line &x, const Module::Line &y) {
+      return x.address_ < y.address_;
+    }
+
+    Address address_, size_;    // The address and size of the line's code.
+    File *file_;                // The source file.
+    int number_;                // The source line number.
+  };
+
+  // A function.
+  struct Function {
+    // For sorting by address.  (Not style-guide compliant, but it's
+    // stupid not to put this in the struct.)
+    static bool CompareByAddress(const Function *x, const Function *y) {
+      return x->address_ < y->address_;
+    }
+
+    // The function's name.
+    string name_;
+
+    // The start address and length of the function's code.
+    Address address_, size_;
+
+    // The function's parameter size.
+    Address parameter_size_;
+
+    // Source lines belonging to this function, sorted by increasing
+    // address.
+    vector<Line> lines_;
+  };
+
+  // Create a new module with the given name, operating system,
+  // architecture, and ID string.
+  Module(const string &name, const string &os, const string &architecture,
+         const string &id);
+  ~Module();
+
+  // Set the module's load address to LOAD_ADDRESS; addresses given
+  // for functions and lines will be written to the Breakpad symbol
+  // file as offsets from this address.  Construction initializes this
+  // module's load address to zero: addresses written to the symbol
+  // file will be the same as they appear in the File and Line
+  // structures.
+  void SetLoadAddress(Address load_address);
+
+  // Add FUNCTION to the module.
+  // Destroying this module frees all Function objects that have been
+  // added with this function.
+  void AddFunction(Function *function);
+
+  // Add all the functions in [BEGIN,END) to the module.
+  // Destroying this module frees all Function objects that have been
+  // added with this function.
+  void AddFunctions(vector<Function *>::iterator begin,
+                    vector<Function *>::iterator end);
+
+  // If this module has a file named NAME, return a pointer to it.  If
+  // it has none, then create one and return a pointer to the new
+  // file.  Destroying this module frees all File objects that have
+  // been created using this function.
+  File *FindFile(const string &name);
+  File *FindFile(const char *name);
+
+  // Write this module to STREAM in the breakpad symbol format.
+  // Return true if all goes well, or false if an error occurs.  This
+  // method writes out:
+  // - a header based on the values given to the constructor,
+  // - the source files from FindFile that some line refers to, and finally
+  // - the functions added via AddFunction(s), each with its lines.
+  // Addresses in the output are all relative to the load address
+  // established by SetLoadAddress.
+  bool Write(FILE *stream);
+
+ private:
+
+  // Find those files in this module that are actually referred to by
+  // functions' line number data, and assign them source id numbers.
+  // Set the source id numbers for all other files --- unused by the
+  // source line data --- to -1.  We do this before writing out the
+  // symbol file, at which point we omit any unused files.
+  void AssignSourceIds();
+
+  // Report an error that has occurred writing the symbol file, using
+  // errno to find the appropriate cause.  Return false.
+  static bool ReportError();
+
+  // Module header entries.
+  string name_, os_, architecture_, id_;
+
+  // The module's nominal load address.  Addresses for functions and
+  // lines are absolute, assuming the module is loaded at this
+  // address.
+  Address load_address_;
+
+  // Relation for maps whose keys are strings shared with some other
+  // structure.
+  struct CompareStringPtrs {
+    bool operator()(const string *x, const string *y) const { return *x < *y; }
+  };
+
+  // A map from filenames to File structures.  The map's keys are
+  // pointers to the Files' names.
+  typedef map<const string *, File *, CompareStringPtrs> FileByNameMap;
+
+  // The module owns all the files and functions that have been added
+  // to it; destroying the module frees the Files and Functions these
+  // point to.
+  FileByNameMap files_;                 // This module's source files.
+  vector<Function *> functions_;        // This module's functions.
+};
+
+} // namespace google_breakpad
+
+#endif  // COMMON_LINUX_MODULE_H__
diff --git a/src/common/linux/stabs_reader.cc b/src/common/linux/stabs_reader.cc
new file mode 100644
index 00000000..57073361
--- /dev/null
+++ b/src/common/linux/stabs_reader.cc
@@ -0,0 +1,195 @@
+// Copyright 2009 Google Inc. All Rights Reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file implements the google_breakpad::StabsReader class. 
+
+#include <a.out.h>
+#include <stab.h>
+#include <cstring>
+#include <cassert>
+
+#include "common/linux/stabs_reader.h"
+
+namespace google_breakpad {
+
+StabsReader::StabsReader(const uint8_t *stab, size_t stab_size,
+                         const uint8_t *stabstr, size_t stabstr_size,
+                         StabsHandler *handler) :
+    stabstr_(stabstr),
+    stabstr_size_(stabstr_size),
+    handler_(handler),
+    symbol_(NULL),
+    current_source_file_(NULL) {
+  symbols_ = reinterpret_cast<const struct nlist *>(stab);
+  symbols_end_ = symbols_ + (stab_size / sizeof (*symbols_));
+}
+
+const char *StabsReader::SymbolString() {
+  ptrdiff_t offset = symbol_->n_un.n_strx;
+  if (offset < 0 || (size_t) offset >= stabstr_size_) {
+    handler_->Warning("symbol %d: name offset outside the string section",
+                      static_cast<int>(symbol_ - symbols_));
+    // Return our null string, to keep our promise about all names being
+    // taken from the string section.
+    offset = 0;
+  }
+  return reinterpret_cast<const char *>(stabstr_ + offset);
+}
+
+bool StabsReader::Process() {
+  symbol_ = symbols_;
+  while (symbol_ < symbols_end_) {
+    if (symbol_->n_type == N_SO) {
+      if (! ProcessCompilationUnit())
+        return false;
+    } else
+      symbol_++;
+  }
+  return true;
+}
+
+bool StabsReader::ProcessCompilationUnit() {
+  assert(symbol_ < symbols_end_ && symbol_->n_type == N_SO);
+
+  // There may be an N_SO entry whose name ends with a slash,
+  // indicating the directory in which the compilation occurred.
+  // The build directory defaults to NULL.
+  const char *build_directory = NULL;
+  {
+    const char *name = SymbolString();
+    if (name[0] && name[strlen(name) - 1] == '/') {
+      build_directory = name;
+      symbol_++;
+    }
+  }
+
+  // We expect to see an N_SO entry with a filename next, indicating
+  // the start of the compilation unit.
+  {
+    if (symbol_ >= symbols_end_ || symbol_->n_type != N_SO)
+      return true;
+    const char *name = SymbolString();
+    // Stray end marker: consume it, or Process() would never advance.
+    if (name[0] == '\0') { symbol_++; return true; }
+    current_source_file_ = name;
+  }
+
+  if (! handler_->StartCompilationUnit(current_source_file_,
+                                       SymbolValue(),
+                                       build_directory))
+    return false;
+
+  symbol_++;
+
+  // The STABS documentation says that some compilers may emit
+  // additional N_SO units with names immediately following the first,
+  // and that they should be ignored.  However, the original Breakpad
+  // STABS reader doesn't ignore them, so we won't either.
+
+  // Process the body of the compilation unit, up to the next N_SO.
+  while (symbol_ < symbols_end_ && symbol_->n_type != N_SO) {
+    if (symbol_->n_type == N_FUN) {
+      if (! ProcessFunction())
+        return false;
+    } else
+      // Ignore anything else.
+      symbol_++;
+  }
+
+  // An N_SO with an empty name indicates the end of the compilation
+  // unit.  Default to zero.
+  uint64_t ending_address = 0;
+  if (symbol_ < symbols_end_) {
+    assert(symbol_->n_type == N_SO);
+    const char *name = SymbolString();
+    if (name[0] == '\0') {
+      ending_address = SymbolValue();
+      symbol_++;
+    }
+  }
+
+  if (! handler_->EndCompilationUnit(ending_address))
+    return false;
+
+  return true;
+}
+
+bool StabsReader::ProcessFunction() {
+  assert(symbol_ < symbols_end_ && symbol_->n_type == N_FUN);
+
+  uint64_t function_address = SymbolValue();
+  // The STABS string for an N_FUN entry is the name of the function,
+  // followed by a colon, followed by type information for the
+  // function.  We want to pass the name alone to StartFunction.
+  const char *stab_string = SymbolString();
+  const char *name_end = strchr(stab_string, ':');
+  if (! name_end)
+    name_end = stab_string + strlen(stab_string);
+  std::string name(stab_string, name_end - stab_string);
+  if (! handler_->StartFunction(name, function_address))
+    return false;
+  symbol_++;
+
+  while (symbol_ < symbols_end_) {
+    if (symbol_->n_type == N_SO || symbol_->n_type == N_FUN)
+      break;
+    else if (symbol_->n_type == N_SLINE) {
+      // The value of an N_SLINE entry is the offset of the line from
+      // the function's start address.
+      uint64_t line_address = function_address + SymbolValue();
+      // The n_desc of a N_SLINE entry is the line number.  It's a
+      // signed 16-bit field; line numbers from 32768 to 65535 are
+      // stored as n-65536.
+      uint16_t line_number = symbol_->n_desc;
+      if (! handler_->Line(line_address, current_source_file_, line_number))
+        return false;
+      symbol_++;
+    } else if (symbol_->n_type == N_SOL) {
+      current_source_file_ = SymbolString();
+      symbol_++;
+    } else
+      // Ignore anything else.
+      symbol_++;
+  }
+
+  // If there is a subsequent N_SO or N_FUN entry, its address is our
+  // end address.
+  uint64_t ending_address = 0;
+  if (symbol_ < symbols_end_) {
+    assert(symbol_->n_type == N_SO || symbol_->n_type == N_FUN);
+    ending_address = SymbolValue();
+    // Note: we do not increment symbol_ here, since we haven't consumed it.
+  }
+
+  if (! handler_->EndFunction(ending_address))
+    return false;
+
+  return true;
+}
+
+} // namespace google_breakpad
diff --git a/src/common/linux/stabs_reader.h b/src/common/linux/stabs_reader.h
new file mode 100644
index 00000000..7ebc30b1
--- /dev/null
+++ b/src/common/linux/stabs_reader.h
@@ -0,0 +1,188 @@
+// Copyright 2009 Google Inc. All Rights Reserved.  -*- mode: c++ -*-
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file contains definitions related to the STABS reader and
+// its handler interfaces.
+// A description of the STABS debugging format can be found at
+// http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html
+// The comments here assume you understand the format.
+//
+// This reader assumes that the system's <a.out.h> and <stab.h>
+// headers accurately describe the layout of the STABS data; this code
+// is not cross-platform safe.
+
+#ifndef COMMON_LINUX_STABS_READER_H__
+#define COMMON_LINUX_STABS_READER_H__
+
+#include <stdint.h>
+#include <cstddef>
+#include <a.out.h>
+
+#include <string>
+
+namespace google_breakpad {
+
+class StabsHandler;
+
+class StabsReader {
+ public:
+  // Create a reader for the STABS debug information whose .stab
+  // section is the STAB_SIZE bytes at STAB, and whose .stabstr
+  // section is the STABSTR_SIZE bytes at STABSTR.  The reader will
+  // call the methods of HANDLER to report the information it finds,
+  // when the reader's 'Process' method is called.
+  //
+  // Note that, in ELF, the .stabstr section should be found using the
+  // 'sh_link' field of the .stab section header, not by name.
+  StabsReader(const uint8_t *stab,    size_t stab_size,
+              const uint8_t *stabstr, size_t stabstr_size,
+              StabsHandler *handler);
+
+  // Process the STAB data, calling the handler's methods to report
+  // what we find.  While the handler functions return true, continue
+  // to process until we reach the end of the section.  If we
+  // processed the entire section and all handlers returned true,
+  // return true.  If any handler returned false, return false.
+  bool Process();
+
+ private:
+  // Return the name of the current symbol.
+  const char *SymbolString();
+
+  // Return the value of the current symbol.
+  uint64_t SymbolValue() const {
+    return symbol_->n_value;
+  }
+
+  // Process a compilation unit starting at symbol_.  Return true
+  // to continue processing, or false to abort.
+  bool ProcessCompilationUnit();
+
+  // Process a function in current_source_file_ starting at symbol_.
+  // Return true to continue processing, or false to abort.
+  bool ProcessFunction();
+
+  // The debugging information we're reading.
+  const struct nlist *symbols_, *symbols_end_;
+  const uint8_t *stabstr_;
+  size_t stabstr_size_;
+
+  StabsHandler *handler_;
+
+  // The current symbol we're processing.
+  const struct nlist *symbol_;
+
+  // The current source file name.
+  const char *current_source_file_;
+};
+
+// Consumer-provided callback structure for the STABS reader.
+// Clients of the STABS reader provide an instance of this structure.
+// The reader then invokes the methods of that instance to report the
+// information it finds.
+//
+// The default definitions of the methods do nothing.
+class StabsHandler {
+ public:
+  StabsHandler() { }
+  virtual ~StabsHandler() { }
+
+  // Some general notes about the handler callback functions:
+
+  // Processing proceeds until the end of the .stab section, or until
+  // one of these functions returns false.
+
+  // The addresses given are as reported in the STABS info, without
+  // regard for whether the module may be loaded at different
+  // addresses at different times (a shared library, say).  When
+  // processing STABS from an ELF shared library, the addresses given
+  // all assume the library is loaded at its nominal load address.
+  // They are *not* offsets from the nominal load address.  If you
+  // want offsets, you must subtract off the library's nominal load
+  // address.
+
+  // The arguments to these functions named FILENAME are all
+  // references to strings stored in the .stabstr section.  Because
+  // both the Linux and Solaris linkers factor out duplicate strings
+  // from the .stabstr section, the consumer can assume that if two
+  // FILENAME values are different addresses, they represent different
+  // file names.
+  //
+  // Thus, it's safe to use (say) std::map<const char *, ...>, which does
+  // address comparisons.  Since all the pointers are into the array
+  // holding the .stabstr section's contents, comparing them produces
+  // predictable results.
+
+  // Begin processing a compilation unit whose main source file is
+  // named FILENAME, and whose base address is ADDRESS.  If
+  // BUILD_DIRECTORY is non-NULL, it is the name of the build
+  // directory in which the compilation occurred.
+  virtual bool StartCompilationUnit(const char *filename, uint64_t address,
+                                    const char *build_directory) {
+    return true;
+  }
+
+  // Finish processing the compilation unit.  If END_ADDRESS is
+  // non-zero, it is the ending address of the compilation unit.  This
+  // information may not be available, in which case the consumer must
+  // infer it by other means.
+  virtual bool EndCompilationUnit(uint64_t end_address) { return true; }
+
+  // Begin processing a function named NAME, whose starting address is
+  // ADDRESS.  This function belongs to the compilation unit that was
+  // most recently started but not ended.
+  //
+  // Note that, unlike filenames, NAME is not a pointer into the
+  // .stabstr section; this is because the name as it appears in the
+  // STABS data is followed by type information.  The value passed to
+  // StartFunction is the function name alone.
+  virtual bool StartFunction(const std::string &name, uint64_t address) {
+    return true;
+  }
+
+  // Finish processing the function.  If END_ADDRESS is non-zero,
+  // it is the ending address for the function.  This information may
+  // not be available, in which case the consumer must infer it by
+  // other means.
+  virtual bool EndFunction(uint64_t end_address) { return true; }
+
+  // Report that the code at ADDRESS is attributable to line NUMBER of
+  // the source file named FILENAME.  The caller must infer the ending
+  // address of the line.
+  virtual bool Line(uint64_t address, const char *filename, int number) {
+    return true;
+  }
+
+  // Report a warning.  FORMAT is a printf-like format string,
+  // specifying how to format the subsequent arguments.
+  virtual void Warning(const char *format, ...) { }
+};
+
+} // namespace google_breakpad
+
+#endif  // COMMON_LINUX_STABS_READER_H__
diff --git a/src/tools/linux/dump_syms/Makefile b/src/tools/linux/dump_syms/Makefile
index 21d848d7..59db7374 100644
--- a/src/tools/linux/dump_syms/Makefile
+++ b/src/tools/linux/dump_syms/Makefile
@@ -16,7 +16,8 @@ BIN=dump_syms
 
 all:$(BIN)
 
-DUMP_OBJ=dump_symbols.o guid_creator.o dump_syms.o file_id.o md5.o
+DUMP_OBJ=dump_symbols.o guid_creator.o dump_syms.o file_id.o md5.o \
+	stabs_reader.o module.o
 
 dump_syms:$(DUMP_OBJ)
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -o $@ $^
@@ -24,6 +25,12 @@ dump_syms:$(DUMP_OBJ)
 dump_symbols.o:../../../common/linux/dump_symbols.cc
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $^
 
+stabs_reader.o:../../../common/linux/stabs_reader.cc
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $^
+
+module.o:../../../common/linux/module.cc
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $^
+
 guid_creator.o:../../../common/linux/guid_creator.cc
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $^