Breakpad Linux/Mac symbol dumper: Share duplicate strings that arise in DWARF data.

This patch avoids allocating many copies of identical strings appearing in
debugging information. Without this patch, running dump_syms on Mozilla's
libxul.so (with 173MiB of debugging information) has a peak resident set of
around 450MiB. With this patch, the peak is around 365MiB.

a=jimblandy, r=mark


git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@626 4c0a9323-5329-0410-9bdc-e9ce6186880e
This commit is contained in:
jimblandy
2010-07-17 15:14:30 +00:00
parent c5f5e0ae65
commit 786275e719
3 changed files with 62 additions and 7 deletions

View File

@@ -36,12 +36,16 @@
#include <assert.h>
#include <algorithm>
#include <set>
#include <utility>
#include "common/dwarf_line_to_module.h"
namespace google_breakpad {
using std::map;
using std::pair;
using std::set;
using std::vector;
// Data provided by a DWARF specification DIE.
@@ -83,6 +87,17 @@ typedef map<uint64, AbstractOrigin> AbstractOriginByOffset;
// Data global to the DWARF-bearing file that is private to the
// DWARF-to-Module process.
struct DwarfCUToModule::FilePrivate {
// A set of strings used anywhere in this file's debugging information,
// shared by all its compilation units. Before storing a string in one of
// our data structures, insert it into this set, and then use the string
// from the set.
//
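// With a reference-counted (copy-on-write) std::string implementation,
// such as libstdc++ before its GCC 5 ABI change, simply using strings
// from this set, even if passed by value, assigned, or held directly in
// structures and containers (map<string, ...>, for example), causes
// those strings to share a single instance of each distinct piece of
// text. Implementations that conform to C++11 do not share buffers this
// way, so there each copy of a string holds its own storage.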
set<string> common_strings;
// A map from offsets of DIEs within the .debug_info section to
// Specifications describing those DIEs. Specification references can
// cross compilation unit boundaries.
@@ -256,7 +271,17 @@ void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
enum DwarfForm form,
const string &data) {
switch (attr) {
case dwarf2reader::DW_AT_name: name_attribute_ = data; break;
case dwarf2reader::DW_AT_name: {
// Place the name in our global set of strings, and then use the
// string from the set. Even though the assignment looks like a copy,
// reference-counted std::string implementations (such as libstdc++
// before its GCC 5 ABI change) make it share the set's buffer, so the
// effect is to have all our data structures share a single copy of
// each distinct string whenever possible.
pair<set<string>::iterator, bool> result =
cu_context_->file_context->file_private->common_strings.insert(data);
name_attribute_ = *result.first;
break;
}
default: break;
}
}