From a2fa3dda54c9684c3d85ed116ea8b916a8fe210e Mon Sep 17 00:00:00 2001 From: waylonis Date: Sat, 16 Dec 2006 01:01:19 +0000 Subject: [PATCH] Add better support for UTF character conversions. Fixes Issue 78. git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@91 4c0a9323-5329-0410-9bdc-e9ce6186880e --- src/client/minidump_file_writer.cc | 164 +++++++++++++++-------------- src/client/minidump_file_writer.h | 56 +++++++--- 2 files changed, 127 insertions(+), 93 deletions(-) diff --git a/src/client/minidump_file_writer.cc b/src/client/minidump_file_writer.cc index 852bc297..766bddbe 100644 --- a/src/client/minidump_file_writer.cc +++ b/src/client/minidump_file_writer.cc @@ -38,9 +38,12 @@ #include #include "client/minidump_file_writer-inl.h" +#include "common/string_conversion.h" namespace google_airbag { +const MDRVA MinidumpFileWriter::kInvalidMDRVA = static_cast(-1); + MinidumpFileWriter::MinidumpFileWriter() : file_(-1), position_(0), size_(0) { } @@ -48,9 +51,9 @@ MinidumpFileWriter::~MinidumpFileWriter() { Close(); } -bool MinidumpFileWriter::Open(const std::string &path) { +bool MinidumpFileWriter::Open(const char *path) { assert(file_ == -1); - file_ = open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); + file_ = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666); return file_ != -1; } @@ -60,58 +63,98 @@ bool MinidumpFileWriter::Close() { if (file_ != -1) { ftruncate(file_, position_); - result = close(file_) == 0; + result = (close(file_) == 0); file_ = -1; } return result; } -bool MinidumpFileWriter::WriteString(const wchar_t *str, - unsigned int length, - MDLocationDescriptor *location) { +bool MinidumpFileWriter::CopyStringToMDString(const wchar_t *str, + unsigned int length, + TypedMDRVA *mdstring) { + bool result = true; + if (sizeof(wchar_t) == sizeof(u_int16_t)) { + // Shortcut if wchar_t is the same size as MDString's buffer + result = mdstring->Copy(str, mdstring->get()->length); + } else { + u_int16_t out[2]; + int out_idx = 0; + + // Copy 
the string character by character + while (length && result) { + UTF32ToUTF16Char(*str, out); + if (!out[0]) + return false; + + // Process one character at a time + --length; + ++str; + + // Append the one or two UTF-16 characters. The first one will be non- + // zero, but the second one may be zero, depending on the conversion from + // UTF-32. + int out_count = out[1] ? 2 : 1; + int out_size = sizeof(u_int16_t) * out_count; + result = mdstring->CopyIndexAfterObject(out_idx, out, out_size); + out_idx += out_count; + } + } + return result; +} + +bool MinidumpFileWriter::CopyStringToMDString(const char *str, + unsigned int length, + TypedMDRVA *mdstring) { + bool result = true; + u_int16_t out[2]; + int out_idx = 0; + + // Copy the string character by character + while (length && result) { + int conversion_count = UTF8ToUTF16Char(str, length, out); + if (!conversion_count) + return false; + + // Move the pointer along based on the number of converted characters + length -= conversion_count; + str += conversion_count; + + // Append the one or two UTF-16 characters + int out_count = out[1] ? 2 : 1; + int out_size = sizeof(u_int16_t) * out_count; + result = mdstring->CopyIndexAfterObject(out_idx, out, out_size); + out_idx += out_count; + } + return result; +} + +template +bool MinidumpFileWriter::WriteStringCore(const CharType *str, + unsigned int length, + MDLocationDescriptor *location) { assert(str); assert(location); // Calculate the mdstring length by either limiting to |length| as passed in // or by finding the location of the NULL character. 
+ unsigned int mdstring_length = 0; if (!length) length = INT_MAX; - - unsigned int mdstring_length = 0; - for (; mdstring_length < length && str[mdstring_length]; ++mdstring_length) { - } + for (; mdstring_length < length && str[mdstring_length]; ++mdstring_length) + ; // Allocate the string buffer TypedMDRVA mdstring(this); - if (!mdstring.AllocateObjectAndArray(mdstring_length + 1, sizeof(u_int16_t))) return false; - // Set length excluding the NULL + // Set length excluding the NULL and copy the string mdstring.get()->length = mdstring_length * sizeof(u_int16_t); - - u_int16_t ch; - bool result = true; - - if (sizeof(wchar_t) == sizeof(u_int16_t)) { - // Shortcut if wchar_t is the same size as MDString's buffer - result = mdstring.Copy(str, mdstring.get()->length); - } else { - // Copy the string character by character - for (unsigned int c = 0; c < mdstring_length && result == true; c++) { - ch = str[c]; - // TODO: For the UTF-32->UTF-16 conversion, it's possible that there - // are characters that will require more than one UTF-16 character to - // represent it. Fully supporting this will require a more sophisticated - // calculation of the size of the resulting string and for converting the - // UTF-32 character into the two UTF-16 characters. 
- result = mdstring.CopyIndexAfterObject(c, &ch, sizeof(ch)); - } - } + bool result = CopyStringToMDString(str, mdstring_length, &mdstring); // NULL terminate if (result) { - ch = 0; + u_int16_t ch = 0; result = mdstring.CopyIndexAfterObject(mdstring_length, &ch, sizeof(ch)); if (result) @@ -121,52 +164,14 @@ bool MinidumpFileWriter::WriteString(const wchar_t *str, return result; } +bool MinidumpFileWriter::WriteString(const wchar_t *str, unsigned int length, + MDLocationDescriptor *location) { + return WriteStringCore(str, length, location); +} + bool MinidumpFileWriter::WriteString(const char *str, unsigned int length, - MDLocationDescriptor *location) { - assert(str); - assert(location); - // Calculate the mdstring length by either limiting to |length| as passed in - // or by finding the location of the NULL character. - if (!length) - length = INT_MAX; - - unsigned int mdstring_length = 0; - for (; mdstring_length < length && str[mdstring_length]; ++mdstring_length) { - } - - // Allocate the string buffer - TypedMDRVA mdstring(this); - - if (!mdstring.AllocateObjectAndArray(mdstring_length + 1, sizeof(u_int16_t))) - return false; - - // Set length excluding the NULL - mdstring.get()->length = mdstring_length * sizeof(u_int16_t); - - u_int16_t ch; - bool result = true; - - // Copy the string character by character - for (unsigned int c = 0; c < mdstring_length && result == true; c++) { - ch = str[c]; - // TODO: For the UTF-8->UTF-16 conversion, it's possible that there are - // characters that will convert one or more UTF-8 character into a single - // UTF-16 character. Fully supporting this will require a more - // sophisticated calculation of the size of the resulting string and for - // converting the UTF-8 characters into a UTF-16 character. 
- result = mdstring.CopyIndexAfterObject(c, &ch, sizeof(ch)); - } - - // NULL terminate - if (result) { - ch = 0; - result = mdstring.CopyIndexAfterObject(mdstring_length, &ch, sizeof(ch)); - - if (result) - *location = mdstring.location(); - } - - return result; + MDLocationDescriptor *location) { + return WriteStringCore(str, length, location); } bool MinidumpFileWriter::WriteMemory(const void *src, size_t size, @@ -177,7 +182,6 @@ bool MinidumpFileWriter::WriteMemory(const void *src, size_t size, if (!mem.Allocate(size)) return false; - if (!mem.Copy(src, mem.size())) return false; @@ -190,7 +194,6 @@ bool MinidumpFileWriter::WriteMemory(const void *src, size_t size, MDRVA MinidumpFileWriter::Allocate(size_t size) { assert(size); assert(file_ != -1); - size_t aligned_size = (size + 7) & ~7; // 64-bit alignment if (position_ + aligned_size > size_) { @@ -202,7 +205,6 @@ MDRVA MinidumpFileWriter::Allocate(size_t size) { growth = minimal_growth; size_t new_size = size_ + growth; - if (ftruncate(file_, new_size) != 0) return kInvalidMDRVA; @@ -215,7 +217,7 @@ MDRVA MinidumpFileWriter::Allocate(size_t size) { return current_position; } -bool MinidumpFileWriter::Copy(MDRVA position, const void* src, ssize_t size) { +bool MinidumpFileWriter::Copy(MDRVA position, const void *src, ssize_t size) { assert(src); assert(size); assert(file_ != -1); diff --git a/src/client/minidump_file_writer.h b/src/client/minidump_file_writer.h index 9f270b1e..1f3a3680 100644 --- a/src/client/minidump_file_writer.h +++ b/src/client/minidump_file_writer.h @@ -27,7 +27,9 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// minidump_file_writer.h: Implements file-based minidump generation +// minidump_file_writer.h: Implements file-based minidump generation. It's +// intended to be used with the Google Airbag open source crash handling +// project. 
#ifndef CLIENT_MINIDUMP_FILE_WRITER_H__ #define CLIENT_MINIDUMP_FILE_WRITER_H__ @@ -38,11 +40,26 @@ namespace google_airbag { +class UntypedMDRVA; +template class TypedMDRVA; + +// The user of this class can Open() a file and add minidump streams, data, and +// strings using the definitions in minidump_format.h. Since this class is +// expected to be used in a situation where the current process may be +// damaged, it will not allocate heap memory. +// Sample usage: +// MinidumpFileWriter writer; +// writer.Open("/tmp/minidump.dmp"); +// TypedMDRVA<MDRawHeader> header(&writer); +// header.Allocate(); +// header.get()->signature = MD_HEADER_SIGNATURE; +// : +// writer.Close(); class MinidumpFileWriter { - public: +public: // Invalid MDRVA (Minidump Relative Virtual Address) // returned on failed allocation - static const MDRVA kInvalidMDRVA = static_cast(-1); + static const MDRVA kInvalidMDRVA; MinidumpFileWriter(); ~MinidumpFileWriter(); @@ -50,13 +67,13 @@ class MinidumpFileWriter { // Open |path| as the destination of the minidump data. Any existing file // will be overwritten. // Return true on success, or false on failure - bool Open(const std::string &path); + bool Open(const char *path); // Close the current file // Return true on success, or false on failure bool Close(); - // Write |str| to a MDString. + // Copy the contents of |str| to a MDString and write it to the file. // |str| is expected to be either UTF-16 or UTF-32 depending on the size // of wchar_t. 
// Maximum |length| of characters to copy from |str|, or specify 0 to use the @@ -66,7 +83,7 @@ class MinidumpFileWriter { bool WriteString(const wchar_t *str, unsigned int length, MDLocationDescriptor *location); - // Similar to above with |str| as an UTF-8 encoded string + // Same as above, except with |str| as a UTF-8 string bool WriteString(const char *str, unsigned int length, MDLocationDescriptor *location); @@ -79,7 +96,7 @@ class MinidumpFileWriter { bool Copy(MDRVA position, const void *src, ssize_t size); // Return the current position for writing to the minidump - MDRVA position() const { return position_; } + inline MDRVA position() const { return position_; } private: friend class UntypedMDRVA; @@ -97,6 +114,21 @@ class MinidumpFileWriter { // Current allocated size size_t size_; + + // Copy |length| characters from |str| to |mdstring|. These are distinct + // because the underlying MDString is a UTF-16 based string. The wchar_t + // variant may need to create a MDString that has more characters than the + // source |str|, whereas the UTF-8 variant may coalesce characters to form + // a single UTF-16 character. 
+ bool CopyStringToMDString(const wchar_t *str, unsigned int length, + TypedMDRVA *mdstring); + bool CopyStringToMDString(const char *str, unsigned int length, + TypedMDRVA *mdstring); + + // The common templated code for writing a string + template + bool WriteStringCore(const CharType *str, unsigned int length, + MDLocationDescriptor *location); }; // Represents an untyped allocated chunk @@ -112,13 +144,13 @@ class UntypedMDRVA { bool Allocate(size_t size); // Returns the current position or kInvalidMDRVA if allocation failed - MDRVA position() const { return position_; } + inline MDRVA position() const { return position_; } // Number of bytes allocated - size_t size() const { return size_; } + inline size_t size() const { return size_; } // Return size and position - MDLocationDescriptor location() const { + inline MDLocationDescriptor location() const { MDLocationDescriptor location = { size_, position_ }; return location; } @@ -128,7 +160,7 @@ class UntypedMDRVA { bool Copy(MDRVA position, const void *src, size_t size); // Copy |size| bytes from |src| to the current position - bool Copy(const void *src, size_t size) { + inline bool Copy(const void *src, size_t size) { return Copy(position_, src, size); } @@ -157,7 +189,7 @@ class TypedMDRVA : public UntypedMDRVA { data_(), allocation_state_(UNALLOCATED) {} - ~TypedMDRVA() { + inline ~TypedMDRVA() { // Ensure that the data_ object is written out if (allocation_state_ != ARRAY) Flush();