Merge pull request #1982 from pocoproject/feature/mail-msg-decode

Feature/mail msg decode
This commit is contained in:
Günter Obiltschnig 2017-11-12 20:54:07 +01:00 committed by GitHub
commit c5ba9b1e18
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 832 additions and 226 deletions

View File

@ -38,7 +38,7 @@ public:
int convert(int ch, unsigned char* bytes, int length) const;
int queryConvert(const unsigned char* bytes, int length) const;
int sequenceLength(const unsigned char* bytes, int length) const;
private:
static const char* _names[];
static const CharacterMap _charMap;

View File

@ -103,6 +103,9 @@ public:
/// Returns true iff the given character is an uppercase alphabetic
/// character.
static bool isPrintable(int ch);
/// Returns true iff the given character is printable.
static int toLower(int ch);
/// If the given character is an uppercase character,
/// return its lowercase counterpart, otherwise return
@ -196,6 +199,12 @@ inline bool Ascii::isUpper(int ch)
}
inline bool Ascii::isPrintable(int ch)
{
// A character counts as printable when the ACP_PRINT property is set for it.
return hasProperties(ch, ACP_PRINT);
}
inline int Ascii::toLower(int ch)
{
if (isUpper(ch))

View File

@ -28,79 +28,81 @@ namespace Poco {
std::string Foundation_API format(const std::string& fmt, const Any& value);
/// This function implements sprintf-style formatting in a typesafe way.
/// Various variants of the function are available, supporting a
/// different number of arguments (up to six).
///
/// The formatting is controlled by the format string in fmt.
/// Format strings are quite similar to those of the std::printf() function, but
/// there are some minor differences.
///
/// The format string can consist of any sequence of characters; certain
/// characters have a special meaning. Characters without a special meaning
/// are copied verbatim to the result. A percent sign (%) marks the beginning
/// of a format specification. Format specifications have the following syntax:
///
/// %[<index>][<flags>][<width>][.<precision>][<modifier>]<type>
///
/// Index, flags, width, precision and modifier are optional. The only required part of
/// the format specification, apart from the percent sign, is the type.
///
/// The optional index argument has the format "[<n>]" and allows to
/// address an argument by its zero-based position (see the example below).
///
/// Following are valid type specifications and their meaning:
///
/// * b boolean (true = 1, false = 0)
/// * c character
/// * d signed decimal integer
/// * i signed decimal integer
/// * o unsigned octal integer
/// * u unsigned decimal integer
/// * x unsigned hexadecimal integer (lower case)
/// * X unsigned hexadecimal integer (upper case)
/// * e signed floating-point value in the form [-]d.dddde[<sign>]dd[d]
/// * E signed floating-point value in the form [-]d.ddddE[<sign>]dd[d]
/// * f signed floating-point value in the form [-]dddd.dddd
/// * s std::string
/// * z std::size_t
///
/// The following flags are supported:
///
/// * - left align the result within the given field width
/// * + prefix the output value with a sign (+ or -) if the output value is of a signed type
/// * 0 if width is prefixed with 0, zeros are added until the minimum width is reached
/// * # For o, x, X, the # flag prefixes any nonzero output value with 0, 0x, or 0X, respectively;
/// for e, E, f, the # flag forces the output value to contain a decimal point in all cases.
///
/// The following modifiers are supported:
///
/// * (none) argument is char (c), int (d, i), unsigned (o, u, x, X) double (e, E, f, g, G) or string (s)
/// * l argument is long (d, i), unsigned long (o, u, x, X) or long double (e, E, f, g, G)
/// * L argument is long long (d, i), unsigned long long (o, u, x, X)
/// * h argument is short (d, i), unsigned short (o, u, x, X) or float (e, E, f, g, G)
/// * ? argument is any signed or unsigned int, short, long, or 64-bit integer (d, i, o, x, X)
///
/// The width argument is a nonnegative decimal integer or '*' with an additional nonnegative integer value preceding the value to be formatted, controlling the minimum number of characters printed.
/// If the number of characters in the output value is less than the specified width, blanks or
/// leading zeros are added, according to the specified flags (-, +, 0).
///
/// Precision is a nonnegative decimal integer or '*' with an additional nonnegative integer value preceding the value to be formatted, preceded by a period (.), which specifies the number of characters
/// to be printed, the number of decimal places, or the number of significant digits.
///
/// Throws an InvalidArgumentException if an argument index is out of range.
///
/// Starting with release 1.4.3, an argument that does not match the format
/// specifier no longer results in a BadCastException. The string [ERRFMT] is
/// written to the result string instead.
///
/// If there are more format specifiers than values, the format specifiers without a corresponding value
/// are copied verbatim to output.
///
/// If there are more values than format specifiers, the superfluous values are ignored.
///
/// Usage Examples:
/// std::string s1 = format("The answer to life, the universe, and everything is %d", 42);
/// This function implements sprintf-style formatting in a typesafe way.
/// Various variants of the function are available, supporting a
/// different number of arguments (up to six).
///
/// The formatting is controlled by the format string in fmt.
/// Format strings are quite similar to those of the std::printf() function, but
/// there are some minor differences.
///
/// The format string can consist of any sequence of characters; certain
/// characters have a special meaning. Characters without a special meaning
/// are copied verbatim to the result. A percent sign (%) marks the beginning
/// of a format specification. Format specifications have the following syntax:
///
/// %[<index>][<flags>][<width>][.<precision>][<modifier>]<type>
///
/// Index, flags, width, precision and modifier are optional. The only required part of
/// the format specification, apart from the percent sign, is the type.
///
/// The optional index argument has the format "[<n>]" and allows to
/// address an argument by its zero-based position (see the example below).
///
/// Following are valid type specifications and their meaning:
///
/// * b boolean (true = 1, false = 0)
/// * c character
/// * d signed decimal integer
/// * i signed decimal integer
/// * o unsigned octal integer
/// * u unsigned decimal integer
/// * x unsigned hexadecimal integer (lower case)
/// * X unsigned hexadecimal integer (upper case)
/// * e signed floating-point value in the form [-]d.dddde[<sign>]dd[d]
/// * E signed floating-point value in the form [-]d.ddddE[<sign>]dd[d]
/// * f signed floating-point value in the form [-]dddd.dddd
/// * s std::string
/// * z std::size_t
///
/// The following flags are supported:
///
/// * - left align the result within the given field width
/// * + prefix the output value with a sign (+ or -) if the output value is of a signed type
/// * 0 if width is prefixed with 0, zeros are added until the minimum width is reached
/// * # For o, x, X, the # flag prefixes any nonzero output value with 0, 0x, or 0X, respectively;
/// for e, E, f, the # flag forces the output value to contain a decimal point in all cases.
///
/// The following modifiers are supported:
///
/// * (none) argument is char (c), int (d, i), unsigned (o, u, x, X) double (e, E, f, g, G) or string (s)
/// * l argument is long (d, i), unsigned long (o, u, x, X) or long double (e, E, f, g, G)
/// * L argument is long long (d, i), unsigned long long (o, u, x, X)
/// * h argument is short (d, i), unsigned short (o, u, x, X) or float (e, E, f, g, G)
/// * ? argument is any signed or unsigned int, short, long, or 64-bit integer (d, i, o, x, X)
///
/// The width argument is a nonnegative decimal integer or '*' with an additional nonnegative integer value
/// preceding the value to be formatted, controlling the minimum number of characters printed.
/// If the number of characters in the output value is less than the specified width, blanks or
/// leading zeros are added, according to the specified flags (-, +, 0).
///
/// Precision is a nonnegative decimal integer or '*' with an additional nonnegative integer value preceding
/// the value to be formatted, preceded by a period (.), which specifies the number of characters
/// to be printed, the number of decimal places, or the number of significant digits.
///
/// Throws an InvalidArgumentException if an argument index is out of range.
///
/// Starting with release 1.4.3, an argument that does not match the format
/// specifier no longer results in a BadCastException. The string [ERRFMT] is
/// written to the result string instead.
///
/// If there are more format specifiers than values, the format specifiers without a corresponding value
/// are copied verbatim to output.
///
/// If there are more values than format specifiers, the superfluous values are ignored.
///
/// Usage Examples:
/// std::string s1 = format("The answer to life, the universe, and everything is %d", 42);
/// std::string s2 = format("second: %[1]d, first: %[0]d", 1, 2);
void Foundation_API format(std::string& result, const char *fmt, const std::vector<Any>& values);

View File

@ -20,12 +20,15 @@
#include "Poco/Foundation.h"
#include "Poco/SharedPtr.h"
#include "Poco/String.h"
#include "Poco/RWLock.h"
#include <map>
namespace Poco {
class TextEncodingManager;
class Foundation_API TextEncodingRegistry;
class Foundation_API TextEncoding
@ -170,10 +173,55 @@ public:
static const std::string GLOBAL;
/// Name of the global TextEncoding, which is the empty string.
static const TextEncodingRegistry& registry();
/// Returns the TextEncodingRegistry.
protected:
static TextEncodingManager& manager();
/// Returns the TextEncodingManager.
static TextEncodingRegistry* registry(int);
/// Returns the TextEncodingRegistry.
};
class Foundation_API TextEncodingRegistry
/// This class serves as the main registry for all
/// supported TextEncoding's.
///
/// Encodings are kept in a map keyed by name (ordered with CILess)
/// and can additionally be found through any name for which the
/// encoding's isA() returns true.
{
public:
TextEncodingRegistry();
/// Constructs the TextEncodingRegistry and registers the
/// built-in encodings.
~TextEncodingRegistry();
/// Destroys the TextEncodingRegistry.
bool has(const std::string& name) const;
/// Returns true if the requested encoding is found.
/// It will return true for both the canonical and
/// an alternative encoding name.
void add(TextEncoding::Ptr pEncoding);
/// Adds the encoding to the registry under its canonical name.
void add(TextEncoding::Ptr pEncoding, const std::string& name);
/// Adds the encoding to the registry under the specified name.
void remove(const std::string& name);
/// Removes the specified encoding from the registry.
TextEncoding::Ptr find(const std::string& name) const;
/// Returns a Ptr to the encoding registered under the specified
/// name or having the name as an alias.
///
/// If the encoding is not found, the returned Ptr points to nothing.
private:
// Declared private: copying is not part of the public interface.
TextEncodingRegistry(const TextEncodingRegistry&);
TextEncodingRegistry& operator = (const TextEncodingRegistry&);
typedef std::map<std::string, TextEncoding::Ptr, CILess> EncodingMap;
EncodingMap _encodings;
// mutable so that const member functions can take the lock.
mutable RWLock _lock;
};

View File

@ -21,6 +21,7 @@ namespace Poco {
// Names of this encoding; the list is terminated by a NULL entry.
// NOTE(review): presumably the first entry is the canonical name and the
// remaining ones are aliases -- confirm against canonicalName()/isA().
const char* ASCIIEncoding::_names[] =
{
"US-ASCII",
"ASCII",
NULL
};

View File

@ -14,7 +14,6 @@
#include "Poco/TextEncoding.h"
#include "Poco/Exception.h"
#include "Poco/String.h"
#include "Poco/ASCIIEncoding.h"
#include "Poco/Latin1Encoding.h"
#include "Poco/Latin2Encoding.h"
@ -25,87 +24,90 @@
#include "Poco/Windows1250Encoding.h"
#include "Poco/Windows1251Encoding.h"
#include "Poco/Windows1252Encoding.h"
#include "Poco/RWLock.h"
#include "Poco/SingletonHolder.h"
#include <map>
namespace Poco {
//
// TextEncodingManager
// TextEncodingRegistry
//
class TextEncodingManager
TextEncodingRegistry::TextEncodingRegistry()
{
public:
TextEncodingManager()
{
TextEncoding::Ptr pUtf8Encoding(new UTF8Encoding);
add(pUtf8Encoding, TextEncoding::GLOBAL);
TextEncoding::Ptr pUtf8Encoding(new UTF8Encoding);
add(pUtf8Encoding, TextEncoding::GLOBAL);
add(new ASCIIEncoding);
add(new Latin1Encoding);
add(new Latin2Encoding);
add(new Latin9Encoding);
add(pUtf8Encoding);
add(new UTF16Encoding);
add(new UTF32Encoding);
add(new Windows1250Encoding);
add(new Windows1251Encoding);
add(new Windows1252Encoding);
add(new ASCIIEncoding);
add(new Latin1Encoding);
add(new Latin2Encoding);
add(new Latin9Encoding);
add(pUtf8Encoding);
add(new UTF16Encoding);
add(new UTF32Encoding);
add(new Windows1250Encoding);
add(new Windows1251Encoding);
add(new Windows1252Encoding);
}
TextEncodingRegistry::~TextEncodingRegistry()
{
// Nothing to release explicitly here.
}
bool TextEncodingRegistry::has(const std::string& name) const
{
if (_encodings.find(name) != _encodings.end())
return true;
for (const auto& enc : _encodings)
{
if (enc.second->isA(name)) return true;
}
return false;
}
~TextEncodingManager()
{
}
void add(TextEncoding::Ptr pEncoding)
{
add(pEncoding, pEncoding->canonicalName());
}
void TextEncodingRegistry::add(TextEncoding::Ptr pEncoding)
{
// Register the encoding under the name it reports as canonical.
add(pEncoding, pEncoding->canonicalName());
}
void add(TextEncoding::Ptr pEncoding, const std::string& name)
{
RWLock::ScopedLock lock(_lock, true);
void TextEncodingRegistry::add(TextEncoding::Ptr pEncoding, const std::string& name)
{
// The map is modified, so the lock is taken with the second argument set
// (presumably selecting write mode -- confirm against RWLock::ScopedLock).
RWLock::ScopedLock lock(_lock, true);
// Inserts a new entry or replaces the one already stored under this name.
_encodings[name] = pEncoding;
}
void TextEncodingRegistry::remove(const std::string& name)
{
// The map is modified, so the lock is taken with the second argument set
// (presumably selecting write mode -- confirm against RWLock::ScopedLock).
RWLock::ScopedLock lock(_lock, true);
// Erasing a name that is not registered is a no-op.
_encodings.erase(name);
}
TextEncoding::Ptr TextEncodingRegistry::find(const std::string& name) const
{
RWLock::ScopedLock lock(_lock);
_encodings[name] = pEncoding;
}
void remove(const std::string& name)
{
RWLock::ScopedLock lock(_lock, true);
EncodingMap::const_iterator it = _encodings.find(name);
if (it != _encodings.end())
return it->second;
_encodings.erase(name);
}
TextEncoding::Ptr find(const std::string& name) const
for (it = _encodings.begin(); it != _encodings.end(); ++it)
{
RWLock::ScopedLock lock(_lock);
EncodingMap::const_iterator it = _encodings.find(name);
if (it != _encodings.end())
if (it->second->isA(name))
return it->second;
for (it = _encodings.begin(); it != _encodings.end(); ++it)
{
if (it->second->isA(name))
return it->second;
}
return TextEncoding::Ptr();
}
return TextEncoding::Ptr();
}
private:
TextEncodingManager(const TextEncodingManager&);
TextEncodingManager& operator = (const TextEncodingManager&);
typedef std::map<std::string, TextEncoding::Ptr, CILess> EncodingMap;
EncodingMap _encodings;
mutable RWLock _lock;
};
//
@ -147,7 +149,7 @@ int TextEncoding::sequenceLength(const unsigned char* bytes, int length) const
TextEncoding& TextEncoding::byName(const std::string& encodingName)
{
TextEncoding* pEncoding = manager().find(encodingName);
TextEncoding* pEncoding = registry(0)->find(encodingName);
if (pEncoding)
return *pEncoding;
else
@ -157,25 +159,25 @@ TextEncoding& TextEncoding::byName(const std::string& encodingName)
TextEncoding::Ptr TextEncoding::find(const std::string& encodingName)
{
return manager().find(encodingName);
return registry(0)->find(encodingName);
}
void TextEncoding::add(TextEncoding::Ptr pEncoding)
{
manager().add(pEncoding, pEncoding->canonicalName());
registry(0)->add(pEncoding, pEncoding->canonicalName());
}
void TextEncoding::add(TextEncoding::Ptr pEncoding, const std::string& name)
{
manager().add(pEncoding, name);
registry(0)->add(pEncoding, name);
}
void TextEncoding::remove(const std::string& encodingName)
{
manager().remove(encodingName);
registry(0)->remove(encodingName);
}
@ -195,13 +197,23 @@ TextEncoding& TextEncoding::global()
namespace
{
static SingletonHolder<TextEncodingManager> sh;
TextEncodingRegistry* getRegistry()
{
// Function-local static: every call shares the same holder object and
// returns whatever instance that holder hands out.
static SingletonHolder<TextEncodingRegistry> sh;
return sh.get();
}
}
TextEncodingManager& TextEncoding::manager()
const TextEncodingRegistry& TextEncoding::registry()
{
return *sh.get();
return *getRegistry();
}
TextEncodingRegistry* TextEncoding::registry(int)
{
// The unnamed int parameter is unused; it only distinguishes this overload,
// which returns a modifiable registry, from the const-returning registry().
return getRegistry();
}

View File

@ -36,6 +36,9 @@ TextEncodingTest::~TextEncodingTest()
void TextEncodingTest::testTextEncoding()
{
TextEncoding& ascii = TextEncoding::byName("ASCII");
assert(std::string("US-ASCII") == ascii.canonicalName());
TextEncoding& utf8 = TextEncoding::byName("utf8");
assert (std::string("UTF-8") == utf8.canonicalName());
@ -91,6 +94,44 @@ void TextEncodingTest::testTextEncoding()
}
void TextEncodingTest::testTextEncodingManager()
{
// A freshly constructed registry must know every built-in encoding, and
// each name is checked in both lower-case and upper-case spelling.
TextEncodingRegistry registry;
assert(registry.has("us-ascii"));
assert(registry.has("US-ASCII"));
assert(registry.has("ascii"));
assert(registry.has("ASCII"));
assert(registry.has("utf-8"));
assert(registry.has("UTF-8"));
assert(registry.has("utf-16"));
assert(registry.has("UTF-16"));
assert(registry.has("utf-32"));
assert(registry.has("UTF-32"));
assert(registry.has("iso-8859-1"));
assert(registry.has("ISO-8859-1"));
assert(registry.has("iso-8859-2"));
assert(registry.has("ISO-8859-2"));
assert(registry.has("iso-8859-15"));
assert(registry.has("ISO-8859-15"));
assert(registry.has("windows-1250"));
assert(registry.has("WINDOWS-1250"));
assert(registry.has("windows-1251"));
assert(registry.has("WINDOWS-1251"));
assert(registry.has("windows-1252"));
assert(registry.has("WINDOWS-1252"));
}
void TextEncodingTest::setUp()
{
}
@ -106,6 +147,7 @@ CppUnit::Test* TextEncodingTest::suite()
CppUnit::TestSuite* pSuite = new CppUnit::TestSuite("TextEncodingTest");
CppUnit_addTest(pSuite, TextEncodingTest, testTextEncoding);
CppUnit_addTest(pSuite, TextEncodingTest, testTextEncodingManager);
return pSuite;
}

View File

@ -25,6 +25,7 @@ public:
~TextEncodingTest();
void testTextEncoding();
void testTextEncodingManager();
void setUp();
void tearDown();

View File

@ -84,6 +84,12 @@ public:
/// the file system in order to avoid potential memory
/// exhaustion when attachment files are very large.
MailMessage(MailMessage&&);
/// Move constructor.
MailMessage& operator = (MailMessage&&);
/// Move assignment.
virtual ~MailMessage();
/// Destroys the MailMessage.
@ -231,9 +237,9 @@ public:
void write(std::ostream& ostr) const;
/// Writes the mail message to the given output stream.
static std::string encodeWord(const std::string& text, const std::string& charset = "UTF-8");
static std::string encodeWord(const std::string& text, const std::string& charset = "UTF-8", char encoding = 'q');
/// If the given string contains non-ASCII characters,
/// encodes the given string using RFC 2047 "Q" word encoding.
/// encodes the given string using RFC 2047 'Q' or 'B' word encoding.
///
/// The given text must already be encoded in the character set
/// given in charset (default is UTF-8).
@ -241,6 +247,15 @@ public:
/// Returns the encoded string, or the original string if it
/// consists only of ASCII characters.
static std::string decodeWord(const std::string& encodedWord, std::string toCharset = "");
/// Decodes a string containing encoded-word's according to the rules specified in
/// RFC 2047 and returns the decoded string. Both Q and B encodings are supported.
///
/// If toCharset is not provided, no decoded string conversion is performed (i.e.
/// the string is simply decoded to the charset specified in the encodedWord string).
/// If toCharset is provided, the returned string is converted to the specified
/// charset. For a list of supported encodings, see Poco::TextEncodingRegistry.
static const std::string HEADER_SUBJECT;
static const std::string HEADER_FROM;
static const std::string HEADER_TO;
@ -274,6 +289,12 @@ protected:
static const std::string& contentTransferEncodingToString(ContentTransferEncoding encoding);
static int lineLength(const std::string& str);
static void appendRecipient(const MailRecipient& recipient, std::string& str);
static std::string decodeWord(const std::string& charset, char encoding,
const std::string& text, const std::string& toCharset);
static void getEncWordLimits(const std::string& encodedWord,
std::string::size_type& pos1, std::string::size_type& pos2, bool isComment);
static void advanceToEncoded(const std::string& encoded, std::string& decoded,
std::string::size_type& pos1, bool& isComment);
private:
MailMessage(const MailMessage&);

View File

@ -43,9 +43,12 @@ public:
MailRecipient();
/// Creates an empty MailRecipient.
MailRecipient(const MailRecipient& recipient);
/// Creates a MailRecipient by copying another one.
MailRecipient(MailRecipient&& recipient);
/// Creates a MailRecipient by moving another one.
MailRecipient(RecipientType type, const std::string& address);
/// Creates a MailRecipient of the given type.
@ -55,10 +58,13 @@ public:
~MailRecipient();
/// Destroys the MailRecipient.
MailRecipient& operator = (const MailRecipient& recipient);
/// Assigns another recipient.
/// Assigns another recipient by copying it.
MailRecipient& operator = (MailRecipient&& recipient);
/// Assigns another recipient by moving it.
void swap(MailRecipient& recipient);
/// Exchanges the content of two recipients.

View File

@ -29,9 +29,14 @@
#include "Poco/DateTimeFormatter.h"
#include "Poco/DateTimeParser.h"
#include "Poco/String.h"
#include "Poco/Format.h"
#include "Poco/StringTokenizer.h"
#include "Poco/StreamCopier.h"
#include "Poco/NumberFormatter.h"
#include "Poco/TextEncoding.h"
#include "Poco/TextConverter.h"
#include "Poco/NumberParser.h"
#include "Poco/Ascii.h"
#include <sstream>
@ -42,6 +47,11 @@ using Poco::DateTimeFormat;
using Poco::DateTimeFormatter;
using Poco::DateTimeParser;
using Poco::StringTokenizer;
using Poco::TextEncoding;
using Poco::TextEncodingRegistry;
using Poco::TextConverter;
using Poco::NumberParser;
using Poco::Ascii;
using Poco::icompare;
@ -216,6 +226,38 @@ MailMessage::MailMessage(PartStoreFactory* pStoreFactory):
}
MailMessage::MailMessage(MailMessage&& other):
_recipients(std::move(other._recipients)),
_content(std::move(other._content)),
_encoding(other._encoding),
_boundary(std::move(other._boundary)),
_pStoreFactory(other._pStoreFactory)
{
// Move constructor: takes over recipients, content, boundary and the part
// store factory pointer of 'other'; the encoding is copied.
// NOTE(review): _parts (walked by the destructor) is not transferred here,
// and no base-class state is moved explicitly -- confirm this is intended.

// Explicitly empty the moved-from members so 'other' is left in a defined,
// empty state rather than merely "valid but unspecified".
other._recipients.clear();
other._content.clear();
other._boundary.clear();
// The factory pointer now belongs to this object only.
other._pStoreFactory = 0;
}
MailMessage& MailMessage::operator = (MailMessage&& other)
{
	// Move assignment: takes over recipients, content, boundary and the part
	// store factory pointer of 'other'; the encoding is copied.
	// NOTE(review): _parts (walked by the destructor) is not transferred
	// here -- confirm this is intended.

	// Self-assignment leaves the object untouched.
	if (this == &other) return *this;

	// Take over the source's state.
	_recipients = std::move(other._recipients);
	_content = std::move(other._content);
	_boundary = std::move(other._boundary);
	_encoding = other._encoding;
	_pStoreFactory = other._pStoreFactory;

	// Leave the source empty rather than in an unspecified moved-from state.
	other._recipients.clear();
	other._content.clear();
	other._boundary.clear();
	other._pStoreFactory = 0;

	return *this;
}
MailMessage::~MailMessage()
{
for (PartVec::iterator it = _parts.begin(); it != _parts.end(); ++it)
@ -620,78 +662,329 @@ void MailMessage::appendRecipient(const MailRecipient& recipient, std::string& s
}
std::string MailMessage::encodeWord(const std::string& text, const std::string& charset)
void encodeQ(std::string& encodedText, std::string::const_iterator it, std::string::size_type& lineLength)
{
bool containsNonASCII = false;
for (std::string::const_iterator it = text.begin(); it != text.end(); ++it)
switch (*it)
{
if (static_cast<unsigned char>(*it) > 127)
case ' ':
encodedText += '_';
lineLength++;
break;
case '=':
case '?':
case '_':
case '(':
case ')':
case '[':
case ']':
case '<':
case '>':
case ',':
case ';':
case ':':
case '.':
case '@':
encodedText += '=';
NumberFormatter::appendHex(encodedText, static_cast<unsigned>(static_cast<unsigned char>(*it)), 2);
lineLength += 3;
break;
default:
if (*it > 32 && *it < 127)
{
containsNonASCII = true;
break;
}
}
if (!containsNonASCII) return text;
std::string encodedText;
std::string::size_type lineLength = 0;
for (std::string::const_iterator it = text.begin(); it != text.end(); ++it)
{
if (lineLength == 0)
{
encodedText += "=?";
encodedText += charset;
encodedText += "?q?";
lineLength += charset.length() + 5;
}
switch (*it)
{
case ' ':
encodedText += '_';
encodedText += *it;
lineLength++;
break;
case '=':
case '?':
case '_':
case '(':
case ')':
case '[':
case ']':
case '<':
case '>':
case ',':
case ';':
case ':':
case '.':
case '@':
}
else
{
encodedText += '=';
NumberFormatter::appendHex(encodedText, static_cast<unsigned>(static_cast<unsigned char>(*it)), 2);
lineLength += 3;
break;
default:
if (*it > 32 && *it < 127)
}
}
}
void startEncoding(std::string& encodedText, const std::string& charset, char encoding)
{
	// Appends the opening of an encoded-word, "=?<charset>?<encoding>?",
	// to encodedText; the caller appends the payload and the closing "?=".
	encodedText.append("=?")
		.append(charset)
		.append(1, '?')
		.append(1, encoding)
		.append(1, '?');
}
std::string MailMessage::encodeWord(const std::string& text, const std::string& charset, char encoding)
{
if (encoding == 'q' || encoding == 'Q')
{
bool containsNonASCII = false;
for (std::string::const_iterator it = text.begin(); it != text.end(); ++it)
{
if (static_cast<unsigned char>(*it) > 127)
{
encodedText += *it;
lineLength++;
containsNonASCII = true;
break;
}
}
if (!containsNonASCII) return text;
}
std::string encodedText;
std::string::size_type lineLength = 0;
if (encoding == 'q' || encoding == 'Q')
{
for (std::string::const_iterator it = text.begin(); it != text.end(); ++it)
{
if (lineLength == 0)
{
startEncoding(encodedText, charset, encoding);
lineLength += charset.length() + 5;
}
encodeQ(encodedText, it, lineLength);
if ((lineLength >= 64 &&
(*it == ' ' || *it == '\t' || *it == '\r' || *it == '\n')) ||
lineLength >= 72)
{
encodedText += "?=\r\n ";
lineLength = 0;
}
}
}
else if (encoding == 'b' || encoding == 'B')
{
// to ensure we're under 75 chars, 4 padding chars are always predicted
lineLength = 75 - (charset.length() + 5/*=??B?*/ + 2/*?=*/ + 4/*base64 padding*/);
std::string::size_type pos = 0;
size_t textLen = static_cast<size_t>(floor(lineLength * 3 / 4));
std::ostringstream ostr;
while (true)
{
Base64Encoder encoder(ostr);
encoder.rdbuf()->setLineLength(static_cast<int>(lineLength));
startEncoding(encodedText, charset, encoding);
std::string line = text.substr(pos, textLen);
encoder << line;
encoder.close();
encodedText.append(ostr.str());
encodedText.append("?=");
if (line.size() < textLen) break;
ostr.str("");
pos += textLen;
encodedText.append("\r\n");
}
lineLength = 0;;
}
else
{
throw InvalidArgumentException(Poco::format("MailMessage::encodeWord: "
"unknown encoding: %c", encoding));
}
if (lineLength > 0) encodedText += "?=";
return encodedText;
}
void MailMessage::advanceToEncoded(const std::string& encoded, std::string& decoded, std::string::size_type& pos1, bool& isComment)
{
	// Copies the plain text that precedes the next encoded-word start ("=?")
	// from 'encoded' to 'decoded', incrementing pos1 once per copied
	// character. Returns as soon as "=?" is seen (without consuming it);
	// if no "=?" is found, the whole input is copied and pos1 is set to npos.
	//
	// isComment is updated as '(' and ')' are passed, so the caller knows
	// whether the encoded-word that follows sits inside a comment.
	bool spaceOnly = isComment; // flag to trim away spaces between encoded-word's
	const auto end = encoded.end();
	for (auto it = encoded.begin(); it != end; ++it)
	{
		if (*it == '=')
		{
			// Peek at the next character instead of advancing the iterator:
			// advancing could step onto end() (and then be dereferenced and
			// incremented again), and it also dropped a literal '=' that is
			// not part of an encoded-word start.
			const auto next = it + 1;
			if (next != end && *next == '?')
			{
				if (spaceOnly) trimRightInPlace(decoded);
				return;
			}
		}
		else if (*it == '(') isComment = true;
		else if (*it == ')') isComment = false;
		if (isComment && !Ascii::isSpace(*it)) spaceOnly = false;
		decoded.append(1, *it);
		++pos1;
	}
	pos1 = std::string::npos;
}
std::string MailMessage::decodeWord(const std::string& encoded, std::string toCharset)
{
std::string encodedWord = replace(encoded, "?=\r\n=?", "?==?");
bool toCharsetGiven = !toCharset.empty();
std::string errMsg;
const std::size_t notFound = std::string::npos;
std::string decoded;
std::string::size_type pos1 = 0, pos2 = 0;
bool isComment = false;
advanceToEncoded(encodedWord, decoded, pos1, isComment);
if (pos1 != notFound)
{
getEncWordLimits(encodedWord, pos1, pos2, isComment);
while ((pos1 != notFound) && (pos2 != notFound) && pos2 > pos1 + 2)
{
pos1 += 2;
StringTokenizer st(encodedWord.substr(pos1, pos2 - pos1), "?");
if (st.count() == 3)
{
std::string charset = st[0];
if (!toCharsetGiven) toCharset = charset;
if (st[1].size() > 1)
{
throw InvalidArgumentException(Poco::format("MailMessage::decodeWord: "
"invalid encoding %s", st[1]));
}
char encoding = st[1][0];
std::string encodedText = st[2];
if (encodedText.find_first_of(" ?") != notFound)
{
throw InvalidArgumentException("MailMessage::decodeWord: "
"forbidden characters found in encoded-word");
}
else if (encoding == 'q' || encoding == 'Q')
{
// no incomplete encoded characters allowed on single line
std::string::size_type eqPos = encodedText.rfind('=');
if (eqPos != notFound)
{
if ((eqPos + 2) >= encodedText.size())
{
throw InvalidArgumentException("MailMessage::decodeWord: "
"incomplete encoded character found in encoded-word");
}
}
}
decoded.append(decodeWord(charset, encoding, encodedText, toCharset));
pos1 = pos2 + 2;
advanceToEncoded(encodedWord.substr(pos1), decoded, pos1, isComment);
if (pos1 != notFound) getEncWordLimits(encodedWord, pos1, pos2, isComment);
}
else
{
encodedText += '=';
NumberFormatter::appendHex(encodedText, static_cast<unsigned>(static_cast<unsigned char>(*it)), 2);
lineLength += 3;
throw InvalidArgumentException(Poco::format("MailMessage::decodeWord: "
"invalid number of entries in encoded-word (expected 3, found %z)", st.count()));
}
}
if ((lineLength >= 64 && (*it == ' ' || *it == '\t' || *it == '\r' || *it == '\n')) || lineLength >= 72)
{
encodedText += "?=\r\n ";
lineLength = 0;
}
}
if (lineLength > 0)
else decoded = std::move(encodedWord);
return decoded;
}
void MailMessage::getEncWordLimits(const std::string& encodedWord, std::string::size_type& pos1, std::string::size_type& pos2, bool isComment)
{
const std::size_t notFound = std::string::npos;
pos1 = encodedWord.find("=?", pos1); // beginning of encoded-word
if (pos1 != notFound)
{
encodedText += "?=";
}
return encodedText;
// must look sequentially for all '?' occurences because of a (valid) case like this:
// =?ISO-8859-1?q?=C4?=
// where end would be prematurely found if we search for ?= only
pos2 = encodedWord.find('?', pos1 + 2); // first '?'
if (pos2 == notFound) goto err;
pos2 = encodedWord.find('?', pos2 + 1); // second '?'
if (pos2 == notFound) goto err;
pos2 = encodedWord.find("?=", pos2 + 1); // end of encoded-word
if (pos2 == notFound) goto err;
// before we leave, double-check for the next encoded-word end, to make sure
// an illegal '?' was not sneaked in (eg. =?ISO-8859-1?q?=C4?=D6?=)
if (((encodedWord.find("?=", pos2 + 1) != notFound &&
encodedWord.find("=?", pos2 + 1) == notFound)) ||
((encodedWord.find("=?", pos2 + 1) != notFound &&
encodedWord.find("?=", pos2 + 1) == notFound))) goto err;
}
else goto err;
// if encoded word is in a comment, then '(' and ')' are forbidden inside it
if (isComment &&
(notFound != encodedWord.substr(pos1, pos2 - pos1).find_first_of("()"))) goto err;
return;
err:
throw InvalidArgumentException("MailMessage::encodedWordLimits: invalid encoded word");
}
std::string MailMessage::decodeWord(const std::string& charset, char encoding,
	const std::string& text, const std::string& toCharset)
{
	// Decodes the payload of a single encoded-word.
	//
	//   charset   - character set named in the encoded-word
	//   encoding  - 'B'/'b' (Base64) or 'Q'/'q'
	//   text      - the encoded text (the part between the last '?' and "?=")
	//   toCharset - character set the result is converted to
	//
	// Throws NotImplementedException if either charset is not registered and
	// InvalidArgumentException for an unknown encoding or forbidden characters
	// in Q-encoded text.
	const TextEncodingRegistry& registry = TextEncoding::registry();
	// Check the two charsets separately so the message names the one that is
	// actually unsupported.
	if (!registry.has(charset))
	{
		throw NotImplementedException(Poco::format("MailMessage::decodeWord: "
			"charset not supported: %s", charset));
	}
	if (!registry.has(toCharset))
	{
		throw NotImplementedException(Poco::format("MailMessage::decodeWord: "
			"charset not supported: %s", toCharset));
	}
	TextEncoding* fromEnc = registry.find(charset);
	TextEncoding* toEnc = registry.find(toCharset);

	std::string decoded;
	switch (encoding)
	{
	case 'B': case 'b':
		{
			// Base64: run the text through the decoder byte by byte.
			std::istringstream istr(text);
			Base64Decoder decoder(istr);
			int c = decoder.get();
			while (c != -1) { decoded.append(1, static_cast<char>(c)); c = decoder.get(); }
			break;
		}
	case 'Q': case 'q':
		{
			bool isWide = false;                    // true while inside an "=XX" escape
			std::vector<char> wideChar;             // hex digits collected so far
			std::vector<unsigned char> wideCharSeq; // decoded bytes not yet emitted
			for (const char c : text)
			{
				if (!Ascii::isPrintable(c) || c == '?' || c == ' ')
				{
					throw InvalidArgumentException("MailMessage::decodeWord: encoded-word must not contain "
						"non-printable characters, '? or SPACE");
				}
				if (c == '_') decoded.append(1, ' ');
				else if (c == '=') isWide = true;
				else if (isWide)
				{
					wideChar.push_back(c);
					if (wideChar.size() % 2 == 0)
					{
						// All digits collected so far are parsed together; the
						// conversion to unsigned char keeps only the low byte,
						// i.e. the byte given by the two most recent digits.
						std::string wcStr(&wideChar[0], wideChar.size());
						const unsigned char chr = static_cast<unsigned char>(NumberParser::parseHex(wcStr));
						wideCharSeq.push_back(chr);
						// Emit the collected bytes once the source encoding
						// reports a positive sequence length for them.
						if (fromEnc->sequenceLength(&wideCharSeq[0], static_cast<int>(wideCharSeq.size())) > 0)
						{
							for (const unsigned char byte : wideCharSeq)
							{
								decoded.append(1, static_cast<char>(byte));
							}
							wideChar.clear();
							wideCharSeq.clear();
							isWide = false;
						}
					}
				}
				else decoded.append(1, c);
			}
			break;
		}
	default:
		throw InvalidArgumentException(Poco::format("MailMessage::decodeWord: Unknown encoding: %c", encoding));
	}

	if (charset != toCharset)
	{
		TextConverter converter(*fromEnc, *toEnc);
		std::string converted;
		converter.convert(decoded, converted);
		// Return the local by name: wrapping it in std::move would block NRVO.
		return converted;
	}
	return decoded;
}

View File

@ -25,7 +25,7 @@ MailRecipient::MailRecipient():
{
}
MailRecipient::MailRecipient(const MailRecipient& recipient):
_address(recipient._address),
_realName(recipient._realName),
@ -33,6 +33,16 @@ MailRecipient::MailRecipient(const MailRecipient& recipient):
{
}
MailRecipient::MailRecipient(MailRecipient&& recipient) :
_address(std::move(recipient._address)),
_realName(std::move(recipient._realName)),
_type(recipient._type)
{
recipient._address.clear();
recipient._realName.clear();
}
MailRecipient::MailRecipient(RecipientType type, const std::string& address):
_address(address),
@ -53,7 +63,7 @@ MailRecipient::~MailRecipient()
{
}
MailRecipient& MailRecipient::operator = (const MailRecipient& recipient)
{
if (this != &recipient)
@ -65,6 +75,20 @@ MailRecipient& MailRecipient::operator = (const MailRecipient& recipient)
}
// Move assignment: takes over the address and real name strings of the
// given recipient and copies its type. Self-assignment is a no-op.
MailRecipient& MailRecipient::operator = (MailRecipient&& recipient)
{
	if (this == &recipient) return *this;

	_type = recipient._type;
	_realName = std::move(recipient._realName);
	_address = std::move(recipient._address);

	// Leave the source with empty strings rather than in the unspecified
	// moved-from state.
	recipient._realName.clear();
	recipient._address.clear();
	return *this;
}
void MailRecipient::swap(MailRecipient& recipient)
{
std::swap(_type, recipient._type);

View File

@ -290,7 +290,7 @@ public:
{
Poco::Timestamp start;
#ifdef _WIN32
rc = WSAPoll(&_pollfds[0], _pollfds.size(), static_cast<INT>(timeout.totalMilliseconds()));
rc = WSAPoll(&_pollfds[0], static_cast<ULONG>(_pollfds.size()), static_cast<INT>(timeout.totalMilliseconds()));
#else
rc = ::poll(&_pollfds[0], _pollfds.size(), timeout.totalMilliseconds());
#endif

View File

@ -375,7 +375,7 @@ Poco::Timespan WebSocketImpl::getReceiveTimeout()
int WebSocketImpl::available()
{
int n = _buffer.size() - _bufferOffset;
int n = static_cast<int>(_buffer.size()) - _bufferOffset;
if (n > 0)
return n + _pStreamSocketImpl->available();
else

View File

@ -650,21 +650,166 @@ void MailMessageTest::testEncodeWord()
{
std::string plain("this is pure ASCII");
std::string encoded = MailMessage::encodeWord(plain, "ISO-8859-1");
assert (encoded == plain);
plain = "This text contains German Umlauts: \304\326";
assert(encoded == plain);
plain = "This text contains German Umlauts: \xC4\xD6";
encoded = MailMessage::encodeWord(plain, "ISO-8859-1");
assert (encoded == "=?ISO-8859-1?q?This_text_contains_German_Umlauts=3A_=C4=D6?=");
plain = "This text contains German Umlauts: \304\326. "
"It is also a very long text. Longer than 75 "
"characters. Long enough to become three lines "
"after being word-encoded.";
assert(encoded == "=?ISO-8859-1?q?This_text_contains_German_Umlauts=3A_=C4=D6?=");
plain = "This text contains German Umlauts: \xC4\xD6. "
"It is also a very long text. Longer than 75 "
"characters. Long enough to become three lines "
"after being word-encoded.";
encoded = MailMessage::encodeWord(plain, "ISO-8859-1");
assert (encoded == "=?ISO-8859-1?q?This_text_contains_German_Umlauts=3A_=C4=D6=2E_It_?=\r\n"
" =?ISO-8859-1?q?is_also_a_very_long_text=2E_Longer_than_75_characters=2E_?=\r\n"
" =?ISO-8859-1?q?Long_enough_to_become_three_lines_after_being_word-encode?=\r\n"
" =?ISO-8859-1?q?d=2E?=");
assert(encoded == "=?ISO-8859-1?q?This_text_contains_German_Umlauts=3A_=C4=D6=2E_It_?=\r\n"
" =?ISO-8859-1?q?is_also_a_very_long_text=2E_Longer_than_75_characters=2E_?=\r\n"
" =?ISO-8859-1?q?Long_enough_to_become_three_lines_after_being_word-encode?=\r\n"
" =?ISO-8859-1?q?d=2E?=");
plain = "If you can read this yo";
encoded = MailMessage::encodeWord(plain, "ISO-8859-1", 'B');
assert(encoded == "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=");
plain = "u understand the example.";
encoded = MailMessage::encodeWord(plain, "ISO-8859-2", 'B');
assert(encoded == "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=");
plain = "This text contains German Umlauts: \xC4\xD6. "
"It is also a very long text. Longer than 75 "
"characters. Long enough to become three lines "
"after being word-encoded.";
encoded = MailMessage::encodeWord(plain, "ISO-8859-1", 'B');
assert(encoded ==
"=?ISO-8859-1?B?VGhpcyB0ZXh0IGNvbnRhaW5zIEdlcm1hbiBVbWxhdXRzOiDE1i4gSQ==?=\r\n"
"=?ISO-8859-1?B?dCBpcyBhbHNvIGEgdmVyeSBsb25nIHRleHQuIExvbmdlciB0aGFuIA==?=\r\n"
"=?ISO-8859-1?B?NzUgY2hhcmFjdGVycy4gTG9uZyBlbm91Z2ggdG8gYmVjb21lIHRocg==?=\r\n"
"=?ISO-8859-1?B?ZWUgbGluZXMgYWZ0ZXIgYmVpbmcgd29yZC1lbmNvZGVkLg==?=");
std::string decoded = MailMessage::decodeWord(encoded);
assert (decoded == plain);
}
// Exercises MailMessage::decodeWord() on single encoded words, on encoded
// words mixed with plain text, on folded (multi-line) input, on complete
// header samples and on malformed input that is expected to be rejected.
void MailMessageTest::testDecodeWord()
{
// A single Q-encoded ISO-8859-1 word; without a target charset argument the
// expected result is the raw bytes 0xC4 0xD6.
std::string encoded = "=?ISO-8859-1?q?=C4=D6?=";
std::string decoded = MailMessage::decodeWord(encoded);
assert(decoded == "\xC4\xD6");
// Plain text between two encoded words is expected to be kept as is.
encoded = "=?ISO-8859-1?q?=C4=D6?=abc=?ISO-8859-1?q?=C4=D6?=";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "\xC4\xD6" "abc" "\xC4\xD6");
// Same input with "UTF-8" as target: each umlaut is expected as a two-byte
// UTF-8 sequence.
decoded = MailMessage::decodeWord(encoded, "UTF-8");
assert(decoded == "\xC3\x84\xC3\x96" "abc" "\xC3\x84\xC3\x96");
// A SPACE inside the encoded text must be rejected.
try
{
MailMessage::decodeWord("=?ISO-8859-1?q?=C4 =D6?=");
fail("must fail");
}
catch (Poco::InvalidArgumentException&) {}
// A stray "?=" in the middle of the encoded text must be rejected.
try
{
MailMessage::decodeWord("=?ISO-8859-1?q?=C4?=D6?=\r\n");
fail("must fail");
}
catch (Poco::InvalidArgumentException&) {}
// Two encoded words separated only by CRLF: the expected result joins them
// without the line break; '_' decodes to a space.
encoded = "=?ISO-8859-1?q?=C4=D6_It_?=\r\n=?ISO-8859-1?q?is?=";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "\xC4\xD6 It is");
// Mixed literal characters, '_' and "=XX" escapes ("=3A" is ':').
encoded = "=?ISO-8859-1?q?This_text_contains_German_Umlauts=3A_=C4=D6?=";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "This text contains German Umlauts: \xC4\xD6");
decoded = MailMessage::decodeWord(encoded, "UTF-8");
assert(decoded == "This text contains German Umlauts: \xC3\x84\xC3\x96");
// A long text folded into four encoded words, one per line.
encoded = "=?ISO-8859-1?q?This_text_contains_German_Umlauts=3A_=C4=D6=2E_It_?=\r\n"
"=?ISO-8859-1?q?is_also_a_very_long_text=2E_Longer_than_75_characters=2E_?=\r\n"
"=?ISO-8859-1?q?Long_enough_to_become_three_lines_after_being_word-encode?=\r\n"
"=?ISO-8859-1?q?d=2E?=";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "This text contains German Umlauts: \xC4\xD6. It "
"is also a very long text. Longer than 75 characters. "
"Long enough to become three lines after being word-encode"
"d.");
decoded = MailMessage::decodeWord(encoded, "UTF-8");
assert(decoded == "This text contains German Umlauts: \xC3\x84\xC3\x96. It "
"is also a very long text. Longer than 75 characters. "
"Long enough to become three lines after being word-encode"
"d.");
// The encoding letter is also accepted in upper case ('Q').
encoded = "=?ISO-8859-1?Q?=F8=E9?=";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "\xF8\xE9");
// Complete header samples mixing Q and B encoded words with plain header
// text (these appear to be the examples from RFC 2047, section 8). The CRLF
// between header lines is expected to survive, while the CRLF between the
// two adjacent encoded words of the Subject is expected to be dropped.
encoded = "From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>\r\n"
"To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>\r\n"
"CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>\r\n"
"Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n"
"=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "From: Keith Moore <moore@cs.utk.edu>\r\n"
"To: Keld J\xF8rn Simonsen <keld@dkuug.dk>\r\n"
"CC: Andr\xE9 Pirard <PIRARD@vm1.ulg.ac.be>\r\n"
"Subject: If you can read this you understand the example.");
// Plain text ending in '?' after an encoded word must pass through untouched.
encoded = "From: =?ISO-8859-1?Q?Olle_J=E4rnefors?= <ojarnef@admin.kth.se>\r\n"
"To: ietf-822@dimacs.rutgers.edu, ojarnef@admin.kth.se\r\n"
"Subject: Time for ISO 10646?";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "From: Olle J\xE4rnefors <ojarnef@admin.kth.se>\r\n"
"To: ietf-822@dimacs.rutgers.edu, ojarnef@admin.kth.se\r\n"
"Subject: Time for ISO 10646?");
// The encoded word sits in the third of four header lines.
encoded = "To: Dave Crocker <dcrocker@mordor.stanford.edu>\r\n"
"Cc: ietf-822@dimacs.rutgers.edu, paf@comsol.se\r\n"
"From: =?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?= <paf@nada.kth.se>\r\n"
"Subject: Re: RFC-HDR care and feeding";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "To: Dave Crocker <dcrocker@mordor.stanford.edu>\r\n"
"Cc: ietf-822@dimacs.rutgers.edu, paf@comsol.se\r\n"
"From: Patrik F\xE4ltstr\xF6m <paf@nada.kth.se>\r\n"
"Subject: Re: RFC-HDR care and feeding");
// encoded chars cannot be broken between lines
try
{
encoded = "=?ISO-8859-1?Q?=?=\r\n=?ISO-8859-1?Q?AB?=";
decoded = MailMessage::decodeWord(encoded);
fail("must fail");
}
catch (Poco::InvalidArgumentException&) {}
// comments and spaces therein
encoded = "(=?ISO-8859-1?Q?a?=)";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "(a)");
// Whitespace between an encoded word and plain text is expected to be kept.
encoded = "(=?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?=)";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "(a b c)");
// Whitespace (SPACE or CRLF + TAB) between two adjacent encoded words is
// expected to be dropped.
encoded = "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "(ab)");
encoded = "(=?ISO-8859-1?Q?a?=\r\n\t=?ISO-8859-1?Q?b?=)";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "(ab)");
// A space that is part of the text has to be encoded as '_'.
encoded = "(=?ISO-8859-1?Q?a_b?=)";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "(a b)");
// Adjacent encoded words may name different charsets.
encoded = "(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)";
decoded = MailMessage::decodeWord(encoded);
assert(decoded == "(a b)");
}
@ -697,6 +842,7 @@ CppUnit::Test* MailMessageTest::suite()
CppUnit_addTest(pSuite, MailMessageTest, testReadWriteMultiPart);
CppUnit_addTest(pSuite, MailMessageTest, testReadWriteMultiPartStore);
CppUnit_addTest(pSuite, MailMessageTest, testEncodeWord);
CppUnit_addTest(pSuite, MailMessageTest, testDecodeWord);
return pSuite;
}

View File

@ -39,6 +39,7 @@ public:
void testReadMultiPartWithAttachmentNames();
void testReadMultiPartDefaultTransferEncoding();
void testEncodeWord();
void testDecodeWord();
void setUp();
void tearDown();