From 9e010930cfcc6d05f9dc4c1ed45d3ada87f411d6 Mon Sep 17 00:00:00 2001
From: kmribti
Date: Sat, 16 Jan 2016 10:00:36 +0100
Subject: [PATCH] Add mime RFC2047 decoder to MessageHeader

Add RFC2047 word decode to MessageHeader class
---
 Net/src/MessageHeader.cpp | 158 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 157 insertions(+), 1 deletion(-)

diff --git a/Net/src/MessageHeader.cpp b/Net/src/MessageHeader.cpp
index 2a41d8e94..c3ad4877a 100644
--- a/Net/src/MessageHeader.cpp
+++ b/Net/src/MessageHeader.cpp
@@ -18,7 +18,12 @@
 #include "Poco/Net/NetException.h"
 #include "Poco/String.h"
 #include "Poco/Ascii.h"
+#include "Poco/TextConverter.h"
+#include "Poco/StringTokenizer.h"
+#include "Poco/Base64Decoder.h"
+#include "Poco/UTF8Encoding.h"
+#include <sstream>
 
 
 namespace Poco {
 namespace Net {
@@ -98,7 +103,7 @@ void MessageHeader::read(std::istream& istr)
 				throw MessageException("Folded field value too long/no CRLF found");
 		}
 		Poco::trimRightInPlace(value);
-		add(name, value);
+		add(name, decodeWord(value));
 		++fields;
 	}
 	istr.putback(ch);
@@ -253,5 +258,156 @@ void MessageHeader::quote(const std::string& value, std::string& result, bool al
 	if (mustQuote) result += '"';
 }
 
+
+namespace
+{
+	// Returns the value of the hexadecimal digit c, or -1 if c is not a hex digit.
+	int hexDigitValue(char c)
+	{
+		if (c >= '0' && c <= '9') return c - '0';
+		if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+		if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+		return -1;
+	}
+
+
+	// Decodes the body of an RFC 2047 encoded-word, "charset?encoding?encoded-text"
+	// (the enclosing "=?" and "?=" already removed), and converts the decoded bytes
+	// from the declared charset to charsetTo.
+	//
+	// Returns false and leaves decoded untouched if body does not have that form or
+	// the encoding is neither "B" (Base64) nor "Q"; returns true otherwise.
+	bool decodeEncodedWordBody(const std::string& body, std::string& decoded, const std::string& charsetTo)
+	{
+		const std::string::size_type sep1 = body.find('?');
+		if (sep1 == std::string::npos) return false;
+		const std::string::size_type sep2 = body.find('?', sep1 + 1);
+		if (sep2 == std::string::npos) return false;
+
+		// RFC 2231 permits a "*language" suffix after the charset name; ignore it.
+		std::string charset = body.substr(0, sep1);
+		const std::string::size_type star = charset.find('*');
+		if (star != std::string::npos) charset.erase(star);
+
+		const std::string encoding = toUpper(body.substr(sep1 + 1, sep2 - sep1 - 1));
+		const std::string text = body.substr(sep2 + 1);
+
+		std::string raw;
+		if (encoding == "B")
+		{
+			// Base64: Base64Decoder reads its input from a stream.
+			std::istringstream istr(text);
+			Base64Decoder decoder(istr);
+			for (char c; decoder.get(c);) raw += c;
+		}
+		else if (encoding == "Q")
+		{
+			for (std::string::size_type i = 0; i < text.size(); ++i)
+			{
+				const char c = text[i];
+				const int hi = (c == '=' && i + 2 < text.size()) ? hexDigitValue(text[i + 1]) : -1;
+				const int lo = (hi >= 0) ? hexDigitValue(text[i + 2]) : -1;
+				if (c == '_')
+				{
+					// In the "Q" encoding an underscore stands for a space.
+					raw += ' ';
+				}
+				else if (hi >= 0 && lo >= 0)
+				{
+					// "=XY": two hex digits giving the value of a single byte.
+					raw += static_cast<char>((hi << 4) | lo);
+					i += 2;
+				}
+				else
+				{
+					// Ordinary character, or an '=' that is not followed by two hex
+					// digits: copy it through rather than emit a bogus byte.
+					raw += c;
+				}
+			}
+		}
+		else return false;
+
+		if (icompare(charset, charsetTo) == 0)
+		{
+			// Already in the requested charset, nothing to convert.
+			decoded = raw;
+			return true;
+		}
+
+		try
+		{
+			TextEncoding& from = TextEncoding::byName(charset);
+			TextEncoding& to = TextEncoding::byName(charsetTo);
+			TextConverter converter(from, to);
+			std::string converted;
+			converter.convert(raw, converted);
+			decoded = converted;
+		}
+		catch (Poco::Exception&)
+		{
+			// Best effort: if the conversion cannot be set up (e.g. an encoding name
+			// that is not registered), hand back the decoded bytes unconverted.
+			decoded = raw;
+		}
+		return true;
+	}
+}
+
+
+void MessageHeader::decodeRFC2047(const std::string& ins, std::string& outs, const std::string& charset_to)
+{
+	// ins is an encoded-word without its "=?" and "?=" delimiters. If it cannot be
+	// decoded, ins is copied to outs unchanged.
+	if (!decodeEncodedWordBody(ins, outs, charset_to))
+		outs = ins;
+}
+
+
+std::string MessageHeader::decodeWord(const std::string& text, const std::string& charset)
+{
+	std::string result;
+	std::string::size_type pos = 0;   // start of the input not yet copied to result
+	bool afterEncodedWord = false;    // the last thing appended was a decoded word
+
+	for (;;)
+	{
+		// An encoded-word has the form "=?charset?encoding?encoded-text?=".
+		const std::string::size_type begin = text.find("=?", pos);
+		if (begin == std::string::npos) break;
+		const std::string::size_type sep1 = text.find('?', begin + 2);
+		if (sep1 == std::string::npos) break;
+		const std::string::size_type sep2 = text.find('?', sep1 + 1);
+		if (sep2 == std::string::npos) break;
+		const std::string::size_type end = text.find("?=", sep2 + 1);
+		if (end == std::string::npos) break;
+
+		std::string decoded;
+		if (!decodeEncodedWordBody(text.substr(begin + 2, end - begin - 2), decoded, charset))
+		{
+			// Not decodable: keep this "=?" as literal text and resume the search
+			// right behind it, so nothing from the input is lost.
+			result.append(text, pos, begin + 2 - pos);
+			pos = begin + 2;
+			afterEncodedWord = false;
+			continue;
+		}
+
+		// Copy the plain text in front of the encoded-word, all of it (including
+		// the character right before "=?"). Per RFC 2047 section 6.2, whitespace
+		// that only separates two adjacent encoded-words is dropped.
+		const bool separatorOnly = afterEncodedWord && text.find_first_not_of(" \t\r\n", pos) == begin;
+		if (!separatorOnly) result.append(text, pos, begin - pos);
+
+		result += decoded;
+		pos = end + 2;
+		afterEncodedWord = true;
+	}
+
+	// What is left holds no complete encoded-word; copy it verbatim.
+	result.append(text, pos, std::string::npos);
+	return result;
+}
+
 
 } } // namespace Poco::Net