fix(Net::MessageHeader): Correct RFC 2047 decoding for two consecutive encoded words (#4542)

* fix bug #4535
RFC 2047 decodeWord

* modify logic
uses a separate string that holds the symbols between chunks; if this
string contains only spaces, \n, \t or \v, then trim it
This commit is contained in:
Alexander B 2024-04-29 11:54:23 +03:00 committed by GitHub
parent 6998c66539
commit 1a0355f1b7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 24 additions and 6 deletions

View File

@ -413,10 +413,10 @@ void MessageHeader::decodeRFC2047(const std::string& ins, std::string& outs, con
std::string MessageHeader::decodeWord(const std::string& text, const std::string& charset)
{
std::string outs, tmp = text;
size_t pos = tmp.find("=?");
do {
std::string tmp2;
// find the beginning of the next rfc2047 chunk
size_t pos = tmp.find("=?");
if (pos == std::string::npos) {
// No more found, return
outs += tmp;
@ -453,14 +453,24 @@ std::string MessageHeader::decodeWord(const std::string& text, const std::string
// not found.
outs += tmp;
break;
}
// At this point, there is a valid rfc2047 chunk, so decode and copy the result.
decodeRFC2047(tmp.substr(0, pos3), tmp2, charset);
outs += tmp2;
// Jump at the rest of the string and repeat the whole process.
tmp = tmp.substr(pos3 + 2);
pos = tmp.find("=?");
if (pos != std::string::npos)
{
std::string betweenChunks = tmp.substr(0, pos);
if (betweenChunks.find_first_not_of(" \t\v\n") == std::string::npos)
{
tmp = tmp.substr(pos);
pos = 0;
}
}
} while (true);
return outs;

View File

@ -400,11 +400,19 @@ void MessageHeaderTest::testDecodeWord()
coded = "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)";
decoded = MessageHeader::decodeWord(coded, "ISO-8859-1");
assertTrue (decoded == "(a b)");
assertTrue (decoded == "(ab)");
coded = "(=?ISO-8859-1?Q?a?= <0> =?ISO-8859-1?Q?b?=)";
decoded = MessageHeader::decodeWord(coded, "ISO-8859-1");
assertTrue (decoded == "(a <0> b)");
coded = "Hello =?UTF-8?B?RnJhbmNpcw==?=, good bye";
decoded = MessageHeader::decodeWord(coded, "ISO-8859-1");
assertTrue (decoded == "Hello Francis, good bye");
coded = "application/pdf; name=\"=?utf-8?Q?RUG_Regler-_und_Ger=C3=A4tebau_Gm?= =?utf-8?Q?bH_Angebot_Erneuerung_=C3=9CE.pdf?=\"";
decoded = MessageHeader::decodeWord(coded, "UTF-8");
assertTrue (decoded == "application/pdf; name=\"RUG Regler- und Gerätebau GmbH Angebot Erneuerung ÜE.pdf\"");
}
// Sample HTTP request header
@ -430,10 +438,10 @@ void MessageHeaderTest::testAutoDecode()
MessageHeader mh;
mh.read(istr);
assertEquals(mh.get("X-Encoded-Header-A"), "(a b)");
assertEquals(mh.get("X-Encoded-Header-A"), "(ab)");
assertEquals(mh.get("X-Encoded-Header-B"), "Hello Francis, good bye");
assertEquals(mh.getDecoded("X-Encoded-Header-A"), "(a b)");
assertEquals(mh.getDecoded("X-Encoded-Header-A"), "(ab)");
assertEquals(mh.getDecoded("X-Encoded-Header-B"), "Hello Francis, good bye");
}
{
@ -445,7 +453,7 @@ void MessageHeaderTest::testAutoDecode()
assertEquals(mh.get("X-Encoded-Header-A"), "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)");
assertEquals(mh.get("X-Encoded-Header-B"), "Hello =?UTF-8?B?RnJhbmNpcw==?=, good bye");
assertEquals(mh.getDecoded("X-Encoded-Header-A"), "(a b)");
assertEquals(mh.getDecoded("X-Encoded-Header-A"), "(ab)");
assertEquals(mh.getDecoded("X-Encoded-Header-B"), "Hello Francis, good bye");
}
}