Decode Base64 encoded-words whose payload is cut at a position that is not
a multiple of four characters, the remainder being carried by the next
encoded-word (for example "=?utf-8?B?5Lit5?= =?utf-8?B?paH?=").

What the patch does:

  * word::parseNext() takes a single word::parserState* in place of the
    former (prevIsEncoded, isEncoded, isFirst) arguments. The pointer is
    dereferenced unconditionally, so callers must pass a valid object;
    word::parseMultiple() passes the address of a local parserState.
  * word::parseImpl() becomes a thin wrapper that calls the new
    word::parseWithState() with a NULL state.
  * word::parseWithState() prepends state->undecodedBytes (if any) to the
    data of the current word before decoding, and for the 'B' encoding
    stores whatever could not be decoded back into state->undecodedBytes.
  * tests/parser/textTest.cpp gains testWronglyPaddedB64Words.

Review amendments in this revision of the patch:

  * The leftover is now sliced from encodedBuffer instead of from dataPos.
    Both lengths compared in that block are measured on encodedBuffer,
    which may start with bytes carried over from the previous word, while
    dataPos points at the data of the current word only; slicing from
    dataPos dropped the carried prefix whenever a leftover remained.
  * The condition is "theoretical < actual" rather than "!=", so an offset
    past the end of the buffer can never be formed.
  * "theoricalEncodedLen" is renamed "theoreticalEncodedLen".
  * The last hunk header of word.cpp is adjusted for the two added comment
    lines. The blob id on the "index" line of word.cpp is the one of the
    original revision and does not reflect these amendments.
  * Line structure, indentation and the template arguments of the context
    lines were reconstructed from a copy that had lost them; check them
    against the tree before applying.

diff --git a/src/vmime/word.cpp b/src/vmime/word.cpp
index 1a70a785..27b741d4 100644
--- a/src/vmime/word.cpp
+++ b/src/vmime/word.cpp
@@ -74,8 +74,7 @@ word::word(const string& buffer, const charset& charset, const string& lang)
 
 shared_ptr <word> word::parseNext
 	(const parsingContext& ctx, const string& buffer, const size_t position,
-	 const size_t end, size_t* newPosition,
-	 bool prevIsEncoded, bool* isEncoded, bool isFirst)
+	 const size_t end, size_t* newPosition, parserState* state)
 {
 	size_t pos = position;
 
@@ -131,7 +130,7 @@ shared_ptr <word> word::parseNext
 
 			if (!unencoded.empty())
 			{
-				if (prevIsEncoded && !isFirst)
+				if (state->prevIsEncoded && !state->isFirst)
 					unencoded = whiteSpaces + unencoded;
 
 				shared_ptr <word> w = make_shared <word>(unencoded, defaultCharset);
@@ -140,8 +139,8 @@ shared_ptr <word> word::parseNext
 				if (newPosition)
 					*newPosition = pos;
 
-				if (isEncoded)
-					*isEncoded = false;
+				state->prevIsEncoded = false;
+				state->isFirst = false;
 
 				return (w);
 			}
@@ -192,13 +191,13 @@ shared_ptr <word> word::parseNext
 			pos += 2; // ?=
 
 			shared_ptr <word> w = make_shared <word>();
-			w->parse(ctx, buffer, wordStart, pos, NULL);
+			w->parseWithState(ctx, buffer, wordStart, pos, NULL, state);
 
 			if (newPosition)
 				*newPosition = pos;
 
-			if (isEncoded)
-				*isEncoded = true;
+			state->prevIsEncoded = true;
+			state->isFirst = false;
 
 			return (w);
 		}
@@ -208,7 +207,7 @@ shared_ptr <word> word::parseNext
 
 	if (startPos != end)
 	{
-		if (prevIsEncoded && !isFirst)
+		if (state->prevIsEncoded && !state->isFirst)
 			unencoded = whiteSpaces + unencoded;
 
 		unencoded += buffer.substr(startPos, end - startPos);
@@ -223,8 +222,8 @@ shared_ptr <word> word::parseNext
 		if (newPosition)
 			*newPosition = end;
 
-		if (isEncoded)
-			*isEncoded = false;
+		state->prevIsEncoded = false;
+		state->isFirst = false;
 
 		return (w);
 	}
@@ -242,9 +241,9 @@ const std::vector <shared_ptr <word> > word::parseMultiple
 
 	size_t pos = position;
 
-	bool prevIsEncoded = false;
+	parserState state;
 
-	while ((w = word::parseNext(ctx, buffer, pos, end, &pos, prevIsEncoded, &prevIsEncoded, (w == NULL))) != NULL)
+	while ((w = word::parseNext(ctx, buffer, pos, end, &pos, &state)) != NULL)
 		res.push_back(w);
 
 	if (newPosition)
@@ -257,6 +256,14 @@ const std::vector <shared_ptr <word> > word::parseMultiple
 void word::parseImpl
 	(const parsingContext& ctx, const string& buffer, const size_t position,
 	 const size_t end, size_t* newPosition)
+{
+	parseWithState(ctx, buffer, position, end, newPosition, NULL);
+}
+
+
+void word::parseWithState
+	(const parsingContext& ctx, const string& buffer, const size_t position,
+	 const size_t end, size_t* newPosition, parserState* state)
 {
 	if (position + 6 < end && // 6 = "=?(.+)?(.*)?="
 	    buffer[position] == '=' && buffer[position + 1] == '?')
@@ -319,12 +326,19 @@ void word::parseImpl
 			}
 
 			// Decode text
+			string encodedBuffer(dataPos, dataEnd);
 			string decodedBuffer;
 
-			utility::inputStreamStringAdapter ein(string(dataPos, dataEnd));
+			if (state && !state->undecodedBytes.empty())
+			{
+				encodedBuffer = state->undecodedBytes + encodedBuffer;
+				state->undecodedBytes.clear();
+			}
+
+			utility::inputStreamStringAdapter ein(encodedBuffer);
 			utility::outputStreamStringAdapter eout(decodedBuffer);
 
-			theEncoder->decode(ein, eout);
+			const size_t decodedLen = theEncoder->decode(ein, eout);
 
 			m_buffer = decodedBuffer;
 
@@ -333,6 +347,23 @@ void word::parseImpl
 			if (newPosition)
 				*newPosition = (p - buffer.begin());
 
+			// For Base64 encoding, ensure all bytes have been decoded.
+			// If there are remaining bytes, keep them for the next run.
+			//
+			// This allows decoding some insanities like:
+			//     =?utf-8?B?5Lit5?= =?utf-8?B?paH?=
+			if (*encPos == 'B' || *encPos == 'b')
+			{
+				const size_t actualEncodedLen = encodedBuffer.length();
+				const size_t theoreticalEncodedLen =
+					((decodedLen + ((decodedLen % 3) ? (3 - (decodedLen % 3)) : 0) ) / 3) * 4;
+
+				// Both lengths are relative to encodedBuffer, which may start with
+				// bytes carried over from the previous word: slice it, not dataPos.
+				if (state && theoreticalEncodedLen < actualEncodedLen)
+					state->undecodedBytes = encodedBuffer.substr(theoreticalEncodedLen);
+			}
+
 			return;
 		}
 	}
diff --git a/src/vmime/word.hpp b/src/vmime/word.hpp
index 0e60225e..2c6ecb57 100644
--- a/src/vmime/word.hpp
+++ b/src/vmime/word.hpp
@@ -151,6 +151,20 @@ public:
 		bool prevWordIsEncoded;
 		bool lastCharIsSpace;
 	};
+
+	class parserState
+	{
+	public:
+
+		parserState()
+			: prevIsEncoded(false), isFirst(true)
+		{
+		}
+
+		bool prevIsEncoded;
+		bool isFirst;
+		std::string undecodedBytes;
+	};
 #endif
 
 
@@ -169,6 +183,14 @@ protected:
 		const size_t curLinePos = 0,
 		size_t* newLinePos = NULL) const;
 
+	void parseWithState
+		(const parsingContext& ctx,
+		 const string& buffer,
+		 const size_t position,
+		 const size_t end,
+		 size_t* newPosition,
+		 parserState* state);
+
 public:
 
 	using component::generate;
@@ -193,9 +215,7 @@ private:
 		 const size_t position,
 		 const size_t end,
 		 size_t* newPosition,
-		 bool prevIsEncoded,
-		 bool* isEncoded,
-		 bool isFirst);
+		 parserState* state);
 
 	static const std::vector <shared_ptr <word> > parseMultiple
 		(const parsingContext& ctx,
diff --git a/tests/parser/textTest.cpp b/tests/parser/textTest.cpp
index 58d5bff5..588dc194 100644
--- a/tests/parser/textTest.cpp
+++ b/tests/parser/textTest.cpp
@@ -59,6 +59,8 @@ VMIME_TEST_SUITE_BEGIN(textTest)
 		VMIME_TEST(testInternationalizedEmail_UTF8)
 		VMIME_TEST(testInternationalizedEmail_nonUTF8)
 		VMIME_TEST(testInternationalizedEmail_folding)
+
+		VMIME_TEST(testWronglyPaddedB64Words)
 	VMIME_TEST_LIST_END
 
 
@@ -595,5 +597,25 @@ VMIME_TEST_SUITE_BEGIN(textTest)
 			" encoded text", w2.generate(20));
 	}
 
+	void testWronglyPaddedB64Words()
+	{
+		vmime::text outText;
+
+		vmime::text::decodeAndUnfold("=?utf-8?B?5Lit5?=\n =?utf-8?B?paH?=", &outText);
+
+		VASSERT_EQ("1", "\xe4\xb8\xad\xe6\x96\x87",
+			outText.getConvertedText(vmime::charset("utf-8")));
+
+		vmime::text::decodeAndUnfold("=?utf-8?B?5Lit5p?=\n =?utf-8?B?aH?=", &outText);
+
+		VASSERT_EQ("2", "\xe4\xb8\xad\xe6\x96\x87",
+			outText.getConvertedText(vmime::charset("utf-8")));
+
+		vmime::text::decodeAndUnfold("=?utf-8?B?5Lit5pa?=\n =?utf-8?B?H?=", &outText);
+
+		VASSERT_EQ("3", "\xe4\xb8\xad\xe6\x96\x87",
+			outText.getConvertedText(vmime::charset("utf-8")));
+	}
+
 VMIME_TEST_SUITE_END
 