From 439b2b3e90cb78a81e8d42ffdcd8543c64b1d1de Mon Sep 17 00:00:00 2001 From: Vincent Richard Date: Mon, 28 Apr 2008 19:49:48 +0000 Subject: [PATCH] Fixed extra space in subject (see https://sourceforge.net/forum/message.php?msg_id=4894970). --- src/text.cpp | 103 +++++++++++++++----------- src/word.cpp | 42 ++++++++++- tests/parser/attachmentHelperTest.cpp | 2 + tests/parser/htmlTextPartTest.cpp | 12 +++ tests/parser/textTest.cpp | 52 ++++++++++--- vmime/word.hpp | 3 +- 6 files changed, 158 insertions(+), 56 deletions(-) diff --git a/src/text.cpp b/src/text.cpp index 67aed20f..13a682ef 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -269,63 +269,78 @@ void text::createFromString(const string& in, const charset& ch) removeAllWords(); - for (string::size_type end = in.size(), pos = 0, start = 0 ; ; ) + const string::size_type asciiCount = + utility::stringUtils::countASCIIchars(in.begin(), in.end()); + + const string::size_type asciiPercent = + (in.length() == 0 ? 100 : (100 * asciiCount) / in.length()); + + // If there are "too much" non-ASCII chars, encode everything + if (asciiPercent < 60) // less than 60% ASCII chars { - if (pos == end || parserHelpers::isSpace(in[pos])) + appendWord(vmime::create (in, ch)); + } + // Else, only encode words which need it + else + { + for (string::size_type end = in.size(), pos = 0, start = 0 ; ; ) { - if (pos != end) - ++pos; - - const string chunk(in.begin() + start, in.begin() + pos); - - if (is8bit) + if (pos == end || parserHelpers::isSpace(in[pos])) { - if (count && prevIs8bit) + const string chunk(in.begin() + start, in.begin() + pos); + + if (pos != end) + ++pos; + + if (is8bit) { - // No need to create a new encoded word, just append - // the current word to the previous one. - ref w = getWordAt(getWordCount() - 1); - w->getBuffer() += chunk; + if (count && prevIs8bit) + { + // No need to create a new encoded word, just append + // the current word to the previous one. + ref w = getWordAt(getWordCount() - 1); + w->getBuffer() += " " + chunk; + } + else + { + appendWord(vmime::create (chunk, ch)); + + prevIs8bit = true; + ++count; + } } else { - appendWord(vmime::create (chunk, ch)); + if (count && !prevIs8bit) + { + ref w = getWordAt(getWordCount() - 1); + w->getBuffer() += " " + chunk; + } + else + { + appendWord(vmime::create + (chunk, charset(charsets::US_ASCII))); - prevIs8bit = true; - ++count; + prevIs8bit = false; + ++count; + } } + + if (pos == end) + break; + + is8bit = false; + start = pos; + } + else if (!parserHelpers::isAscii(in[pos])) + { + is8bit = true; + ++pos; } else { - if (count && !prevIs8bit) - { - ref w = getWordAt(getWordCount() - 1); - w->getBuffer() += chunk; - } - else - { - appendWord(vmime::create - (chunk, charset(charsets::US_ASCII))); - - prevIs8bit = false; - ++count; - } + ++pos; } - - if (pos == end) - break; - - is8bit = false; - start = pos; - } - else if (!parserHelpers::isAscii(in[pos])) - { - is8bit = true; - ++pos; - } - else - { - ++pos; } } } diff --git a/src/word.cpp b/src/word.cpp index 98ad208a..3a0605ff 100644 --- a/src/word.cpp +++ b/src/word.cpp @@ -73,8 +73,13 @@ ref word::parseNext(const string& buffer, const string::size_type positio // - before the first word // - between two encoded words // - after the last word + string whiteSpaces; + while (pos < end && parserHelpers::isSpace(buffer[pos])) + { + whiteSpaces += buffer[pos]; ++pos; + } string::size_type startPos = pos; string unencoded; @@ -88,7 +93,10 @@ ref word::parseNext(const string& buffer, const string::size_type positio string::size_type endPos = pos; if (pos > position && buffer[pos - 1] == '\r') + { + ++pos; --endPos; + } while (pos != end && parserHelpers::isSpace(buffer[pos])) ++pos; @@ -97,6 +105,7 @@ ref word::parseNext(const string& buffer, const string::size_type positio unencoded += ' '; startPos = pos; + continue; } // Start of an encoded word else if (pos + 8 < end && // 8 = "=?(.+)?(.+)?(.*)?=" @@ -107,6 +116,9 @@ ref word::parseNext(const string& buffer, const string::size_type positio if (!unencoded.empty()) { + if (prevIsEncoded) + unencoded = whiteSpaces + unencoded; + ref w = vmime::create (unencoded, charset(charsets::US_ASCII)); w->setParsedBounds(position, pos); @@ -183,7 +195,7 @@ ref word::parseNext(const string& buffer, const string::size_type positio if (end != startPos) { if (startPos != pos && !isFirst && prevIsEncoded) - unencoded += ' '; + unencoded += whiteSpaces; unencoded += buffer.substr(startPos, end - startPos); @@ -388,11 +400,15 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe { os << CRLF; curLineLength = 0; + + state->lastCharIsSpace = true; } else { os << NEW_LINE_SEQUENCE; curLineLength = NEW_LINE_SEQUENCE_LENGTH; + + state->lastCharIsSpace = true; } p = curLineStart; @@ -401,8 +417,16 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe } else { + if (!state->isFirstWord && state->prevWordIsEncoded && !state->lastCharIsSpace && !parserHelpers::isSpace(*curLineStart)) + os << " "; // Separate from previous word + os << string(curLineStart, p); + if (parserHelpers::isSpace(*(p - 1))) + state->lastCharIsSpace = true; + else + state->lastCharIsSpace = false; + if (p == end) { finished = true; @@ -439,15 +463,24 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe os << string(curLineStart, lastWSpos); + if (lastWSpos > curLineStart && std::isspace(*(lastWSpos - 1))) + state->lastCharIsSpace = true; + else + state->lastCharIsSpace = false; + if (flags & text::NO_NEW_LINE_SEQUENCE) { os << CRLF; curLineLength = 0; + + state->lastCharIsSpace = true; } else { os << NEW_LINE_SEQUENCE; curLineLength = NEW_LINE_SEQUENCE_LENGTH; + + state->lastCharIsSpace = true; } curLineStart = lastWSpos + 1; @@ -523,13 +556,17 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe { os << NEW_LINE_SEQUENCE; curLineLength = NEW_LINE_SEQUENCE_LENGTH; + + state->lastCharIsSpace = true; } // Encode and fold input buffer - if (!startNewLine && !state->isFirstWord && state->prevWordIsEncoded) + if (!startNewLine && !state->isFirstWord && !state->lastCharIsSpace) { os << " "; // Separate from previous word ++curLineLength; + + state->lastCharIsSpace = true; } for (unsigned int i = 0 ; ; ++i) @@ -561,6 +598,7 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe os << wordEnd; state->prevWordIsEncoded = true; + state->lastCharIsSpace = false; } } diff --git a/tests/parser/attachmentHelperTest.cpp b/tests/parser/attachmentHelperTest.cpp index 73a1286e..d1ae10af 100644 --- a/tests/parser/attachmentHelperTest.cpp +++ b/tests/parser/attachmentHelperTest.cpp @@ -307,6 +307,8 @@ VMIME_TEST_SUITE_BEGIN vmime::ref msg = vmime::create (); msg->parse(data); + VASSERT_EQ("0", 2, msg->getBody()->getPartCount()); + vmime::ref att = vmime::attachmentHelper:: getBodyPartAttachment(msg->getBody()->getPartAt(0)); diff --git a/tests/parser/htmlTextPartTest.cpp b/tests/parser/htmlTextPartTest.cpp index 3f9a718e..6276db26 100644 --- a/tests/parser/htmlTextPartTest.cpp +++ b/tests/parser/htmlTextPartTest.cpp @@ -81,6 +81,10 @@ VMIME_TEST_SUITE_BEGIN vmime::ref msg = vmime::create (); msg->parse(msgString); + // Sanity checks + VASSERT_EQ("part-count1", 2, msg->getBody()->getPartCount()); + VASSERT_EQ("part-count2", 2, msg->getBody()->getPartAt(1)->getBody()->getPartCount()); + vmime::htmlTextPart htmlPart; htmlPart.parse(msg, msg->getBody()->getPartAt(1), msg->getBody()->getPartAt(1)->getBody()->getPartAt(0)); @@ -132,6 +136,10 @@ VMIME_TEST_SUITE_BEGIN vmime::ref msg = vmime::create (); msg->parse(msgString); + // Sanity checks + VASSERT_EQ("part-count1", 2, msg->getBody()->getPartCount()); + VASSERT_EQ("part-count2", 3, msg->getBody()->getPartAt(1)->getBody()->getPartCount()); + vmime::htmlTextPart htmlPart; htmlPart.parse(msg, msg->getBody()->getPartAt(1), msg->getBody()->getPartAt(1)->getBody()->getPartAt(1)); @@ -198,6 +206,10 @@ VMIME_TEST_SUITE_BEGIN vmime::ref msg = vmime::create (); msg->parse(msgString); + // Sanity checks + VASSERT_EQ("part-count1", 2, msg->getBody()->getPartCount()); + VASSERT_EQ("part-count2", 2, msg->getBody()->getPartAt(1)->getBody()->getPartCount()); + vmime::htmlTextPart htmlPart; htmlPart.parse(msg, msg->getBody()->getPartAt(1), msg->getBody()->getPartAt(1)->getBody()->getPartAt(1)); diff --git a/tests/parser/textTest.cpp b/tests/parser/textTest.cpp index 706452fb..152dbc46 100644 --- a/tests/parser/textTest.cpp +++ b/tests/parser/textTest.cpp @@ -42,18 +42,14 @@ VMIME_TEST_SUITE_BEGIN VMIME_TEST(testWordParse) VMIME_TEST(testWordGenerate) VMIME_TEST(testWordGenerateSpace) + VMIME_TEST(testWordGenerateSpace2) VMIME_TEST(testWordGenerateMultiBytes) VMIME_TEST_LIST_END static const vmime::string getDisplayText(const vmime::text& t) { - vmime::string res; - - for (int i = 0 ; i < t.getWordCount() ; ++i) - res += t.getWordAt(i)->getBuffer(); - - return res; + return t.getWholeBuffer(); } static const vmime::string cleanGeneratedWords(const std::string& str) @@ -142,11 +138,11 @@ VMIME_TEST_SUITE_BEGIN t2.createFromString(s2, c2); VASSERT_EQ("2.1", 3, t2.getWordCount()); - VASSERT_EQ("2.2", s2_1, t2.getWordAt(0)->getBuffer()); + VASSERT_EQ("2.2", "some ASCII characters and special chars:", t2.getWordAt(0)->getBuffer()); VASSERT_EQ("2.3", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(0)->getCharset()); - VASSERT_EQ("2.4", s2_2, t2.getWordAt(1)->getBuffer()); + VASSERT_EQ("2.4", "\xf1\xf2\xf3\xf4", t2.getWordAt(1)->getBuffer()); VASSERT_EQ("2.5", c2, t2.getWordAt(1)->getCharset()); - VASSERT_EQ("2.6", s2_3, t2.getWordAt(2)->getBuffer()); + VASSERT_EQ("2.6", "and then more ASCII chars.", t2.getWordAt(2)->getBuffer()); VASSERT_EQ("2.7", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(2)->getCharset()); } @@ -215,6 +211,15 @@ VMIME_TEST_SUITE_BEGIN VASSERT_EQ("9", "a b ", DISPLAY_FORM(" \t =?ISO-8859-1?Q?a?= b ")); VASSERT_EQ("10", "a b", DISPLAY_FORM(" a\r\n\t b")); + VASSERT_EQ("11", "a b c", DISPLAY_FORM("a =?ISO-8859-1?Q?b?= c")); + VASSERT_EQ("12", "a b c ", DISPLAY_FORM("a =?ISO-8859-1?Q?b?= c ")); + VASSERT_EQ("13", "a b c ", DISPLAY_FORM(" a =?ISO-8859-1?Q?b?= c ")); + VASSERT_EQ("14", "a b c d", DISPLAY_FORM("a =?ISO-8859-1?Q?b?= c =?ISO-8859-1?Q?d?= ")); + VASSERT_EQ("15", "a b c d e", DISPLAY_FORM("a =?ISO-8859-1?Q?b?= c =?ISO-8859-1?Q?d?= e")); + + // Whitespaces and multiline + VASSERT_EQ("16", "a b c d e", DISPLAY_FORM("=?ISO-8859-1?Q?a_b_?=c\n\t=?ISO-8859-1?Q?d_?=e")); + #undef DISPLAY_FORM } @@ -293,6 +298,35 @@ VMIME_TEST_SUITE_BEGIN txt2.parse(encoded, 0, encoded.length()); VASSERT_EQ("3", decoded, txt2.getWholeBuffer()); + + // -- test rencoding + VASSERT_EQ("4", encoded, txt2.generate()); + } + + void testWordGenerateSpace2() + { + // White-space between two encoded words (#2) + vmime::text txt; + txt.appendWord(vmime::create ("Facture ", "utf-8")); + txt.appendWord(vmime::create ("\xc3\xa0", "utf-8")); + txt.appendWord(vmime::create (" envoyer ", "utf-8")); + txt.appendWord(vmime::create ("\xc3\xa0", "utf-8")); + txt.appendWord(vmime::create (" Martine", "utf-8")); + + const vmime::string decoded = "Facture ""\xc3\xa0"" envoyer ""\xc3\xa0"" Martine"; + const vmime::string encoded = "Facture =?utf-8?B?w6A=?= envoyer =?utf-8?B?w6A=?= Martine"; + + // -- test encoding + VASSERT_EQ("1", encoded, txt.generate()); + + // -- ensure no space is added when decoding + vmime::text txt2; + txt2.parse(encoded, 0, encoded.length()); + + VASSERT_EQ("2", decoded, txt2.getWholeBuffer()); + + // -- test rencoding + VASSERT_EQ("3", encoded, txt2.generate()); } void testWordGenerateMultiBytes() diff --git a/vmime/word.hpp b/vmime/word.hpp index 9efc83cc..800a78d5 100644 --- a/vmime/word.hpp +++ b/vmime/word.hpp @@ -115,12 +115,13 @@ public: public: generatorState() - : isFirstWord(true), prevWordIsEncoded(false) + : isFirstWord(true), prevWordIsEncoded(false), lastCharIsSpace(false) { } bool isFirstWord; bool prevWordIsEncoded; + bool lastCharIsSpace; }; #endif