about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/text.cpp 103
-rw-r--r-- src/word.cpp 42
-rw-r--r-- tests/parser/attachmentHelperTest.cpp 2
-rw-r--r-- tests/parser/htmlTextPartTest.cpp 12
-rw-r--r-- tests/parser/textTest.cpp 52
-rw-r--r-- vmime/word.hpp 3
6 files changed, 158 insertions, 56 deletions
diff --git a/src/text.cpp b/src/text.cpp
index 67aed20f..13a682ef 100644
--- a/src/text.cpp
+++ b/src/text.cpp
@@ -269,63 +269,78 @@ void text::createFromString(const string& in, const charset& ch)
removeAllWords();
- for (string::size_type end = in.size(), pos = 0, start = 0 ; ; )
+ const string::size_type asciiCount =
+ utility::stringUtils::countASCIIchars(in.begin(), in.end());
+
+ const string::size_type asciiPercent =
+ (in.length() == 0 ? 100 : (100 * asciiCount) / in.length());
+
+ // If there are "too many" non-ASCII chars, encode everything
+ if (asciiPercent < 60) // less than 60% ASCII chars
+ {
+ appendWord(vmime::create <word>(in, ch));
+ }
+ // Else, only encode words which need it
+ else
{
- if (pos == end || parserHelpers::isSpace(in[pos]))
+ for (string::size_type end = in.size(), pos = 0, start = 0 ; ; )
{
- if (pos != end)
- ++pos;
+ if (pos == end || parserHelpers::isSpace(in[pos]))
+ {
+ const string chunk(in.begin() + start, in.begin() + pos);
- const string chunk(in.begin() + start, in.begin() + pos);
+ if (pos != end)
+ ++pos;
- if (is8bit)
- {
- if (count && prevIs8bit)
+ if (is8bit)
{
- // No need to create a new encoded word, just append
- // the current word to the previous one.
- ref <word> w = getWordAt(getWordCount() - 1);
- w->getBuffer() += chunk;
+ if (count && prevIs8bit)
+ {
+ // No need to create a new encoded word, just append
+ // the current word to the previous one.
+ ref <word> w = getWordAt(getWordCount() - 1);
+ w->getBuffer() += " " + chunk;
+ }
+ else
+ {
+ appendWord(vmime::create <word>(chunk, ch));
+
+ prevIs8bit = true;
+ ++count;
+ }
}
else
{
- appendWord(vmime::create <word>(chunk, ch));
-
- prevIs8bit = true;
- ++count;
+ if (count && !prevIs8bit)
+ {
+ ref <word> w = getWordAt(getWordCount() - 1);
+ w->getBuffer() += " " + chunk;
+ }
+ else
+ {
+ appendWord(vmime::create <word>
+ (chunk, charset(charsets::US_ASCII)));
+
+ prevIs8bit = false;
+ ++count;
+ }
}
+
+ if (pos == end)
+ break;
+
+ is8bit = false;
+ start = pos;
+ }
+ else if (!parserHelpers::isAscii(in[pos]))
+ {
+ is8bit = true;
+ ++pos;
}
else
{
- if (count && !prevIs8bit)
- {
- ref <word> w = getWordAt(getWordCount() - 1);
- w->getBuffer() += chunk;
- }
- else
- {
- appendWord(vmime::create <word>
- (chunk, charset(charsets::US_ASCII)));
-
- prevIs8bit = false;
- ++count;
- }
+ ++pos;
}
-
- if (pos == end)
- break;
-
- is8bit = false;
- start = pos;
- }
- else if (!parserHelpers::isAscii(in[pos]))
- {
- is8bit = true;
- ++pos;
- }
- else
- {
- ++pos;
}
}
}
diff --git a/src/word.cpp b/src/word.cpp
index 98ad208a..3a0605ff 100644
--- a/src/word.cpp
+++ b/src/word.cpp
@@ -73,8 +73,13 @@ ref <word> word::parseNext(const string& buffer, const string::size_type positio
// - before the first word
// - between two encoded words
// - after the last word
+ string whiteSpaces;
+
while (pos < end && parserHelpers::isSpace(buffer[pos]))
+ {
+ whiteSpaces += buffer[pos];
++pos;
+ }
string::size_type startPos = pos;
string unencoded;
@@ -88,7 +93,10 @@ ref <word> word::parseNext(const string& buffer, const string::size_type positio
string::size_type endPos = pos;
if (pos > position && buffer[pos - 1] == '\r')
+ {
+ ++pos;
--endPos;
+ }
while (pos != end && parserHelpers::isSpace(buffer[pos]))
++pos;
@@ -97,6 +105,7 @@ ref <word> word::parseNext(const string& buffer, const string::size_type positio
unencoded += ' ';
startPos = pos;
+ continue;
}
// Start of an encoded word
else if (pos + 8 < end && // 8 = "=?(.+)?(.+)?(.*)?="
@@ -107,6 +116,9 @@ ref <word> word::parseNext(const string& buffer, const string::size_type positio
if (!unencoded.empty())
{
+ if (prevIsEncoded)
+ unencoded = whiteSpaces + unencoded;
+
ref <word> w = vmime::create <word>(unencoded, charset(charsets::US_ASCII));
w->setParsedBounds(position, pos);
@@ -183,7 +195,7 @@ ref <word> word::parseNext(const string& buffer, const string::size_type positio
if (end != startPos)
{
if (startPos != pos && !isFirst && prevIsEncoded)
- unencoded += ' ';
+ unencoded += whiteSpaces;
unencoded += buffer.substr(startPos, end - startPos);
@@ -388,11 +400,15 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
{
os << CRLF;
curLineLength = 0;
+
+ state->lastCharIsSpace = true;
}
else
{
os << NEW_LINE_SEQUENCE;
curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+
+ state->lastCharIsSpace = true;
}
p = curLineStart;
@@ -401,8 +417,16 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
}
else
{
+ if (!state->isFirstWord && state->prevWordIsEncoded && !state->lastCharIsSpace && !parserHelpers::isSpace(*curLineStart))
+ os << " "; // Separate from previous word
+
os << string(curLineStart, p);
+ if (parserHelpers::isSpace(*(p - 1)))
+ state->lastCharIsSpace = true;
+ else
+ state->lastCharIsSpace = false;
+
if (p == end)
{
finished = true;
@@ -439,15 +463,24 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
os << string(curLineStart, lastWSpos);
+ if (lastWSpos > curLineStart && std::isspace(*(lastWSpos - 1)))
+ state->lastCharIsSpace = true;
+ else
+ state->lastCharIsSpace = false;
+
if (flags & text::NO_NEW_LINE_SEQUENCE)
{
os << CRLF;
curLineLength = 0;
+
+ state->lastCharIsSpace = true;
}
else
{
os << NEW_LINE_SEQUENCE;
curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+
+ state->lastCharIsSpace = true;
}
curLineStart = lastWSpos + 1;
@@ -523,13 +556,17 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
{
os << NEW_LINE_SEQUENCE;
curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+
+ state->lastCharIsSpace = true;
}
// Encode and fold input buffer
- if (!startNewLine && !state->isFirstWord && state->prevWordIsEncoded)
+ if (!startNewLine && !state->isFirstWord && !state->lastCharIsSpace)
{
os << " "; // Separate from previous word
++curLineLength;
+
+ state->lastCharIsSpace = true;
}
for (unsigned int i = 0 ; ; ++i)
@@ -561,6 +598,7 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
os << wordEnd;
state->prevWordIsEncoded = true;
+ state->lastCharIsSpace = false;
}
}
diff --git a/tests/parser/attachmentHelperTest.cpp b/tests/parser/attachmentHelperTest.cpp
index 73a1286e..d1ae10af 100644
--- a/tests/parser/attachmentHelperTest.cpp
+++ b/tests/parser/attachmentHelperTest.cpp
@@ -307,6 +307,8 @@ VMIME_TEST_SUITE_BEGIN
vmime::ref <vmime::message> msg = vmime::create <vmime::message>();
msg->parse(data);
+ VASSERT_EQ("0", 2, msg->getBody()->getPartCount());
+
vmime::ref <const vmime::attachment> att = vmime::attachmentHelper::
getBodyPartAttachment(msg->getBody()->getPartAt(0));
diff --git a/tests/parser/htmlTextPartTest.cpp b/tests/parser/htmlTextPartTest.cpp
index 3f9a718e..6276db26 100644
--- a/tests/parser/htmlTextPartTest.cpp
+++ b/tests/parser/htmlTextPartTest.cpp
@@ -81,6 +81,10 @@ VMIME_TEST_SUITE_BEGIN
vmime::ref <vmime::message> msg = vmime::create <vmime::message>();
msg->parse(msgString);
+ // Sanity checks
+ VASSERT_EQ("part-count1", 2, msg->getBody()->getPartCount());
+ VASSERT_EQ("part-count2", 2, msg->getBody()->getPartAt(1)->getBody()->getPartCount());
+
vmime::htmlTextPart htmlPart;
htmlPart.parse(msg, msg->getBody()->getPartAt(1),
msg->getBody()->getPartAt(1)->getBody()->getPartAt(0));
@@ -132,6 +136,10 @@ VMIME_TEST_SUITE_BEGIN
vmime::ref <vmime::message> msg = vmime::create <vmime::message>();
msg->parse(msgString);
+ // Sanity checks
+ VASSERT_EQ("part-count1", 2, msg->getBody()->getPartCount());
+ VASSERT_EQ("part-count2", 3, msg->getBody()->getPartAt(1)->getBody()->getPartCount());
+
vmime::htmlTextPart htmlPart;
htmlPart.parse(msg, msg->getBody()->getPartAt(1),
msg->getBody()->getPartAt(1)->getBody()->getPartAt(1));
@@ -198,6 +206,10 @@ VMIME_TEST_SUITE_BEGIN
vmime::ref <vmime::message> msg = vmime::create <vmime::message>();
msg->parse(msgString);
+ // Sanity checks
+ VASSERT_EQ("part-count1", 2, msg->getBody()->getPartCount());
+ VASSERT_EQ("part-count2", 2, msg->getBody()->getPartAt(1)->getBody()->getPartCount());
+
vmime::htmlTextPart htmlPart;
htmlPart.parse(msg, msg->getBody()->getPartAt(1),
msg->getBody()->getPartAt(1)->getBody()->getPartAt(1));
diff --git a/tests/parser/textTest.cpp b/tests/parser/textTest.cpp
index 706452fb..152dbc46 100644
--- a/tests/parser/textTest.cpp
+++ b/tests/parser/textTest.cpp
@@ -42,18 +42,14 @@ VMIME_TEST_SUITE_BEGIN
VMIME_TEST(testWordParse)
VMIME_TEST(testWordGenerate)
VMIME_TEST(testWordGenerateSpace)
+ VMIME_TEST(testWordGenerateSpace2)
VMIME_TEST(testWordGenerateMultiBytes)
VMIME_TEST_LIST_END
static const vmime::string getDisplayText(const vmime::text& t)
{
- vmime::string res;
-
- for (int i = 0 ; i < t.getWordCount() ; ++i)
- res += t.getWordAt(i)->getBuffer();
-
- return res;
+ return t.getWholeBuffer();
}
static const vmime::string cleanGeneratedWords(const std::string& str)
@@ -142,11 +138,11 @@ VMIME_TEST_SUITE_BEGIN
t2.createFromString(s2, c2);
VASSERT_EQ("2.1", 3, t2.getWordCount());
- VASSERT_EQ("2.2", s2_1, t2.getWordAt(0)->getBuffer());
+ VASSERT_EQ("2.2", "some ASCII characters and special chars:", t2.getWordAt(0)->getBuffer());
VASSERT_EQ("2.3", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(0)->getCharset());
- VASSERT_EQ("2.4", s2_2, t2.getWordAt(1)->getBuffer());
+ VASSERT_EQ("2.4", "\xf1\xf2\xf3\xf4", t2.getWordAt(1)->getBuffer());
VASSERT_EQ("2.5", c2, t2.getWordAt(1)->getCharset());
- VASSERT_EQ("2.6", s2_3, t2.getWordAt(2)->getBuffer());
+ VASSERT_EQ("2.6", "and then more ASCII chars.", t2.getWordAt(2)->getBuffer());
VASSERT_EQ("2.7", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(2)->getCharset());
}
@@ -215,6 +211,15 @@ VMIME_TEST_SUITE_BEGIN
VASSERT_EQ("9", "a b ", DISPLAY_FORM(" \t =?ISO-8859-1?Q?a?= b "));
VASSERT_EQ("10", "a b", DISPLAY_FORM(" a\r\n\t b"));
+ VASSERT_EQ("11", "a b c", DISPLAY_FORM("a =?ISO-8859-1?Q?b?= c"));
+ VASSERT_EQ("12", "a b c ", DISPLAY_FORM("a =?ISO-8859-1?Q?b?= c "));
+ VASSERT_EQ("13", "a b c ", DISPLAY_FORM(" a =?ISO-8859-1?Q?b?= c "));
+ VASSERT_EQ("14", "a b c d", DISPLAY_FORM("a =?ISO-8859-1?Q?b?= c =?ISO-8859-1?Q?d?= "));
+ VASSERT_EQ("15", "a b c d e", DISPLAY_FORM("a =?ISO-8859-1?Q?b?= c =?ISO-8859-1?Q?d?= e"));
+
+ // Whitespaces and multiline
+ VASSERT_EQ("16", "a b c d e", DISPLAY_FORM("=?ISO-8859-1?Q?a_b_?=c\n\t=?ISO-8859-1?Q?d_?=e"));
+
#undef DISPLAY_FORM
}
@@ -293,6 +298,35 @@ VMIME_TEST_SUITE_BEGIN
txt2.parse(encoded, 0, encoded.length());
VASSERT_EQ("3", decoded, txt2.getWholeBuffer());
+
+ // -- test re-encoding
+ VASSERT_EQ("4", encoded, txt2.generate());
+ }
+
+ void testWordGenerateSpace2()
+ {
+ // White-space between two encoded words (#2)
+ vmime::text txt;
+ txt.appendWord(vmime::create <vmime::word>("Facture ", "utf-8"));
+ txt.appendWord(vmime::create <vmime::word>("\xc3\xa0", "utf-8"));
+ txt.appendWord(vmime::create <vmime::word>(" envoyer ", "utf-8"));
+ txt.appendWord(vmime::create <vmime::word>("\xc3\xa0", "utf-8"));
+ txt.appendWord(vmime::create <vmime::word>(" Martine", "utf-8"));
+
+ const vmime::string decoded = "Facture ""\xc3\xa0"" envoyer ""\xc3\xa0"" Martine";
+ const vmime::string encoded = "Facture =?utf-8?B?w6A=?= envoyer =?utf-8?B?w6A=?= Martine";
+
+ // -- test encoding
+ VASSERT_EQ("1", encoded, txt.generate());
+
+ // -- ensure no space is added when decoding
+ vmime::text txt2;
+ txt2.parse(encoded, 0, encoded.length());
+
+ VASSERT_EQ("2", decoded, txt2.getWholeBuffer());
+
+ // -- test re-encoding
+ VASSERT_EQ("3", encoded, txt2.generate());
}
void testWordGenerateMultiBytes()
diff --git a/vmime/word.hpp b/vmime/word.hpp
index 9efc83cc..800a78d5 100644
--- a/vmime/word.hpp
+++ b/vmime/word.hpp
@@ -115,12 +115,13 @@ public:
public:
generatorState()
- : isFirstWord(true), prevWordIsEncoded(false)
+ : isFirstWord(true), prevWordIsEncoded(false), lastCharIsSpace(false)
{
}
bool isFirstWord;
bool prevWordIsEncoded;
+ bool lastCharIsSpace;
};
#endif