This commit is contained in:
Vincent Richard 2008-04-28 19:49:48 +00:00
parent 9246b79aae
commit 439b2b3e90
6 changed files with 158 additions and 56 deletions

View File

@ -269,63 +269,78 @@ void text::createFromString(const string& in, const charset& ch)
removeAllWords(); removeAllWords();
for (string::size_type end = in.size(), pos = 0, start = 0 ; ; ) const string::size_type asciiCount =
utility::stringUtils::countASCIIchars(in.begin(), in.end());
const string::size_type asciiPercent =
(in.length() == 0 ? 100 : (100 * asciiCount) / in.length());
// If there are "too many" non-ASCII chars, encode everything
if (asciiPercent < 60) // less than 60% ASCII chars
{ {
if (pos == end || parserHelpers::isSpace(in[pos])) appendWord(vmime::create <word>(in, ch));
}
// Else, only encode words which need it
else
{
for (string::size_type end = in.size(), pos = 0, start = 0 ; ; )
{ {
if (pos != end) if (pos == end || parserHelpers::isSpace(in[pos]))
++pos;
const string chunk(in.begin() + start, in.begin() + pos);
if (is8bit)
{ {
if (count && prevIs8bit) const string chunk(in.begin() + start, in.begin() + pos);
if (pos != end)
++pos;
if (is8bit)
{ {
// No need to create a new encoded word, just append if (count && prevIs8bit)
// the current word to the previous one. {
ref <word> w = getWordAt(getWordCount() - 1); // No need to create a new encoded word, just append
w->getBuffer() += chunk; // the current word to the previous one.
ref <word> w = getWordAt(getWordCount() - 1);
w->getBuffer() += " " + chunk;
}
else
{
appendWord(vmime::create <word>(chunk, ch));
prevIs8bit = true;
++count;
}
} }
else else
{ {
appendWord(vmime::create <word>(chunk, ch)); if (count && !prevIs8bit)
{
ref <word> w = getWordAt(getWordCount() - 1);
w->getBuffer() += " " + chunk;
}
else
{
appendWord(vmime::create <word>
(chunk, charset(charsets::US_ASCII)));
prevIs8bit = true; prevIs8bit = false;
++count; ++count;
}
} }
if (pos == end)
break;
is8bit = false;
start = pos;
}
else if (!parserHelpers::isAscii(in[pos]))
{
is8bit = true;
++pos;
} }
else else
{ {
if (count && !prevIs8bit) ++pos;
{
ref <word> w = getWordAt(getWordCount() - 1);
w->getBuffer() += chunk;
}
else
{
appendWord(vmime::create <word>
(chunk, charset(charsets::US_ASCII)));
prevIs8bit = false;
++count;
}
} }
if (pos == end)
break;
is8bit = false;
start = pos;
}
else if (!parserHelpers::isAscii(in[pos]))
{
is8bit = true;
++pos;
}
else
{
++pos;
} }
} }
} }

View File

@ -73,8 +73,13 @@ ref <word> word::parseNext(const string& buffer, const string::size_type positio
// - before the first word // - before the first word
// - between two encoded words // - between two encoded words
// - after the last word // - after the last word
string whiteSpaces;
while (pos < end && parserHelpers::isSpace(buffer[pos])) while (pos < end && parserHelpers::isSpace(buffer[pos]))
{
whiteSpaces += buffer[pos];
++pos; ++pos;
}
string::size_type startPos = pos; string::size_type startPos = pos;
string unencoded; string unencoded;
@ -88,7 +93,10 @@ ref <word> word::parseNext(const string& buffer, const string::size_type positio
string::size_type endPos = pos; string::size_type endPos = pos;
if (pos > position && buffer[pos - 1] == '\r') if (pos > position && buffer[pos - 1] == '\r')
{
++pos;
--endPos; --endPos;
}
while (pos != end && parserHelpers::isSpace(buffer[pos])) while (pos != end && parserHelpers::isSpace(buffer[pos]))
++pos; ++pos;
@ -97,6 +105,7 @@ ref <word> word::parseNext(const string& buffer, const string::size_type positio
unencoded += ' '; unencoded += ' ';
startPos = pos; startPos = pos;
continue;
} }
// Start of an encoded word // Start of an encoded word
else if (pos + 8 < end && // 8 = "=?(.+)?(.+)?(.*)?=" else if (pos + 8 < end && // 8 = "=?(.+)?(.+)?(.*)?="
@ -107,6 +116,9 @@ ref <word> word::parseNext(const string& buffer, const string::size_type positio
if (!unencoded.empty()) if (!unencoded.empty())
{ {
if (prevIsEncoded)
unencoded = whiteSpaces + unencoded;
ref <word> w = vmime::create <word>(unencoded, charset(charsets::US_ASCII)); ref <word> w = vmime::create <word>(unencoded, charset(charsets::US_ASCII));
w->setParsedBounds(position, pos); w->setParsedBounds(position, pos);
@ -183,7 +195,7 @@ ref <word> word::parseNext(const string& buffer, const string::size_type positio
if (end != startPos) if (end != startPos)
{ {
if (startPos != pos && !isFirst && prevIsEncoded) if (startPos != pos && !isFirst && prevIsEncoded)
unencoded += ' '; unencoded += whiteSpaces;
unencoded += buffer.substr(startPos, end - startPos); unencoded += buffer.substr(startPos, end - startPos);
@ -388,11 +400,15 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
{ {
os << CRLF; os << CRLF;
curLineLength = 0; curLineLength = 0;
state->lastCharIsSpace = true;
} }
else else
{ {
os << NEW_LINE_SEQUENCE; os << NEW_LINE_SEQUENCE;
curLineLength = NEW_LINE_SEQUENCE_LENGTH; curLineLength = NEW_LINE_SEQUENCE_LENGTH;
state->lastCharIsSpace = true;
} }
p = curLineStart; p = curLineStart;
@ -401,8 +417,16 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
} }
else else
{ {
if (!state->isFirstWord && state->prevWordIsEncoded && !state->lastCharIsSpace && !parserHelpers::isSpace(*curLineStart))
os << " "; // Separate from previous word
os << string(curLineStart, p); os << string(curLineStart, p);
if (parserHelpers::isSpace(*(p - 1)))
state->lastCharIsSpace = true;
else
state->lastCharIsSpace = false;
if (p == end) if (p == end)
{ {
finished = true; finished = true;
@ -439,15 +463,24 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
os << string(curLineStart, lastWSpos); os << string(curLineStart, lastWSpos);
if (lastWSpos > curLineStart && std::isspace(*(lastWSpos - 1)))
state->lastCharIsSpace = true;
else
state->lastCharIsSpace = false;
if (flags & text::NO_NEW_LINE_SEQUENCE) if (flags & text::NO_NEW_LINE_SEQUENCE)
{ {
os << CRLF; os << CRLF;
curLineLength = 0; curLineLength = 0;
state->lastCharIsSpace = true;
} }
else else
{ {
os << NEW_LINE_SEQUENCE; os << NEW_LINE_SEQUENCE;
curLineLength = NEW_LINE_SEQUENCE_LENGTH; curLineLength = NEW_LINE_SEQUENCE_LENGTH;
state->lastCharIsSpace = true;
} }
curLineStart = lastWSpos + 1; curLineStart = lastWSpos + 1;
@ -523,13 +556,17 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
{ {
os << NEW_LINE_SEQUENCE; os << NEW_LINE_SEQUENCE;
curLineLength = NEW_LINE_SEQUENCE_LENGTH; curLineLength = NEW_LINE_SEQUENCE_LENGTH;
state->lastCharIsSpace = true;
} }
// Encode and fold input buffer // Encode and fold input buffer
if (!startNewLine && !state->isFirstWord && state->prevWordIsEncoded) if (!startNewLine && !state->isFirstWord && !state->lastCharIsSpace)
{ {
os << " "; // Separate from previous word os << " "; // Separate from previous word
++curLineLength; ++curLineLength;
state->lastCharIsSpace = true;
} }
for (unsigned int i = 0 ; ; ++i) for (unsigned int i = 0 ; ; ++i)
@ -561,6 +598,7 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
os << wordEnd; os << wordEnd;
state->prevWordIsEncoded = true; state->prevWordIsEncoded = true;
state->lastCharIsSpace = false;
} }
} }

View File

@ -307,6 +307,8 @@ VMIME_TEST_SUITE_BEGIN
vmime::ref <vmime::message> msg = vmime::create <vmime::message>(); vmime::ref <vmime::message> msg = vmime::create <vmime::message>();
msg->parse(data); msg->parse(data);
VASSERT_EQ("0", 2, msg->getBody()->getPartCount());
vmime::ref <const vmime::attachment> att = vmime::attachmentHelper:: vmime::ref <const vmime::attachment> att = vmime::attachmentHelper::
getBodyPartAttachment(msg->getBody()->getPartAt(0)); getBodyPartAttachment(msg->getBody()->getPartAt(0));

View File

@ -81,6 +81,10 @@ VMIME_TEST_SUITE_BEGIN
vmime::ref <vmime::message> msg = vmime::create <vmime::message>(); vmime::ref <vmime::message> msg = vmime::create <vmime::message>();
msg->parse(msgString); msg->parse(msgString);
// Sanity checks
VASSERT_EQ("part-count1", 2, msg->getBody()->getPartCount());
VASSERT_EQ("part-count2", 2, msg->getBody()->getPartAt(1)->getBody()->getPartCount());
vmime::htmlTextPart htmlPart; vmime::htmlTextPart htmlPart;
htmlPart.parse(msg, msg->getBody()->getPartAt(1), htmlPart.parse(msg, msg->getBody()->getPartAt(1),
msg->getBody()->getPartAt(1)->getBody()->getPartAt(0)); msg->getBody()->getPartAt(1)->getBody()->getPartAt(0));
@ -132,6 +136,10 @@ VMIME_TEST_SUITE_BEGIN
vmime::ref <vmime::message> msg = vmime::create <vmime::message>(); vmime::ref <vmime::message> msg = vmime::create <vmime::message>();
msg->parse(msgString); msg->parse(msgString);
// Sanity checks
VASSERT_EQ("part-count1", 2, msg->getBody()->getPartCount());
VASSERT_EQ("part-count2", 3, msg->getBody()->getPartAt(1)->getBody()->getPartCount());
vmime::htmlTextPart htmlPart; vmime::htmlTextPart htmlPart;
htmlPart.parse(msg, msg->getBody()->getPartAt(1), htmlPart.parse(msg, msg->getBody()->getPartAt(1),
msg->getBody()->getPartAt(1)->getBody()->getPartAt(1)); msg->getBody()->getPartAt(1)->getBody()->getPartAt(1));
@ -198,6 +206,10 @@ VMIME_TEST_SUITE_BEGIN
vmime::ref <vmime::message> msg = vmime::create <vmime::message>(); vmime::ref <vmime::message> msg = vmime::create <vmime::message>();
msg->parse(msgString); msg->parse(msgString);
// Sanity checks
VASSERT_EQ("part-count1", 2, msg->getBody()->getPartCount());
VASSERT_EQ("part-count2", 2, msg->getBody()->getPartAt(1)->getBody()->getPartCount());
vmime::htmlTextPart htmlPart; vmime::htmlTextPart htmlPart;
htmlPart.parse(msg, msg->getBody()->getPartAt(1), htmlPart.parse(msg, msg->getBody()->getPartAt(1),
msg->getBody()->getPartAt(1)->getBody()->getPartAt(1)); msg->getBody()->getPartAt(1)->getBody()->getPartAt(1));

View File

@ -42,18 +42,14 @@ VMIME_TEST_SUITE_BEGIN
VMIME_TEST(testWordParse) VMIME_TEST(testWordParse)
VMIME_TEST(testWordGenerate) VMIME_TEST(testWordGenerate)
VMIME_TEST(testWordGenerateSpace) VMIME_TEST(testWordGenerateSpace)
VMIME_TEST(testWordGenerateSpace2)
VMIME_TEST(testWordGenerateMultiBytes) VMIME_TEST(testWordGenerateMultiBytes)
VMIME_TEST_LIST_END VMIME_TEST_LIST_END
static const vmime::string getDisplayText(const vmime::text& t) static const vmime::string getDisplayText(const vmime::text& t)
{ {
vmime::string res; return t.getWholeBuffer();
for (int i = 0 ; i < t.getWordCount() ; ++i)
res += t.getWordAt(i)->getBuffer();
return res;
} }
static const vmime::string cleanGeneratedWords(const std::string& str) static const vmime::string cleanGeneratedWords(const std::string& str)
@ -142,11 +138,11 @@ VMIME_TEST_SUITE_BEGIN
t2.createFromString(s2, c2); t2.createFromString(s2, c2);
VASSERT_EQ("2.1", 3, t2.getWordCount()); VASSERT_EQ("2.1", 3, t2.getWordCount());
VASSERT_EQ("2.2", s2_1, t2.getWordAt(0)->getBuffer()); VASSERT_EQ("2.2", "some ASCII characters and special chars:", t2.getWordAt(0)->getBuffer());
VASSERT_EQ("2.3", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(0)->getCharset()); VASSERT_EQ("2.3", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(0)->getCharset());
VASSERT_EQ("2.4", s2_2, t2.getWordAt(1)->getBuffer()); VASSERT_EQ("2.4", "\xf1\xf2\xf3\xf4", t2.getWordAt(1)->getBuffer());
VASSERT_EQ("2.5", c2, t2.getWordAt(1)->getCharset()); VASSERT_EQ("2.5", c2, t2.getWordAt(1)->getCharset());
VASSERT_EQ("2.6", s2_3, t2.getWordAt(2)->getBuffer()); VASSERT_EQ("2.6", "and then more ASCII chars.", t2.getWordAt(2)->getBuffer());
VASSERT_EQ("2.7", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(2)->getCharset()); VASSERT_EQ("2.7", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(2)->getCharset());
} }
@ -215,6 +211,15 @@ VMIME_TEST_SUITE_BEGIN
VASSERT_EQ("9", "a b ", DISPLAY_FORM(" \t =?ISO-8859-1?Q?a?= b ")); VASSERT_EQ("9", "a b ", DISPLAY_FORM(" \t =?ISO-8859-1?Q?a?= b "));
VASSERT_EQ("10", "a b", DISPLAY_FORM(" a\r\n\t b")); VASSERT_EQ("10", "a b", DISPLAY_FORM(" a\r\n\t b"));
VASSERT_EQ("11", "a b c", DISPLAY_FORM("a =?ISO-8859-1?Q?b?= c"));
VASSERT_EQ("12", "a b c ", DISPLAY_FORM("a =?ISO-8859-1?Q?b?= c "));
VASSERT_EQ("13", "a b c ", DISPLAY_FORM(" a =?ISO-8859-1?Q?b?= c "));
VASSERT_EQ("14", "a b c d", DISPLAY_FORM("a =?ISO-8859-1?Q?b?= c =?ISO-8859-1?Q?d?= "));
VASSERT_EQ("15", "a b c d e", DISPLAY_FORM("a =?ISO-8859-1?Q?b?= c =?ISO-8859-1?Q?d?= e"));
// Whitespaces and multiline
VASSERT_EQ("16", "a b c d e", DISPLAY_FORM("=?ISO-8859-1?Q?a_b_?=c\n\t=?ISO-8859-1?Q?d_?=e"));
#undef DISPLAY_FORM #undef DISPLAY_FORM
} }
@ -293,6 +298,35 @@ VMIME_TEST_SUITE_BEGIN
txt2.parse(encoded, 0, encoded.length()); txt2.parse(encoded, 0, encoded.length());
VASSERT_EQ("3", decoded, txt2.getWholeBuffer()); VASSERT_EQ("3", decoded, txt2.getWholeBuffer());
// -- test re-encoding
VASSERT_EQ("4", encoded, txt2.generate());
}
void testWordGenerateSpace2()
{
// White-space between two encoded words (#2)
vmime::text txt;
txt.appendWord(vmime::create <vmime::word>("Facture ", "utf-8"));
txt.appendWord(vmime::create <vmime::word>("\xc3\xa0", "utf-8"));
txt.appendWord(vmime::create <vmime::word>(" envoyer ", "utf-8"));
txt.appendWord(vmime::create <vmime::word>("\xc3\xa0", "utf-8"));
txt.appendWord(vmime::create <vmime::word>(" Martine", "utf-8"));
const vmime::string decoded = "Facture ""\xc3\xa0"" envoyer ""\xc3\xa0"" Martine";
const vmime::string encoded = "Facture =?utf-8?B?w6A=?= envoyer =?utf-8?B?w6A=?= Martine";
// -- test encoding
VASSERT_EQ("1", encoded, txt.generate());
// -- ensure no space is added when decoding
vmime::text txt2;
txt2.parse(encoded, 0, encoded.length());
VASSERT_EQ("2", decoded, txt2.getWholeBuffer());
// -- test re-encoding
VASSERT_EQ("3", encoded, txt2.generate());
} }
void testWordGenerateMultiBytes() void testWordGenerateMultiBytes()

View File

@ -115,12 +115,13 @@ public:
public: public:
generatorState() generatorState()
: isFirstWord(true), prevWordIsEncoded(false) : isFirstWord(true), prevWordIsEncoded(false), lastCharIsSpace(false)
{ {
} }
bool isFirstWord; bool isFirstWord;
bool prevWordIsEncoded; bool prevWordIsEncoded;
bool lastCharIsSpace;
}; };
#endif #endif