Moved the encodeAndFoldText() and decodeAndUnfoldText() functions from "base.cpp" to "text.cpp", where they become text::encodeAndFold() and text::decodeAndUnfold().
This commit is contained in:
parent
91cc37178f
commit
5868c87506
@ -284,12 +284,12 @@ public:
|
||||
return getpid();
|
||||
}
|
||||
|
||||
vmime::messaging::socketFactory* getSocketFactory(const vmime::string& name) const
|
||||
vmime::messaging::socketFactory* getSocketFactory(const vmime::string& /* name */) const
|
||||
{
|
||||
return m_sf;
|
||||
}
|
||||
|
||||
vmime::messaging::timeoutHandlerFactory* getTimeoutHandlerFactory(const vmime::string& name) const
|
||||
vmime::messaging::timeoutHandlerFactory* getTimeoutHandlerFactory(const vmime::string& /* name */) const
|
||||
{
|
||||
// Not used for now
|
||||
return NULL;
|
||||
|
522
src/base.cpp
522
src/base.cpp
@ -98,528 +98,6 @@ namespace lineLengthLimits
|
||||
|
||||
|
||||
|
||||
/** Encode and fold text in respect to RFC-2047.
|
||||
*
|
||||
* @param os output stream
|
||||
* @param in input text
|
||||
* @param maxLineLength maximum line length for output
|
||||
* @param firstLineOffset the first line length (may be useful if the current output line is not empty)
|
||||
* @param lastLineLength will receive the length of the last line written
|
||||
* @param flags encoding flags (see encodeAndFoldFlags)
|
||||
*/
|
||||
|
||||
void encodeAndFoldText(utility::outputStream& os, const text& in, const string::size_type maxLineLength,
|
||||
const string::size_type firstLineOffset, string::size_type* lastLineLength, const int flags)
|
||||
{
|
||||
string::size_type curLineLength = firstLineOffset;
|
||||
|
||||
for (int wi = 0 ; wi < in.getWordCount() ; ++wi)
|
||||
{
|
||||
const word& w = *in.getWordAt(wi);
|
||||
const string& buffer = w.getBuffer();
|
||||
|
||||
// Calculate the number of ASCII chars to check whether encoding is needed
|
||||
// and _which_ encoding to use.
|
||||
const string::size_type asciiCount =
|
||||
stringUtils::countASCIIchars(buffer.begin(), buffer.end());
|
||||
|
||||
bool noEncoding = (flags & encodeAndFoldFlags::forceNoEncoding) ||
|
||||
(!(flags & encodeAndFoldFlags::forceEncoding) && asciiCount == buffer.length());
|
||||
|
||||
if (noEncoding)
|
||||
{
|
||||
// We will fold lines without encoding them.
|
||||
|
||||
string::const_iterator lastWSpos = buffer.end(); // last white-space position
|
||||
string::const_iterator curLineStart = buffer.begin(); // current line start
|
||||
|
||||
string::const_iterator p = buffer.begin();
|
||||
const string::const_iterator end = buffer.end();
|
||||
|
||||
bool finished = false;
|
||||
bool newLine = false;
|
||||
|
||||
while (!finished)
|
||||
{
|
||||
for ( ; p != end ; ++p, ++curLineLength)
|
||||
{
|
||||
// Exceeded maximum line length, but we have found a white-space
|
||||
// where we can cut the line...
|
||||
if (curLineLength >= maxLineLength && lastWSpos != end)
|
||||
break;
|
||||
|
||||
if (*p == ' ' || *p == '\t')
|
||||
{
|
||||
// Remember the position of this white-space character
|
||||
lastWSpos = p;
|
||||
}
|
||||
}
|
||||
|
||||
if (p != end)
|
||||
++curLineLength;
|
||||
|
||||
//if (p == end || curLineLength >= maxLineLength)
|
||||
{
|
||||
if (p == end || lastWSpos == end)
|
||||
{
|
||||
// If we are here, it means that we have found no whitespace
|
||||
// before the first "maxLineLength" characters. In this case,
|
||||
// we write the full line no matter of the max line length...
|
||||
|
||||
if (!newLine && p != end && lastWSpos == end &&
|
||||
wi != 0 && curLineStart == buffer.begin())
|
||||
{
|
||||
// Here, we are continuing on the line of previous encoded
|
||||
// word, but there is not even enough space to put the
|
||||
// first word of this line, so we start a new line.
|
||||
if (flags & encodeAndFoldFlags::noNewLineSequence)
|
||||
{
|
||||
os << CRLF;
|
||||
curLineLength = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
os << NEW_LINE_SEQUENCE;
|
||||
curLineLength = NEW_LINE_SEQUENCE_LENGTH;
|
||||
}
|
||||
|
||||
p = curLineStart;
|
||||
lastWSpos = end;
|
||||
newLine = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
os << string(curLineStart, p);
|
||||
|
||||
if (p == end)
|
||||
{
|
||||
finished = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (flags & encodeAndFoldFlags::noNewLineSequence)
|
||||
{
|
||||
os << CRLF;
|
||||
curLineLength = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
os << NEW_LINE_SEQUENCE;
|
||||
curLineLength = NEW_LINE_SEQUENCE_LENGTH;
|
||||
}
|
||||
|
||||
curLineStart = p;
|
||||
lastWSpos = end;
|
||||
newLine = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// In this case, there will not be enough space on the line for all the
|
||||
// characters _after_ the last white-space; so we cut the line at this
|
||||
// last white-space.
|
||||
|
||||
#if 1
|
||||
if (curLineLength != 1 && wi != 0)
|
||||
os << " "; // Separate from previous word
|
||||
#endif
|
||||
|
||||
os << string(curLineStart, lastWSpos);
|
||||
|
||||
if (flags & encodeAndFoldFlags::noNewLineSequence)
|
||||
{
|
||||
os << CRLF;
|
||||
curLineLength = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
os << NEW_LINE_SEQUENCE;
|
||||
curLineLength = NEW_LINE_SEQUENCE_LENGTH;
|
||||
}
|
||||
|
||||
curLineStart = lastWSpos + 1;
|
||||
|
||||
p = lastWSpos + 1;
|
||||
lastWSpos = end;
|
||||
newLine = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
RFC #2047:
|
||||
4. Encodings
|
||||
|
||||
Initially, the legal values for "encoding" are "Q" and "B". These
|
||||
encodings are described below. The "Q" encoding is recommended for
|
||||
use when most of the characters to be encoded are in the ASCII
|
||||
character set; otherwise, the "B" encoding should be used.
|
||||
Nevertheless, a mail reader which claims to recognize 'encoded-word's
|
||||
MUST be able to accept either encoding for any character set which it
|
||||
supports.
|
||||
*/
|
||||
else
|
||||
{
|
||||
// We will encode _AND_ fold lines
|
||||
|
||||
/*
|
||||
RFC #2047:
|
||||
2. Syntax of encoded-words
|
||||
|
||||
" While there is no limit to the length of a multiple-line header
|
||||
field, each line of a header field that contains one or more
|
||||
'encoded-word's is limited to 76 characters. "
|
||||
*/
|
||||
|
||||
const string::size_type maxLineLength3 =
|
||||
(maxLineLength == lineLengthLimits::infinite)
|
||||
? maxLineLength
|
||||
: std::min(maxLineLength, (const string::size_type) 76);
|
||||
|
||||
// Base64 if more than 60% non-ascii, quoted-printable else (default)
|
||||
const string::size_type asciiPercent = (100 * asciiCount) / buffer.length();
|
||||
const string::value_type encoding = (asciiPercent <= 40) ? 'B' : 'Q';
|
||||
|
||||
string wordStart("=?" + w.getCharset().getName() + "?" + encoding + "?");
|
||||
string wordEnd("?=");
|
||||
|
||||
const string::size_type minWordLength = wordStart.length() + wordEnd.length();
|
||||
const string::size_type maxLineLength2 = (maxLineLength3 < minWordLength + 1)
|
||||
? maxLineLength3 + minWordLength + 1 : maxLineLength3;
|
||||
|
||||
// Checks whether remaining space on this line is usable. If too few
|
||||
// characters can be encoded, start a new line.
|
||||
bool startNewLine = true;
|
||||
|
||||
if (curLineLength + 2 < maxLineLength2)
|
||||
{
|
||||
const string::size_type remainingSpaceOnLine = maxLineLength2 - curLineLength - 2;
|
||||
|
||||
if (remainingSpaceOnLine < minWordLength + 10)
|
||||
{
|
||||
// Space for no more than 10 encoded chars!
|
||||
// It is not worth while to continue on this line...
|
||||
startNewLine = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// OK, there is enough usable space on the current line.
|
||||
startNewLine = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (startNewLine)
|
||||
{
|
||||
os << NEW_LINE_SEQUENCE;
|
||||
curLineLength = NEW_LINE_SEQUENCE_LENGTH;
|
||||
}
|
||||
|
||||
// Encode and fold input buffer
|
||||
string::const_iterator pos = buffer.begin();
|
||||
string::size_type remaining = buffer.length();
|
||||
|
||||
encoder* theEncoder = ((encoding == 'B')
|
||||
? ((encoder*) new encoderB64)
|
||||
: ((encoder*) new encoderQP));
|
||||
|
||||
string qpEncodedBuffer;
|
||||
|
||||
if (encoding == 'Q')
|
||||
{
|
||||
theEncoder->getProperties()["rfc2047"] = true;
|
||||
|
||||
// In the case of Quoted-Printable encoding, we cannot simply encode input
|
||||
// buffer line by line. So, we encode the whole buffer and we will fold it
|
||||
// in the next loop...
|
||||
utility::inputStreamStringAdapter in(buffer);
|
||||
utility::outputStreamStringAdapter out(qpEncodedBuffer);
|
||||
|
||||
theEncoder->encode(in, out);
|
||||
|
||||
pos = qpEncodedBuffer.begin();
|
||||
remaining = qpEncodedBuffer.length();
|
||||
}
|
||||
|
||||
#if 1
|
||||
if (curLineLength != 1 && wi != 0)
|
||||
{
|
||||
os << " "; // Separate from previous word
|
||||
++curLineLength;
|
||||
}
|
||||
#endif
|
||||
|
||||
for ( ; remaining ; )
|
||||
{
|
||||
// Start a new encoded word
|
||||
os << wordStart;
|
||||
curLineLength += minWordLength;
|
||||
|
||||
// Compute the number of encoded chars that will fit on this line
|
||||
const string::size_type fit = maxLineLength2 - curLineLength;
|
||||
|
||||
// Base-64 encoding
|
||||
if (encoding == 'B')
|
||||
{
|
||||
// TODO: WARNING! "Any encoded word which encodes a non-integral
|
||||
// number of characters or octets is incorrectly formed."
|
||||
|
||||
// Here, we have a formula to compute the maximum number of source
|
||||
// characters to encode knowing the maximum number of encoded chars
|
||||
// (with Base64, 3 bytes of input provide 4 bytes of output).
|
||||
string::size_type count = (fit > 1) ? ((fit - 1) * 3) / 4 : 1;
|
||||
if (count > remaining) count = remaining;
|
||||
|
||||
utility::inputStreamStringAdapter in
|
||||
(buffer, pos - buffer.begin(), pos - buffer.begin() + count);
|
||||
|
||||
curLineLength += theEncoder->encode(in, os);
|
||||
|
||||
pos += count;
|
||||
remaining -= count;
|
||||
}
|
||||
// Quoted-Printable encoding
|
||||
else
|
||||
{
|
||||
// TODO: WARNING! "Any encoded word which encodes a non-integral
|
||||
// number of characters or octets is incorrectly formed."
|
||||
|
||||
// All we have to do here is to take a certain number of character
|
||||
// (that is less than or equal to "fit") from the QP encoded buffer,
|
||||
// but we also make sure not to fold a "=XY" encoded char.
|
||||
const string::const_iterator qpEnd = qpEncodedBuffer.end();
|
||||
string::const_iterator lastFoldPos = pos;
|
||||
string::const_iterator p = pos;
|
||||
string::size_type n = 0;
|
||||
|
||||
while (n < fit && p != qpEnd)
|
||||
{
|
||||
if (*p == '=')
|
||||
{
|
||||
if (n + 3 >= fit)
|
||||
{
|
||||
lastFoldPos = p;
|
||||
break;
|
||||
}
|
||||
|
||||
p += 3;
|
||||
n += 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
++p;
|
||||
++n;
|
||||
}
|
||||
}
|
||||
|
||||
if (lastFoldPos == pos)
|
||||
lastFoldPos = p;
|
||||
|
||||
os << string(pos, lastFoldPos);
|
||||
|
||||
curLineLength += (lastFoldPos - pos) + 1;
|
||||
|
||||
pos += n;
|
||||
remaining -= n;
|
||||
}
|
||||
|
||||
// End of the encoded word
|
||||
os << wordEnd;
|
||||
|
||||
if (remaining)
|
||||
{
|
||||
os << NEW_LINE_SEQUENCE;
|
||||
curLineLength = NEW_LINE_SEQUENCE_LENGTH;
|
||||
}
|
||||
}
|
||||
|
||||
delete (theEncoder);
|
||||
}
|
||||
}
|
||||
|
||||
if (lastLineLength)
|
||||
*lastLineLength = curLineLength;
|
||||
}
|
||||
|
||||
|
||||
// Decode RFC-2047 encoded-words found in [inStart, inEnd) and unfold the
// lines, storing the result in "out" (which is cleared first).
//
// Unencoded text becomes words in the US-ASCII charset; each successfully
// decoded encoded-word becomes a word in its declared charset. Consecutive
// pieces sharing the charset of the last output word are appended to that
// word. Encoded-words with an unknown encoding are left as ordinary text
// (RFC-2047, Page 9).
//
// NOTE: See RFC-2047, Pages 11-12 about the handling of white-spaces
// between encoded words.
void decodeAndUnfoldText(const string::const_iterator& inStart, const string::const_iterator& inEnd, text& out)
{
	out.removeAllWords();

	string::const_iterator p = inStart;
	const string::const_iterator end = inEnd;

	const charset defaultCharset(charsets::US_ASCII);
	charset prevWordCharset(defaultCharset);

	bool prevIsEncoded = false;

	// Start of the input that has not been copied to "out" yet
	string::const_iterator prevPos = p;

	for ( ; ; )
	{
		if (p == end || *p == '\n')
		{
			// End of line or end of input: flush the pending unencoded text,
			// without the '\r' of a CRLF pair.
			string::const_iterator textEnd = p;

			if (textEnd != inStart && *(textEnd - 1) == '\r')
				--textEnd;

			if (textEnd != prevPos)
			{
				if (!out.isEmpty() && prevWordCharset == defaultCharset)
				{
					out.getWordAt(out.getWordCount() - 1)->getBuffer() += string(prevPos, textEnd);
				}
				else
				{
					prevWordCharset = defaultCharset;
					out.appendWord(new word(string(prevPos, textEnd), defaultCharset));
					prevIsEncoded = false;
				}
			}

			if (p == end)
			{
				// Finished
				break;
			}

			// Skip the new-line character
			prevPos = ++p;
		}
		else if (*p == '=' && (p + 1) != end && *(p + 1) == '?')
		{
			// Possible encoded-word: "=?charset?encoding?data?="
			const string::const_iterator wordPos = p;
			p += 2; // skip '=?'

			const string::const_iterator charsetPos = p;

			while (p != end && *p != '?')
				++p;

			if (p != end) // a charset is specified
			{
				const string::const_iterator charsetEnd = p;
				const string::const_iterator encPos = ++p; // skip '?'

				while (p != end && *p != '?')
					++p;

				if (p != end) // an encoding is specified
				{
					const string::const_iterator dataPos = ++p; // skip '?'

					// Look for the closing "?=". The look-ahead is bounds-checked
					// so that a '?' in last position is never followed past "end".
					while (p != end && !(*p == '?' && (p + 1) != end && *(p + 1) == '='))
						++p;

					if (p != end) // some data is specified
					{
						const string::const_iterator dataEnd = p;
						p += 2; // skip '?='

						encoder* theEncoder = NULL;

						if (*encPos == 'B' || *encPos == 'b')
						{
							// Base-64 encoding
							theEncoder = new encoderB64;
						}
						else if (*encPos == 'Q' || *encPos == 'q')
						{
							// Quoted-Printable encoding
							theEncoder = new encoderQP;
							theEncoder->getProperties()["rfc2047"] = true;
						}

						if (theEncoder)
						{
							// Decode text. The encoded data lives in a named string so
							// that it outlives the adapter, whether the adapter copies
							// its source or only refers to it.
							const string encodedData(dataPos, dataEnd);
							string decodedBuffer;

							utility::inputStreamStringAdapter ein(encodedData);
							utility::outputStreamStringAdapter eout(decodedBuffer);

							theEncoder->decode(ein, eout);
							delete (theEncoder);

							// Append all the unencoded text before this word
							if (prevPos != wordPos)
							{
								string::const_iterator q = prevPos;

								if (prevIsEncoded)
								{
									// Text made only of white-spaces between two encoded
									// words is dropped. The cast keeps isspace() defined
									// for bytes with the high bit set.
									while (q != wordPos && isspace(static_cast <unsigned char>(*q)))
										++q;
								}

								if (q != wordPos) // if not empty
								{
									if (!out.isEmpty() && prevWordCharset == defaultCharset)
									{
										out.getWordAt(out.getWordCount() - 1)->
											getBuffer() += string(prevPos, wordPos);
									}
									else
									{
										out.appendWord(new word
											(string(prevPos, wordPos), defaultCharset));

										prevWordCharset = defaultCharset;
									}
								}
							}

							// Append this fresh decoded word to output text
							charset thisCharset(string(charsetPos, charsetEnd));

							if (!out.isEmpty() && prevWordCharset == thisCharset)
							{
								out.getWordAt(out.getWordCount() - 1)->
									getBuffer() += decodedBuffer;
							}
							else
							{
								prevWordCharset = thisCharset;
								out.appendWord(new word(decodedBuffer, thisCharset));
							}

							// This word has been decoded: we can advance in the input buffer
							prevPos = p;
							prevIsEncoded = true;
						}
						// else: unknown encoding. "prevPos" is left untouched, so the
						// word is later copied as ordinary text (RFC-2047, Page 9).
					}
				}
			}
		}
		else
		{
			++p;
		}

		// Jump to the next character of interest
		while (p != end && *p != '=' && *p != '\n')
			++p;
	}
}
|
||||
|
||||
|
||||
void decodeAndUnfoldText(const string& in, text& out)
|
||||
{
|
||||
decodeAndUnfoldText(in.begin(), in.end(), out);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// V-Mime Initializer
|
||||
// ====================
|
||||
|
26
src/base.hpp
26
src/base.hpp
@ -91,32 +91,6 @@ namespace vmime
|
||||
}
|
||||
|
||||
|
||||
// Field contents encoding (RFC-2047 and folding)
|
||||
void encodeAndFoldText(utility::outputStream& os, const text& in, const string::size_type maxLineLength, const string::size_type firstLineOffset, string::size_type* lastLineLength, const int flags);
|
||||
void decodeAndUnfoldText(const string& in, text& out);
|
||||
void decodeAndUnfoldText(const string::const_iterator& inStart, const string::const_iterator& inEnd, text& out);
|
||||
|
||||
|
||||
//
|
||||
// Some constants
|
||||
//
|
||||
|
||||
// Bit flags accepted by the "encodeAndFoldText" function (may be OR-ed together)
namespace encodeAndFoldFlags
{
	enum
	{
		// No special behaviour
		none = 0,

		// Encoding control. When both "forceNoEncoding" and "forceEncoding"
		// are given, "forceNoEncoding" takes precedence.
		forceNoEncoding = 0x01,
		forceEncoding = 0x02,

		// When folding unencoded text, break lines with a bare CRLF
		// instead of the new-line sequence
		noNewLineSequence = 0x04
	};
}
|
||||
|
||||
/*
|
||||
|
||||
RFC#2822
|
||||
|
12
src/body.cpp
12
src/body.cpp
@ -258,8 +258,10 @@ void body::generate(utility::outputStream& os, const string::size_type maxLineLe
|
||||
|
||||
if (!prologText.empty())
|
||||
{
|
||||
encodeAndFoldText(os, text(word(prologText, getCharset())), maxLineLength, 0,
|
||||
NULL, encodeAndFoldFlags::forceNoEncoding | encodeAndFoldFlags::noNewLineSequence);
|
||||
text prolog(word(prologText, getCharset()));
|
||||
|
||||
prolog.encodeAndFold(os, maxLineLength, 0,
|
||||
NULL, text::FORCE_NO_ENCODING | text::NO_NEW_LINE_SEQUENCE);
|
||||
|
||||
os << CRLF;
|
||||
}
|
||||
@ -279,8 +281,10 @@ void body::generate(utility::outputStream& os, const string::size_type maxLineLe
|
||||
|
||||
if (!epilogText.empty())
|
||||
{
|
||||
encodeAndFoldText(os, text(word(epilogText, getCharset())), maxLineLength, 0,
|
||||
NULL, encodeAndFoldFlags::forceNoEncoding | encodeAndFoldFlags::noNewLineSequence);
|
||||
text epilog(word(epilogText, getCharset()));
|
||||
|
||||
epilog.encodeAndFold(os, maxLineLength, 0,
|
||||
NULL, text::FORCE_NO_ENCODING | text::NO_NEW_LINE_SEQUENCE);
|
||||
|
||||
os << CRLF;
|
||||
}
|
||||
|
@ -316,7 +316,7 @@ void mailbox::parse(const string& buffer, const string::size_type position,
|
||||
}
|
||||
else
|
||||
{
|
||||
decodeAndUnfoldText(name, m_name);
|
||||
text::decodeAndUnfold(name, &m_name);
|
||||
m_email.empty();
|
||||
m_email.reserve(address.size());
|
||||
|
||||
@ -405,8 +405,8 @@ void mailbox::generate(utility::outputStream& os, const string::size_type maxLin
|
||||
string::size_type pos = curLinePos;
|
||||
bool newLine = true;
|
||||
|
||||
encodeAndFoldText(os, m_name, maxLineLength, pos, &pos,
|
||||
forceEncode ? encodeAndFoldFlags::forceEncoding : encodeAndFoldFlags::none);
|
||||
m_name.encodeAndFold(os, maxLineLength, pos, &pos,
|
||||
forceEncode ? text::FORCE_ENCODING : 0);
|
||||
|
||||
if (pos + m_email.length() + 3 > maxLineLength)
|
||||
{
|
||||
|
@ -100,7 +100,7 @@ void mailboxGroup::parse(const string& buffer, const string::size_type position,
|
||||
}
|
||||
}
|
||||
|
||||
decodeAndUnfoldText(name, m_name);
|
||||
text::decodeAndUnfold(name, &m_name);
|
||||
|
||||
if (newPosition)
|
||||
*newPosition = end;
|
||||
@ -152,8 +152,8 @@ void mailboxGroup::generate(utility::outputStream& os, const string::size_type m
|
||||
|
||||
string::size_type pos = curLinePos;
|
||||
|
||||
encodeAndFoldText(os, m_name, maxLineLength - 2, pos, &pos,
|
||||
forceEncode ? encodeAndFoldFlags::forceEncoding : encodeAndFoldFlags::none);
|
||||
m_name.encodeAndFold(os, maxLineLength - 2, pos, &pos,
|
||||
forceEncode ? text::FORCE_ENCODING : 0);
|
||||
|
||||
os << ":";
|
||||
++pos;
|
||||
|
@ -602,7 +602,7 @@ void IMAPMessage::processFetchResponse
|
||||
|
||||
// Subject
|
||||
text subject;
|
||||
decodeAndUnfoldText(env->env_subject()->value(), subject);
|
||||
text::decodeAndUnfold(env->env_subject()->value(), &subject);
|
||||
|
||||
hdr.Subject().setValue(subject);
|
||||
|
||||
@ -724,7 +724,7 @@ void IMAPMessage::convertAddressList
|
||||
const IMAPParser::address& addr = **it;
|
||||
|
||||
text name;
|
||||
decodeAndUnfoldText(addr.addr_name()->value(), name);
|
||||
text::decodeAndUnfold(addr.addr_name()->value(), &name);
|
||||
|
||||
string email = addr.addr_mailbox()->value()
|
||||
+ "@" + addr.addr_host()->value();
|
||||
|
@ -213,8 +213,8 @@ void relay::generate(utility::outputStream& os, const string::size_type maxLineL
|
||||
|
||||
oss << "; " << m_date.generate();
|
||||
|
||||
encodeAndFoldText(os, text(oss.str()), maxLineLength,
|
||||
curLinePos, newLinePos, encodeAndFoldFlags::forceNoEncoding);
|
||||
text(oss.str()).encodeAndFold(os, maxLineLength,
|
||||
curLinePos, newLinePos, text::FORCE_NO_ENCODING);
|
||||
}
|
||||
|
||||
|
||||
|
527
src/text.cpp
527
src/text.cpp
@ -19,6 +19,12 @@
|
||||
|
||||
#include "text.hpp"
|
||||
|
||||
#include "utility/stringUtils.hpp"
|
||||
|
||||
#include "encoder.hpp"
|
||||
#include "encoderB64.hpp"
|
||||
#include "encoderQP.hpp"
|
||||
|
||||
|
||||
namespace vmime
|
||||
{
|
||||
@ -63,7 +69,7 @@ text::~text()
|
||||
void text::parse(const string& buffer, const string::size_type position,
|
||||
const string::size_type end, string::size_type* newPosition)
|
||||
{
|
||||
decodeAndUnfoldText(buffer.begin() + position, buffer.begin() + end, *this);
|
||||
decodeAndUnfold(buffer.begin() + position, buffer.begin() + end, *this);
|
||||
|
||||
if (newPosition)
|
||||
*newPosition = end;
|
||||
@ -73,7 +79,7 @@ void text::parse(const string& buffer, const string::size_type position,
|
||||
void text::generate(utility::outputStream& os, const string::size_type maxLineLength,
|
||||
const string::size_type curLinePos, string::size_type* newLinePos) const
|
||||
{
|
||||
encodeAndFoldText(os, *this, maxLineLength, curLinePos, newLinePos, encodeAndFoldFlags::none);
|
||||
encodeAndFold(os, maxLineLength, curLinePos, newLinePos, 0);
|
||||
}
|
||||
|
||||
|
||||
@ -315,4 +321,521 @@ text* text::newFromString(const string& in, const charset& ch, text* generateInE
|
||||
}
|
||||
|
||||
|
||||
// Encode (RFC-2047) and fold this text, word by word, onto an output stream.
//
// A word is written as-is when FORCE_NO_ENCODING is set, or when it is pure
// ASCII and FORCE_ENCODING is not set. Otherwise it is written as one or more
// RFC-2047 encoded-words, using Base64 when at most 40% of its bytes are ASCII
// and Quoted-Printable otherwise. Words with an empty buffer produce no output.
//
//   os               output stream
//   maxLineLength    maximum line length for output
//   firstLineOffset  length already used on the current output line
//   lastLineLength   if not NULL, receives the length of the last line written
//   flags            combination of FORCE_NO_ENCODING, FORCE_ENCODING and
//                    NO_NEW_LINE_SEQUENCE (or 0)
void text::encodeAndFold(utility::outputStream& os, const string::size_type maxLineLength,
	const string::size_type firstLineOffset, string::size_type* lastLineLength, const int flags) const
{
	string::size_type curLineLength = firstLineOffset;

	for (int wi = 0 ; wi < getWordCount() ; ++wi)
	{
		const word& w = *getWordAt(wi);
		const string& buffer = w.getBuffer();

		// An empty word contributes nothing to the output. Skipping it here also
		// keeps the ASCII percentage below from dividing by zero when encoding
		// is forced.
		if (buffer.empty())
			continue;

		// The number of ASCII chars tells us whether encoding is needed
		// and _which_ encoding to use.
		const string::size_type asciiCount =
			stringUtils::countASCIIchars(buffer.begin(), buffer.end());

		const bool noEncoding = (flags & FORCE_NO_ENCODING) ||
			(!(flags & FORCE_ENCODING) && asciiCount == buffer.length());

		if (noEncoding)
		{
			// Fold lines without encoding them.
			const string::const_iterator end = buffer.end();

			string::const_iterator lineStart = buffer.begin();  // current line start
			string::const_iterator lastWSpos = end;             // last white-space seen on this line
			string::const_iterator p = buffer.begin();

			for ( ; ; )
			{
				// Advance until the end of the buffer, or until the line is too
				// long AND a white-space where it can be cut has been seen. A line
				// without any white-space is therefore never cut.
				for ( ; p != end ; ++p, ++curLineLength)
				{
					if (curLineLength >= maxLineLength && lastWSpos != end)
						break;

					if (*p == ' ' || *p == '\t')
						lastWSpos = p;
				}

				if (p == end)
				{
					// Everything left fits (or cannot be cut): write it and stop.
					os << string(lineStart, end);
					break;
				}

				// The scan stopped early, so "lastWSpos" is valid: cut the line there.
				++curLineLength;

				if (curLineLength != 1 && wi != 0)
					os << " "; // Separate from previous word

				os << string(lineStart, lastWSpos);

				if (flags & NO_NEW_LINE_SEQUENCE)
				{
					os << CRLF;
					curLineLength = 0;
				}
				else
				{
					os << NEW_LINE_SEQUENCE;
					curLineLength = NEW_LINE_SEQUENCE_LENGTH;
				}

				// Resume right after the white-space used as the cut point.
				lineStart = p = lastWSpos + 1;
				lastWSpos = end;
			}
		}
		else
		{
			// Encode _AND_ fold lines.
			//
			// RFC-2047, section 2: "each line of a header field that contains one
			// or more 'encoded-word's is limited to 76 characters."
			const string::size_type maxLineLength3 =
				(maxLineLength == lineLengthLimits::infinite)
					? maxLineLength
					: std::min(maxLineLength, static_cast <string::size_type>(76));

			// RFC-2047, section 4: "Q" is recommended when most characters are
			// ASCII, "B" otherwise. Base64 if more than 60% non-ASCII.
			const string::size_type asciiPercent = (100 * asciiCount) / buffer.length();
			const string::value_type encoding = (asciiPercent <= 40) ? 'B' : 'Q';

			const string wordStart("=?" + w.getCharset().getName() + "?" + encoding + "?");
			const string wordEnd("?=");

			const string::size_type minWordLength = wordStart.length() + wordEnd.length();

			// Make sure a line can hold at least the delimiters plus one char.
			const string::size_type maxLineLength2 = (maxLineLength3 < minWordLength + 1)
				? maxLineLength3 + minWordLength + 1 : maxLineLength3;

			// Continue on the current line only if the room left allows for
			// more than 10 encoded chars; otherwise start a new line.
			bool startNewLine = true;

			if (curLineLength + 2 < maxLineLength2)
			{
				const string::size_type remainingSpaceOnLine = maxLineLength2 - curLineLength - 2;
				startNewLine = (remainingSpaceOnLine < minWordLength + 10);
			}

			if (startNewLine)
			{
				os << NEW_LINE_SEQUENCE;
				curLineLength = NEW_LINE_SEQUENCE_LENGTH;
			}

			// Encode and fold input buffer
			string::const_iterator pos = buffer.begin();
			string::size_type remaining = buffer.length();

			encoder* theEncoder = (encoding == 'B')
				? static_cast <encoder*>(new encoderB64)
				: static_cast <encoder*>(new encoderQP);

			string qpEncodedBuffer;

			if (encoding == 'Q')
			{
				theEncoder->getProperties()["rfc2047"] = true;

				// Quoted-Printable cannot be encoded chunk by chunk: encode the
				// whole buffer once, then fold the encoded form in the loop below.
				utility::inputStreamStringAdapter qpIn(buffer);
				utility::outputStreamStringAdapter qpOut(qpEncodedBuffer);

				theEncoder->encode(qpIn, qpOut);

				pos = qpEncodedBuffer.begin();
				remaining = qpEncodedBuffer.length();
			}

			if (curLineLength != 1 && wi != 0)
			{
				os << " "; // Separate from previous word
				++curLineLength;
			}

			while (remaining)
			{
				// Start a new encoded word
				os << wordStart;
				curLineLength += minWordLength;

				// Number of encoded chars that still fit on this line. Clamped to
				// zero so that an over-long line never wraps around to a huge
				// unsigned value.
				const string::size_type fit = (curLineLength < maxLineLength2)
					? maxLineLength2 - curLineLength : 0;

				if (encoding == 'B')
				{
					// TODO: WARNING! "Any encoded word which encodes a non-integral
					// number of characters or octets is incorrectly formed."

					// With Base64, 3 bytes of input give 4 bytes of output: derive
					// the number of source bytes from the room available.
					string::size_type count = (fit > 1) ? ((fit - 1) * 3) / 4 : 1;

					// The formula yields 0 for fit == 2; always consume at least
					// one byte so that the loop terminates.
					if (count == 0) count = 1;
					if (count > remaining) count = remaining;

					utility::inputStreamStringAdapter chunk
						(buffer, pos - buffer.begin(), pos - buffer.begin() + count);

					curLineLength += theEncoder->encode(chunk, os);

					pos += count;
					remaining -= count;
				}
				else
				{
					// TODO: WARNING! "Any encoded word which encodes a non-integral
					// number of characters or octets is incorrectly formed."

					// Take up to "fit" chars from the QP-encoded buffer, without
					// ever cutting in the middle of a "=XY" sequence.
					const string::const_iterator qpEnd = qpEncodedBuffer.end();
					string::const_iterator p = pos;
					string::size_type n = 0;

					while (n < fit && p != qpEnd)
					{
						if (*p != '=')
						{
							++p;
							++n;
							continue;
						}

						if (n + 3 >= fit)
							break;

						// Never step past the end of the encoded buffer.
						const string::size_type unit = std::min(static_cast <string::size_type>(3),
							static_cast <string::size_type>(qpEnd - p));

						p += unit;
						n += unit;
					}

					if (n == 0)
					{
						// Nothing fits: emit one unit anyway rather than an empty
						// encoded-word, so that the loop always makes progress.
						n = (*pos == '=') ? std::min(static_cast <string::size_type>(3), remaining) : 1;
						p = pos + n;
					}

					os << string(pos, p);

					curLineLength += n + 1;

					pos = p;
					remaining -= n;
				}

				// End of the encoded word
				os << wordEnd;

				if (remaining)
				{
					os << NEW_LINE_SEQUENCE;
					curLineLength = NEW_LINE_SEQUENCE_LENGTH;
				}
			}

			delete (theEncoder);
		}
	}

	if (lastLineLength)
		*lastLineLength = curLineLength;
}
|
||||
|
||||
|
||||
/** Decode and unfold text (RFC-2047).
  *
  * @param in input string
  * @param generateInExisting if not NULL, the resulting text is generated
  * in this object instead of a newly allocated one
  * @return generateInExisting if it is not NULL, otherwise a new text object
  * (allocated with "new")
  */
text* text::decodeAndUnfold(const string& in, text* generateInExisting)
{
	text* out = (generateInExisting != NULL) ? generateInExisting : new text();

	try
	{
		// No need to clear "out" here: the iterator overload removes
		// all existing words before it starts appending.
		decodeAndUnfold(in.begin(), in.end(), *out);
	}
	catch (...)
	{
		// Only release what was allocated above; an object supplied
		// by the caller is left untouched.
		if (generateInExisting == NULL)
			delete out;

		throw;
	}

	return (out);
}
|
||||
|
||||
|
||||
/** Decode and unfold a string that may contain RFC-2047 encoded words.
  *
  * The input is scanned once. Line feeds (and a carriage return directly
  * before them) are dropped, "=?charset?encoding?data?=" sequences using
  * the "B" or "Q" encoding are decoded, and everything else is kept as
  * US-ASCII text. A piece whose charset equals the charset of the previous
  * word is appended to that word instead of creating a new one.
  *
  * @param inStart start of the input
  * @param inEnd end of the input
  * @param out text object receiving the words (all existing words are
  * removed first)
  */
void text::decodeAndUnfold(const string::const_iterator& inStart, const string::const_iterator& inEnd, text& out)
{
	// NOTE: See RFC-2047, Pages 11-12 for knowing about handling
	// of white-spaces between encoded words.

	out.removeAllWords();

	string::const_iterator p = inStart;
	const string::const_iterator end = inEnd;

	const charset defaultCharset(charsets::US_ASCII);
	charset prevWordCharset(defaultCharset);

	// True when the last piece written to "out" was a decoded encoded-word
	bool prevIsEncoded = false;

	// Start of the input which has not been copied to "out" yet
	string::const_iterator prevPos = p;

	for ( ; ; )
	{
		if (p == end || *p == '\n')
		{
			string::const_iterator textEnd = p;

			// Do not copy the CR of a CRLF sequence
			if (textEnd != inStart && *(textEnd - 1) == '\r')
				--textEnd;

			if (textEnd != prevPos)
			{
				if (!out.isEmpty() && prevWordCharset == defaultCharset)
				{
					out.getWordAt(out.getWordCount() - 1)->getBuffer() += string(prevPos, textEnd);
				}
				else
				{
					prevWordCharset = defaultCharset;
					out.appendWord(new word(string(prevPos, textEnd), defaultCharset));
				}

				// Ordinary text has just been written (in either branch),
				// so white-space before a following encoded word is no
				// longer "between two encoded words" and must be kept.
				prevIsEncoded = false;
			}

			if (p == end)
			{
				// Finished
				break;
			}

			// Skip the new-line character
			prevPos = ++p;
		}
		else if (*p == '=' && (p + 1) != end && *(p + 1) == '?')
		{
			const string::const_iterator wordPos = p;
			p += 2; // skip '=?'

			if (p != end)
			{
				const string::const_iterator charsetPos = p;

				while (p != end && *p != '?')
					++p;

				if (p != end) // a charset is specified
				{
					const string::const_iterator charsetEnd = p;
					const string::const_iterator encPos = ++p; // skip '?'

					while (p != end && *p != '?')
						++p;

					if (p != end) // an encoding is specified
					{
						const string::const_iterator dataPos = ++p; // skip '?'

						// Look for the closing '?='. The bound check on (p + 1)
						// prevents reading past the input when it ends with '?'.
						while (p != end && !(*p == '?' && (p + 1) != end && *(p + 1) == '='))
							++p;

						if (p != end) // some data is specified
						{
							const string::const_iterator dataEnd = p;
							p += 2; // skip '?='

							encoder* theEncoder = NULL;

							// Base-64 encoding
							if (*encPos == 'B' || *encPos == 'b')
							{
								theEncoder = new encoderB64;
							}
							// Quoted-Printable encoding
							else if (*encPos == 'Q' || *encPos == 'q')
							{
								theEncoder = new encoderQP;
								theEncoder->getProperties()["rfc2047"] = true;
							}

							if (theEncoder)
							{
								// Decode text
								string decodedBuffer;

								// Named local (not a temporary) so that the adapter
								// is safe whether or not it copies its argument.
								const string encodedData(dataPos, dataEnd);

								utility::inputStreamStringAdapter ein(encodedData);
								utility::outputStreamStringAdapter eout(decodedBuffer);

								try
								{
									theEncoder->decode(ein, eout);
								}
								catch (...)
								{
									// Do not leak the encoder if decoding fails
									delete (theEncoder);
									throw;
								}

								delete (theEncoder);

								// Append all the unencoded text before this word
								if (prevPos != wordPos)
								{
									string::const_iterator textPos = prevPos;

									if (prevIsEncoded)
									{
										// Check whether there are only white-spaces between
										// the two encoded words (cast: isspace() requires a
										// value representable as unsigned char)
										while (textPos != wordPos &&
										       isspace(static_cast <unsigned char>(*textPos)))
										{
											++textPos;
										}
									}

									if (textPos != wordPos) // if not empty
									{
										if (!out.isEmpty() && prevWordCharset == defaultCharset)
										{
											out.getWordAt(out.getWordCount() - 1)->
												getBuffer() += string(prevPos, wordPos);
										}
										else
										{
											out.appendWord(new word
												(string(prevPos, wordPos), defaultCharset));

											prevWordCharset = defaultCharset;
										}
									}
								}

								// Append this fresh decoded word to output text
								charset thisCharset(string(charsetPos, charsetEnd));

								if (!out.isEmpty() && prevWordCharset == thisCharset)
								{
									out.getWordAt(out.getWordCount() - 1)->
										getBuffer() += decodedBuffer;
								}
								else
								{
									prevWordCharset = thisCharset;
									out.appendWord(new word(decodedBuffer, thisCharset));
								}

								// This word has been decoded: we can advance in the input buffer
								prevPos = p;
								prevIsEncoded = true;
							}
							else
							{
								// Unknown encoding: can't decode this word, we will
								// treat this word as ordinary text (RFC-2047, Page 9).
							}
						}
					}
				}
			}
		}
		else
		{
			++p;
		}

		// Jump to the next character of interest: a possible encoded
		// word start ('=') or a line break
		while (p != end && *p != '=' && *p != '\n')
			++p;
	}
}
|
||||
|
||||
|
||||
} // vmime
|
||||
|
36
src/text.hpp
36
src/text.hpp
@ -159,6 +159,40 @@ public:
|
||||
*/
|
||||
static text* newFromString(const string& in, const charset& ch, text* generateInExisting = NULL);
|
||||
|
||||
/** Option bits for the "flags" argument of the "encodeAndFold" function.
  *
  * NOTE: "FORCE_NO_ENCODING" takes precedence when it is specified
  * together with "FORCE_ENCODING".
  */
enum EncodeAndFoldFlags
{
	/** Just fold lines, don't encode them. */
	FORCE_NO_ENCODING = 0x01,

	/** Encode lines even if they are plain ASCII text. */
	FORCE_ENCODING = 0x02,

	/** Use CRLF instead of new-line sequence (CRLF + TAB). */
	NO_NEW_LINE_SEQUENCE = 0x04
};
|
||||
|
||||
/** Encode and fold text according to RFC-2047.
|
||||
*
|
||||
* @param os output stream
|
||||
* @param maxLineLength maximum line length for output
|
||||
* @param firstLineOffset the first line length (may be useful if the current output line is not empty)
|
||||
* @param lastLineLength will receive the length of the last line written
|
||||
* @param flags encoding flags (see EncodeAndFoldFlags)
|
||||
*/
|
||||
void encodeAndFold(utility::outputStream& os, const string::size_type maxLineLength,
|
||||
const string::size_type firstLineOffset, string::size_type* lastLineLength, const int flags) const;
|
||||
|
||||
/** Decode and unfold text (RFC-2047).
|
||||
*
|
||||
* @param in input string
|
||||
* @param generateInExisting if not NULL, the resulting text will be generated
|
||||
* in the specified object instead of a new created object (in this case, the
|
||||
* function returns the same pointer). Can be used to avoid copying the
|
||||
* resulting object into an existing object.
|
||||
* @return new text object or existing object if generateInExisting != NULL
|
||||
*/
|
||||
static text* decodeAndUnfold(const string& in, text* generateInExisting = NULL);
|
||||
|
||||
|
||||
using component::parse;
|
||||
using component::generate;
|
||||
@ -169,6 +203,8 @@ public:
|
||||
|
||||
private:
|
||||
|
||||
/** Decode and unfold text (RFC-2047) from an iterator range.
  *
  * @param inStart start of the input string
  * @param inEnd end of the input string
  * @param out text object in which the decoded words are generated
  */
static void decodeAndUnfold(const string::const_iterator& inStart, const string::const_iterator& inEnd, text& out);
|
||||
|
||||
std::vector <word*> m_words;
|
||||
};
|
||||
|
||||
|
@ -114,6 +114,8 @@ namespace
|
||||
assert_eq("2.7", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(2)->getCharset());
|
||||
}
|
||||
|
||||
// TODO: tests for encodeAndFold() and decodeAndUnfold()
|
||||
|
||||
public:
|
||||
|
||||
textTest() : suite("vmime::text")
|
||||
|
Loading…
Reference in New Issue
Block a user