diff --git a/examples/common.inc b/examples/common.inc index ed9c4f4b..a21b6876 100644 --- a/examples/common.inc +++ b/examples/common.inc @@ -284,12 +284,12 @@ public: return getpid(); } - vmime::messaging::socketFactory* getSocketFactory(const vmime::string& name) const + vmime::messaging::socketFactory* getSocketFactory(const vmime::string& /* name */) const { return m_sf; } - vmime::messaging::timeoutHandlerFactory* getTimeoutHandlerFactory(const vmime::string& name) const + vmime::messaging::timeoutHandlerFactory* getTimeoutHandlerFactory(const vmime::string& /* name */) const { // Not used for now return NULL; diff --git a/src/base.cpp b/src/base.cpp index 6d0197c5..0e3fbc76 100644 --- a/src/base.cpp +++ b/src/base.cpp @@ -98,528 +98,6 @@ namespace lineLengthLimits -/** Encode and fold text in respect to RFC-2047. - * - * @param os output stream - * @param in input text - * @param maxLineLength maximum line length for output - * @param firstLineOffset the first line length (may be useful if the current output line is not empty) - * @param lastLineLength will receive the length of the last line written - * @param flags encoding flags (see encodeAndFoldFlags) - */ - -void encodeAndFoldText(utility::outputStream& os, const text& in, const string::size_type maxLineLength, - const string::size_type firstLineOffset, string::size_type* lastLineLength, const int flags) -{ - string::size_type curLineLength = firstLineOffset; - - for (int wi = 0 ; wi < in.getWordCount() ; ++wi) - { - const word& w = *in.getWordAt(wi); - const string& buffer = w.getBuffer(); - - // Calculate the number of ASCII chars to check whether encoding is needed - // and _which_ encoding to use. - const string::size_type asciiCount = - stringUtils::countASCIIchars(buffer.begin(), buffer.end()); - - bool noEncoding = (flags & encodeAndFoldFlags::forceNoEncoding) || - (!(flags & encodeAndFoldFlags::forceEncoding) && asciiCount == buffer.length()); - - if (noEncoding) - { - // We will fold lines without encoding them. - - string::const_iterator lastWSpos = buffer.end(); // last white-space position - string::const_iterator curLineStart = buffer.begin(); // current line start - - string::const_iterator p = buffer.begin(); - const string::const_iterator end = buffer.end(); - - bool finished = false; - bool newLine = false; - - while (!finished) - { - for ( ; p != end ; ++p, ++curLineLength) - { - // Exceeded maximum line length, but we have found a white-space - // where we can cut the line... - if (curLineLength >= maxLineLength && lastWSpos != end) - break; - - if (*p == ' ' || *p == '\t') - { - // Remember the position of this white-space character - lastWSpos = p; - } - } - - if (p != end) - ++curLineLength; - - //if (p == end || curLineLength >= maxLineLength) - { - if (p == end || lastWSpos == end) - { - // If we are here, it means that we have found no whitespace - // before the first "maxLineLength" characters. In this case, - // we write the full line no matter of the max line length... - - if (!newLine && p != end && lastWSpos == end && - wi != 0 && curLineStart == buffer.begin()) - { - // Here, we are continuing on the line of previous encoded - // word, but there is not even enough space to put the - // first word of this line, so we start a new line. - if (flags & encodeAndFoldFlags::noNewLineSequence) - { - os << CRLF; - curLineLength = 0; - } - else - { - os << NEW_LINE_SEQUENCE; - curLineLength = NEW_LINE_SEQUENCE_LENGTH; - } - - p = curLineStart; - lastWSpos = end; - newLine = true; - } - else - { - os << string(curLineStart, p); - - if (p == end) - { - finished = true; - } - else - { - if (flags & encodeAndFoldFlags::noNewLineSequence) - { - os << CRLF; - curLineLength = 0; - } - else - { - os << NEW_LINE_SEQUENCE; - curLineLength = NEW_LINE_SEQUENCE_LENGTH; - } - - curLineStart = p; - lastWSpos = end; - newLine = true; - } - } - } - else - { - // In this case, there will not be enough space on the line for all the - // characters _after_ the last white-space; so we cut the line at this - // last white-space. - -#if 1 - if (curLineLength != 1 && wi != 0) - os << " "; // Separate from previous word -#endif - - os << string(curLineStart, lastWSpos); - - if (flags & encodeAndFoldFlags::noNewLineSequence) - { - os << CRLF; - curLineLength = 0; - } - else - { - os << NEW_LINE_SEQUENCE; - curLineLength = NEW_LINE_SEQUENCE_LENGTH; - } - - curLineStart = lastWSpos + 1; - - p = lastWSpos + 1; - lastWSpos = end; - newLine = true; - } - } - } - } - /* - RFC #2047: - 4. Encodings - - Initially, the legal values for "encoding" are "Q" and "B". These - encodings are described below. The "Q" encoding is recommended for - use when most of the characters to be encoded are in the ASCII - character set; otherwise, the "B" encoding should be used. - Nevertheless, a mail reader which claims to recognize 'encoded-word's - MUST be able to accept either encoding for any character set which it - supports. - */ - else - { - // We will encode _AND_ fold lines - - /* - RFC #2047: - 2. Syntax of encoded-words - - " While there is no limit to the length of a multiple-line header - field, each line of a header field that contains one or more - 'encoded-word's is limited to 76 characters. " - */ - - const string::size_type maxLineLength3 = - (maxLineLength == lineLengthLimits::infinite) - ? maxLineLength - : std::min(maxLineLength, (const string::size_type) 76); - - // Base64 if more than 60% non-ascii, quoted-printable else (default) - const string::size_type asciiPercent = (100 * asciiCount) / buffer.length(); - const string::value_type encoding = (asciiPercent <= 40) ? 'B' : 'Q'; - - string wordStart("=?" + w.getCharset().getName() + "?" + encoding + "?"); - string wordEnd("?="); - - const string::size_type minWordLength = wordStart.length() + wordEnd.length(); - const string::size_type maxLineLength2 = (maxLineLength3 < minWordLength + 1) - ? maxLineLength3 + minWordLength + 1 : maxLineLength3; - - // Checks whether remaining space on this line is usable. If too few - // characters can be encoded, start a new line. - bool startNewLine = true; - - if (curLineLength + 2 < maxLineLength2) - { - const string::size_type remainingSpaceOnLine = maxLineLength2 - curLineLength - 2; - - if (remainingSpaceOnLine < minWordLength + 10) - { - // Space for no more than 10 encoded chars! - // It is not worth while to continue on this line... - startNewLine = true; - } - else - { - // OK, there is enough usable space on the current line. - startNewLine = false; - } - } - - if (startNewLine) - { - os << NEW_LINE_SEQUENCE; - curLineLength = NEW_LINE_SEQUENCE_LENGTH; - } - - // Encode and fold input buffer - string::const_iterator pos = buffer.begin(); - string::size_type remaining = buffer.length(); - - encoder* theEncoder = ((encoding == 'B') - ? ((encoder*) new encoderB64) - : ((encoder*) new encoderQP)); - - string qpEncodedBuffer; - - if (encoding == 'Q') - { - theEncoder->getProperties()["rfc2047"] = true; - - // In the case of Quoted-Printable encoding, we cannot simply encode input - // buffer line by line. So, we encode the whole buffer and we will fold it - // in the next loop... - utility::inputStreamStringAdapter in(buffer); - utility::outputStreamStringAdapter out(qpEncodedBuffer); - - theEncoder->encode(in, out); - - pos = qpEncodedBuffer.begin(); - remaining = qpEncodedBuffer.length(); - } - -#if 1 - if (curLineLength != 1 && wi != 0) - { - os << " "; // Separate from previous word - ++curLineLength; - } -#endif - - for ( ; remaining ; ) - { - // Start a new encoded word - os << wordStart; - curLineLength += minWordLength; - - // Compute the number of encoded chars that will fit on this line - const string::size_type fit = maxLineLength2 - curLineLength; - - // Base-64 encoding - if (encoding == 'B') - { - // TODO: WARNING! "Any encoded word which encodes a non-integral - // number of characters or octets is incorrectly formed." - - // Here, we have a formula to compute the maximum number of source - // characters to encode knowing the maximum number of encoded chars - // (with Base64, 3 bytes of input provide 4 bytes of output). - string::size_type count = (fit > 1) ? ((fit - 1) * 3) / 4 : 1; - if (count > remaining) count = remaining; - - utility::inputStreamStringAdapter in - (buffer, pos - buffer.begin(), pos - buffer.begin() + count); - - curLineLength += theEncoder->encode(in, os); - - pos += count; - remaining -= count; - } - // Quoted-Printable encoding - else - { - // TODO: WARNING! "Any encoded word which encodes a non-integral - // number of characters or octets is incorrectly formed." - - // All we have to do here is to take a certain number of character - // (that is less than or equal to "fit") from the QP encoded buffer, - // but we also make sure not to fold a "=XY" encoded char. - const string::const_iterator qpEnd = qpEncodedBuffer.end(); - string::const_iterator lastFoldPos = pos; - string::const_iterator p = pos; - string::size_type n = 0; - - while (n < fit && p != qpEnd) - { - if (*p == '=') - { - if (n + 3 >= fit) - { - lastFoldPos = p; - break; - } - - p += 3; - n += 3; - } - else - { - ++p; - ++n; - } - } - - if (lastFoldPos == pos) - lastFoldPos = p; - - os << string(pos, lastFoldPos); - - curLineLength += (lastFoldPos - pos) + 1; - - pos += n; - remaining -= n; - } - - // End of the encoded word - os << wordEnd; - - if (remaining) - { - os << NEW_LINE_SEQUENCE; - curLineLength = NEW_LINE_SEQUENCE_LENGTH; - } - } - - delete (theEncoder); - } - } - - if (lastLineLength) - *lastLineLength = curLineLength; -} - - -void decodeAndUnfoldText(const string::const_iterator& inStart, const string::const_iterator& inEnd, text& out) -{ - // NOTE: See RFC-2047, Pages 11-12 for knowing about handling - // of white-spaces between encoded words. - - out.removeAllWords(); - - string::const_iterator p = inStart; - const string::const_iterator end = inEnd; - - const charset defaultCharset(charsets::US_ASCII); - charset prevWordCharset(defaultCharset); - - bool prevIsEncoded = false; - - string::const_iterator prevPos = p; - - for ( ; ; ) - { - if (p == end || *p == '\n') - { - string::const_iterator textEnd = p; - - if (textEnd != inStart && *(textEnd - 1) == '\r') - --textEnd; - - if (textEnd != prevPos) - { - if (!out.isEmpty() && prevWordCharset == defaultCharset) - { - out.getWordAt(out.getWordCount() - 1)->getBuffer() += string(prevPos, textEnd); - } - else - { - prevWordCharset = defaultCharset; - out.appendWord(new word(string(prevPos, textEnd), defaultCharset)); - prevIsEncoded = false; - } - } - - if (p == end) - { - // Finished - break; - } - - // Skip the new-line character - prevPos = ++p; - } - else if (*p == '=' && (p + 1) != end && *(p + 1) == '?') - { - string::const_iterator wordPos = p; - p += 2; // skip '=?' - - if (p != end) - { - const string::const_iterator charsetPos = p; - - for ( ; p != end && *p != '?' ; ++p); - - if (p != end) // a charset is specified - { - const string::const_iterator charsetEnd = p; - const string::const_iterator encPos = ++p; // skip '?' - - for ( ; p != end && *p != '?' ; ++p); - - if (p != end) // an encoding is specified - { - //const string::const_iterator encEnd = p; - const string::const_iterator dataPos = ++p; // skip '?' - - for ( ; p != end && !(*p == '?' && *(p + 1) == '=') ; ++p); - - if (p != end) // some data is specified - { - const string::const_iterator dataEnd = p; - p += 2; // skip '?=' - - encoder* theEncoder = NULL; - - // Base-64 encoding - if (*encPos == 'B' || *encPos == 'b') - { - theEncoder = new encoderB64; - } - // Quoted-Printable encoding - else if (*encPos == 'Q' || *encPos == 'q') - { - theEncoder = new encoderQP; - theEncoder->getProperties()["rfc2047"] = true; - } - - if (theEncoder) - { - // Decode text - string decodedBuffer; - - utility::inputStreamStringAdapter ein(string(dataPos, dataEnd)); - utility::outputStreamStringAdapter eout(decodedBuffer); - - theEncoder->decode(ein, eout); - delete (theEncoder); - - // Append all the unencoded text before this word - if (prevPos != wordPos) - { - string::const_iterator p = prevPos; - - if (prevIsEncoded) - { - // Check whether there are only white-spaces between - // the two encoded words - for ( ; (p != wordPos) && isspace(*p) ; ++p); - } - - if (p != wordPos) // if not empty - { - if (!out.isEmpty() && prevWordCharset == defaultCharset) - { - out.getWordAt(out.getWordCount() - 1)-> - getBuffer() += string(prevPos, wordPos); - } - else - { - out.appendWord(new word - (string(prevPos, wordPos), defaultCharset)); - - prevWordCharset = defaultCharset; - } - } - } - - // Append this fresh decoded word to output text - charset thisCharset(string(charsetPos, charsetEnd)); - - if (!out.isEmpty() && prevWordCharset == thisCharset) - { - out.getWordAt(out.getWordCount() - 1)-> - getBuffer() += decodedBuffer; - } - else - { - prevWordCharset = thisCharset; - out.appendWord(new word(decodedBuffer, thisCharset)); - } - - // This word has been decoded: we can advance in the input buffer - prevPos = p; - prevIsEncoded = true; - } - else - { - // Unknown encoding: can't decode this word, we will - // treat this word as ordinary text (RFC-2047, Page 9). - } - } - } - } - } - } - else - { - ++p; - } - - for ( ; p != end && *p != '=' && *p != '\n' ; ++p); - } -} - - -void decodeAndUnfoldText(const string& in, text& out) -{ - decodeAndUnfoldText(in.begin(), in.end(), out); -} - - - // // V-Mime Initializer // ==================== diff --git a/src/base.hpp b/src/base.hpp index 79203fdf..735b8893 100644 --- a/src/base.hpp +++ b/src/base.hpp @@ -91,32 +91,6 @@ namespace vmime } - // Field contents encoding (RFC-2047 and folding) - void encodeAndFoldText(utility::outputStream& os, const text& in, const string::size_type maxLineLength, const string::size_type firstLineOffset, string::size_type* lastLineLength, const int flags); - void decodeAndUnfoldText(const string& in, text& out); - void decodeAndUnfoldText(const string::const_iterator& inStart, const string::const_iterator& inEnd, text& out); - - - // - // Some constants - // - - // Flags used by "encodeAndFoldText" function - namespace encodeAndFoldFlags - { - enum - { - // If both "forceNoEncoding" and "forceEncoding" are specified, - // "forceNoEncoding" is used by default. - forceNoEncoding = (1 << 0), - forceEncoding = (1 << 1), - - noNewLineSequence = (1 << 2), - - none = 0 - }; - } - /* RFC#2822 diff --git a/src/body.cpp b/src/body.cpp index 78f92b4d..da279b9b 100644 --- a/src/body.cpp +++ b/src/body.cpp @@ -258,8 +258,10 @@ void body::generate(utility::outputStream& os, const string::size_type maxLineLe if (!prologText.empty()) { - encodeAndFoldText(os, text(word(prologText, getCharset())), maxLineLength, 0, - NULL, encodeAndFoldFlags::forceNoEncoding | encodeAndFoldFlags::noNewLineSequence); + text prolog(word(prologText, getCharset())); + + prolog.encodeAndFold(os, maxLineLength, 0, + NULL, text::FORCE_NO_ENCODING | text::NO_NEW_LINE_SEQUENCE); os << CRLF; } @@ -279,8 +281,10 @@ void body::generate(utility::outputStream& os, const string::size_type maxLineLe if (!epilogText.empty()) { - encodeAndFoldText(os, text(word(epilogText, getCharset())), maxLineLength, 0, - NULL, encodeAndFoldFlags::forceNoEncoding | encodeAndFoldFlags::noNewLineSequence); + text epilog(word(epilogText, getCharset())); + + epilog.encodeAndFold(os, maxLineLength, 0, + NULL, text::FORCE_NO_ENCODING | text::NO_NEW_LINE_SEQUENCE); os << CRLF; } diff --git a/src/mailbox.cpp b/src/mailbox.cpp index 1420f738..a144de7b 100644 --- a/src/mailbox.cpp +++ b/src/mailbox.cpp @@ -316,7 +316,7 @@ void mailbox::parse(const string& buffer, const string::size_type position, } else { - decodeAndUnfoldText(name, m_name); + text::decodeAndUnfold(name, &m_name); m_email.empty(); m_email.reserve(address.size()); @@ -405,8 +405,8 @@ void mailbox::generate(utility::outputStream& os, const string::size_type maxLin string::size_type pos = curLinePos; bool newLine = true; - encodeAndFoldText(os, m_name, maxLineLength, pos, &pos, - forceEncode ? encodeAndFoldFlags::forceEncoding : encodeAndFoldFlags::none); + m_name.encodeAndFold(os, maxLineLength, pos, &pos, + forceEncode ? text::FORCE_ENCODING : 0); if (pos + m_email.length() + 3 > maxLineLength) { diff --git a/src/mailboxGroup.cpp b/src/mailboxGroup.cpp index c266a331..505e8448 100644 --- a/src/mailboxGroup.cpp +++ b/src/mailboxGroup.cpp @@ -100,7 +100,7 @@ void mailboxGroup::parse(const string& buffer, const string::size_type position, } } - decodeAndUnfoldText(name, m_name); + text::decodeAndUnfold(name, &m_name); if (newPosition) *newPosition = end; @@ -152,8 +152,8 @@ void mailboxGroup::generate(utility::outputStream& os, const string::size_type m string::size_type pos = curLinePos; - encodeAndFoldText(os, m_name, maxLineLength - 2, pos, &pos, - forceEncode ? encodeAndFoldFlags::forceEncoding : encodeAndFoldFlags::none); + m_name.encodeAndFold(os, maxLineLength - 2, pos, &pos, + forceEncode ? text::FORCE_ENCODING : 0); os << ":"; ++pos; diff --git a/src/messaging/IMAPMessage.cpp b/src/messaging/IMAPMessage.cpp index 56a3287f..72f9ce31 100644 --- a/src/messaging/IMAPMessage.cpp +++ b/src/messaging/IMAPMessage.cpp @@ -602,7 +602,7 @@ void IMAPMessage::processFetchResponse // Subject text subject; - decodeAndUnfoldText(env->env_subject()->value(), subject); + text::decodeAndUnfold(env->env_subject()->value(), &subject); hdr.Subject().setValue(subject); @@ -724,7 +724,7 @@ void IMAPMessage::convertAddressList const IMAPParser::address& addr = **it; text name; - decodeAndUnfoldText(addr.addr_name()->value(), name); + text::decodeAndUnfold(addr.addr_name()->value(), &name); string email = addr.addr_mailbox()->value() + "@" + addr.addr_host()->value(); diff --git a/src/relay.cpp b/src/relay.cpp index 25f3aac7..2edc9a28 100644 --- a/src/relay.cpp +++ b/src/relay.cpp @@ -213,8 +213,8 @@ void relay::generate(utility::outputStream& os, const string::size_type maxLineL oss << "; " << m_date.generate(); - encodeAndFoldText(os, text(oss.str()), maxLineLength, - curLinePos, newLinePos, encodeAndFoldFlags::forceNoEncoding); + text(oss.str()).encodeAndFold(os, maxLineLength, + curLinePos, newLinePos, text::FORCE_NO_ENCODING); } diff --git a/src/text.cpp b/src/text.cpp index 804507b1..5a5a1250 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -19,6 +19,12 @@ #include "text.hpp" +#include "utility/stringUtils.hpp" + +#include "encoder.hpp" +#include "encoderB64.hpp" +#include "encoderQP.hpp" + namespace vmime { @@ -63,7 +69,7 @@ text::~text() void text::parse(const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition) { - decodeAndUnfoldText(buffer.begin() + position, buffer.begin() + end, *this); + decodeAndUnfold(buffer.begin() + position, buffer.begin() + end, *this); if (newPosition) *newPosition = end; @@ -73,7 +79,7 @@ void text::parse(const string& buffer, const string::size_type position, void text::generate(utility::outputStream& os, const string::size_type maxLineLength, const string::size_type curLinePos, string::size_type* newLinePos) const { - encodeAndFoldText(os, *this, maxLineLength, curLinePos, newLinePos, encodeAndFoldFlags::none); + encodeAndFold(os, maxLineLength, curLinePos, newLinePos, 0); } @@ -315,4 +321,521 @@ text* text::newFromString(const string& in, const charset& ch, text* generateInE } +void text::encodeAndFold(utility::outputStream& os, const string::size_type maxLineLength, + const string::size_type firstLineOffset, string::size_type* lastLineLength, const int flags) const +{ + string::size_type curLineLength = firstLineOffset; + + for (int wi = 0 ; wi < getWordCount() ; ++wi) + { + const word& w = *getWordAt(wi); + const string& buffer = w.getBuffer(); + + // Calculate the number of ASCII chars to check whether encoding is needed + // and _which_ encoding to use. + const string::size_type asciiCount = + stringUtils::countASCIIchars(buffer.begin(), buffer.end()); + + bool noEncoding = (flags & FORCE_NO_ENCODING) || + (!(flags & FORCE_ENCODING) && asciiCount == buffer.length()); + + if (noEncoding) + { + // We will fold lines without encoding them. + + string::const_iterator lastWSpos = buffer.end(); // last white-space position + string::const_iterator curLineStart = buffer.begin(); // current line start + + string::const_iterator p = buffer.begin(); + const string::const_iterator end = buffer.end(); + + bool finished = false; + bool newLine = false; + + while (!finished) + { + for ( ; p != end ; ++p, ++curLineLength) + { + // Exceeded maximum line length, but we have found a white-space + // where we can cut the line... + if (curLineLength >= maxLineLength && lastWSpos != end) + break; + + if (*p == ' ' || *p == '\t') + { + // Remember the position of this white-space character + lastWSpos = p; + } + } + + if (p != end) + ++curLineLength; + + //if (p == end || curLineLength >= maxLineLength) + { + if (p == end || lastWSpos == end) + { + // If we are here, it means that we have found no whitespace + // before the first "maxLineLength" characters. In this case, + // we write the full line no matter of the max line length... + + if (!newLine && p != end && lastWSpos == end && + wi != 0 && curLineStart == buffer.begin()) + { + // Here, we are continuing on the line of previous encoded + // word, but there is not even enough space to put the + // first word of this line, so we start a new line. + if (flags & NO_NEW_LINE_SEQUENCE) + { + os << CRLF; + curLineLength = 0; + } + else + { + os << NEW_LINE_SEQUENCE; + curLineLength = NEW_LINE_SEQUENCE_LENGTH; + } + + p = curLineStart; + lastWSpos = end; + newLine = true; + } + else + { + os << string(curLineStart, p); + + if (p == end) + { + finished = true; + } + else + { + if (flags & NO_NEW_LINE_SEQUENCE) + { + os << CRLF; + curLineLength = 0; + } + else + { + os << NEW_LINE_SEQUENCE; + curLineLength = NEW_LINE_SEQUENCE_LENGTH; + } + + curLineStart = p; + lastWSpos = end; + newLine = true; + } + } + } + else + { + // In this case, there will not be enough space on the line for all the + // characters _after_ the last white-space; so we cut the line at this + // last white-space. + +#if 1 + if (curLineLength != 1 && wi != 0) + os << " "; // Separate from previous word +#endif + + os << string(curLineStart, lastWSpos); + + if (flags & NO_NEW_LINE_SEQUENCE) + { + os << CRLF; + curLineLength = 0; + } + else + { + os << NEW_LINE_SEQUENCE; + curLineLength = NEW_LINE_SEQUENCE_LENGTH; + } + + curLineStart = lastWSpos + 1; + + p = lastWSpos + 1; + lastWSpos = end; + newLine = true; + } + } + } + } + /* + RFC #2047: + 4. Encodings + + Initially, the legal values for "encoding" are "Q" and "B". These + encodings are described below. The "Q" encoding is recommended for + use when most of the characters to be encoded are in the ASCII + character set; otherwise, the "B" encoding should be used. + Nevertheless, a mail reader which claims to recognize 'encoded-word's + MUST be able to accept either encoding for any character set which it + supports. + */ + else + { + // We will encode _AND_ fold lines + + /* + RFC #2047: + 2. Syntax of encoded-words + + " While there is no limit to the length of a multiple-line header + field, each line of a header field that contains one or more + 'encoded-word's is limited to 76 characters. " + */ + + const string::size_type maxLineLength3 = + (maxLineLength == lineLengthLimits::infinite) + ? maxLineLength + : std::min(maxLineLength, (const string::size_type) 76); + + // Base64 if more than 60% non-ascii, quoted-printable else (default) + const string::size_type asciiPercent = (100 * asciiCount) / buffer.length(); + const string::value_type encoding = (asciiPercent <= 40) ? 'B' : 'Q'; + + string wordStart("=?" + w.getCharset().getName() + "?" + encoding + "?"); + string wordEnd("?="); + + const string::size_type minWordLength = wordStart.length() + wordEnd.length(); + const string::size_type maxLineLength2 = (maxLineLength3 < minWordLength + 1) + ? maxLineLength3 + minWordLength + 1 : maxLineLength3; + + // Checks whether remaining space on this line is usable. If too few + // characters can be encoded, start a new line. + bool startNewLine = true; + + if (curLineLength + 2 < maxLineLength2) + { + const string::size_type remainingSpaceOnLine = maxLineLength2 - curLineLength - 2; + + if (remainingSpaceOnLine < minWordLength + 10) + { + // Space for no more than 10 encoded chars! + // It is not worth while to continue on this line... + startNewLine = true; + } + else + { + // OK, there is enough usable space on the current line. + startNewLine = false; + } + } + + if (startNewLine) + { + os << NEW_LINE_SEQUENCE; + curLineLength = NEW_LINE_SEQUENCE_LENGTH; + } + + // Encode and fold input buffer + string::const_iterator pos = buffer.begin(); + string::size_type remaining = buffer.length(); + + encoder* theEncoder = ((encoding == 'B') + ? ((encoder*) new encoderB64) + : ((encoder*) new encoderQP)); + + string qpEncodedBuffer; + + if (encoding == 'Q') + { + theEncoder->getProperties()["rfc2047"] = true; + + // In the case of Quoted-Printable encoding, we cannot simply encode input + // buffer line by line. So, we encode the whole buffer and we will fold it + // in the next loop... + utility::inputStreamStringAdapter in(buffer); + utility::outputStreamStringAdapter out(qpEncodedBuffer); + + theEncoder->encode(in, out); + + pos = qpEncodedBuffer.begin(); + remaining = qpEncodedBuffer.length(); + } + +#if 1 + if (curLineLength != 1 && wi != 0) + { + os << " "; // Separate from previous word + ++curLineLength; + } +#endif + + for ( ; remaining ; ) + { + // Start a new encoded word + os << wordStart; + curLineLength += minWordLength; + + // Compute the number of encoded chars that will fit on this line + const string::size_type fit = maxLineLength2 - curLineLength; + + // Base-64 encoding + if (encoding == 'B') + { + // TODO: WARNING! "Any encoded word which encodes a non-integral + // number of characters or octets is incorrectly formed." + + // Here, we have a formula to compute the maximum number of source + // characters to encode knowing the maximum number of encoded chars + // (with Base64, 3 bytes of input provide 4 bytes of output). + string::size_type count = (fit > 1) ? ((fit - 1) * 3) / 4 : 1; + if (count > remaining) count = remaining; + + utility::inputStreamStringAdapter in + (buffer, pos - buffer.begin(), pos - buffer.begin() + count); + + curLineLength += theEncoder->encode(in, os); + + pos += count; + remaining -= count; + } + // Quoted-Printable encoding + else + { + // TODO: WARNING! "Any encoded word which encodes a non-integral + // number of characters or octets is incorrectly formed." + + // All we have to do here is to take a certain number of character + // (that is less than or equal to "fit") from the QP encoded buffer, + // but we also make sure not to fold a "=XY" encoded char. + const string::const_iterator qpEnd = qpEncodedBuffer.end(); + string::const_iterator lastFoldPos = pos; + string::const_iterator p = pos; + string::size_type n = 0; + + while (n < fit && p != qpEnd) + { + if (*p == '=') + { + if (n + 3 >= fit) + { + lastFoldPos = p; + break; + } + + p += 3; + n += 3; + } + else + { + ++p; + ++n; + } + } + + if (lastFoldPos == pos) + lastFoldPos = p; + + os << string(pos, lastFoldPos); + + curLineLength += (lastFoldPos - pos) + 1; + + pos += n; + remaining -= n; + } + + // End of the encoded word + os << wordEnd; + + if (remaining) + { + os << NEW_LINE_SEQUENCE; + curLineLength = NEW_LINE_SEQUENCE_LENGTH; + } + } + + delete (theEncoder); + } + } + + if (lastLineLength) + *lastLineLength = curLineLength; +} + + +text* text::decodeAndUnfold(const string& in, text* generateInExisting) +{ + text* out = (generateInExisting != NULL) ? generateInExisting : new text(); + + out->removeAllWords(); + + decodeAndUnfold(in.begin(), in.end(), *out); + + return (out); +} + + +void text::decodeAndUnfold(const string::const_iterator& inStart, const string::const_iterator& inEnd, text& out) +{ + // NOTE: See RFC-2047, Pages 11-12 for knowing about handling + // of white-spaces between encoded words. + + out.removeAllWords(); + + string::const_iterator p = inStart; + const string::const_iterator end = inEnd; + + const charset defaultCharset(charsets::US_ASCII); + charset prevWordCharset(defaultCharset); + + bool prevIsEncoded = false; + + string::const_iterator prevPos = p; + + for ( ; ; ) + { + if (p == end || *p == '\n') + { + string::const_iterator textEnd = p; + + if (textEnd != inStart && *(textEnd - 1) == '\r') + --textEnd; + + if (textEnd != prevPos) + { + if (!out.isEmpty() && prevWordCharset == defaultCharset) + { + out.getWordAt(out.getWordCount() - 1)->getBuffer() += string(prevPos, textEnd); + } + else + { + prevWordCharset = defaultCharset; + out.appendWord(new word(string(prevPos, textEnd), defaultCharset)); + prevIsEncoded = false; + } + } + + if (p == end) + { + // Finished + break; + } + + // Skip the new-line character + prevPos = ++p; + } + else if (*p == '=' && (p + 1) != end && *(p + 1) == '?') + { + string::const_iterator wordPos = p; + p += 2; // skip '=?' + + if (p != end) + { + const string::const_iterator charsetPos = p; + + for ( ; p != end && *p != '?' ; ++p); + + if (p != end) // a charset is specified + { + const string::const_iterator charsetEnd = p; + const string::const_iterator encPos = ++p; // skip '?' + + for ( ; p != end && *p != '?' ; ++p); + + if (p != end) // an encoding is specified + { + //const string::const_iterator encEnd = p; + const string::const_iterator dataPos = ++p; // skip '?' + + for ( ; p != end && !(*p == '?' && *(p + 1) == '=') ; ++p); + + if (p != end) // some data is specified + { + const string::const_iterator dataEnd = p; + p += 2; // skip '?=' + + encoder* theEncoder = NULL; + + // Base-64 encoding + if (*encPos == 'B' || *encPos == 'b') + { + theEncoder = new encoderB64; + } + // Quoted-Printable encoding + else if (*encPos == 'Q' || *encPos == 'q') + { + theEncoder = new encoderQP; + theEncoder->getProperties()["rfc2047"] = true; + } + + if (theEncoder) + { + // Decode text + string decodedBuffer; + + utility::inputStreamStringAdapter ein(string(dataPos, dataEnd)); + utility::outputStreamStringAdapter eout(decodedBuffer); + + theEncoder->decode(ein, eout); + delete (theEncoder); + + // Append all the unencoded text before this word + if (prevPos != wordPos) + { + string::const_iterator p = prevPos; + + if (prevIsEncoded) + { + // Check whether there are only white-spaces between + // the two encoded words + for ( ; (p != wordPos) && isspace(*p) ; ++p); + } + + if (p != wordPos) // if not empty + { + if (!out.isEmpty() && prevWordCharset == defaultCharset) + { + out.getWordAt(out.getWordCount() - 1)-> + getBuffer() += string(prevPos, wordPos); + } + else + { + out.appendWord(new word + (string(prevPos, wordPos), defaultCharset)); + + prevWordCharset = defaultCharset; + } + } + } + + // Append this fresh decoded word to output text + charset thisCharset(string(charsetPos, charsetEnd)); + + if (!out.isEmpty() && prevWordCharset == thisCharset) + { + out.getWordAt(out.getWordCount() - 1)-> + getBuffer() += decodedBuffer; + } + else + { + prevWordCharset = thisCharset; + out.appendWord(new word(decodedBuffer, thisCharset)); + } + + // This word has been decoded: we can advance in the input buffer + prevPos = p; + prevIsEncoded = true; + } + else + { + // Unknown encoding: can't decode this word, we will + // treat this word as ordinary text (RFC-2047, Page 9). + } + } + } + } + } + } + else + { + ++p; + } + + for ( ; p != end && *p != '=' && *p != '\n' ; ++p); + } +} + + } // vmime diff --git a/src/text.hpp b/src/text.hpp index e05aea3e..e7971f01 100644 --- a/src/text.hpp +++ b/src/text.hpp @@ -159,6 +159,40 @@ public: */ static text* newFromString(const string& in, const charset& ch, text* generateInExisting = NULL); + /** Flags used by "encodeAndFold" function. + */ + enum EncodeAndFoldFlags + { + // NOTE: If both "FORCE_NO_ENCODING" and "FORCE_ENCODING" are + // specified, "FORCE_NO_ENCODING" is used by default. + + FORCE_NO_ENCODING = (1 << 0), /**< Just fold lines, don't encode them. */ + FORCE_ENCODING = (1 << 1), /**< Encode lines even if they are plain ASCII text. */ + NO_NEW_LINE_SEQUENCE = (1 << 2) /**< Use CRLF instead of new-line sequence (CRLF + TAB). */ + }; + + /** Encode and fold text in respect to RFC-2047. + * + * @param os output stream + * @param maxLineLength maximum line length for output + * @param firstLineOffset the first line length (may be useful if the current output line is not empty) + * @param lastLineLength will receive the length of the last line written + * @param flags encoding flags (see EncodeAndFoldFlags) + */ + void encodeAndFold(utility::outputStream& os, const string::size_type maxLineLength, + const string::size_type firstLineOffset, string::size_type* lastLineLength, const int flags) const; + + /** Decode and unfold text (RFC-2047). + * + * @param in input string + * @param generateInExisting if not NULL, the resulting text will be generated + * in the specified object instead of a new created object (in this case, the + * function returns the same pointer). Can be used to avoid copying the + * resulting object into an existing object. + * @return new text object or existing object if generateInExisting != NULL + */ + static text* decodeAndUnfold(const string& in, text* generateInExisting = NULL); + using component::parse; using component::generate; @@ -169,6 +203,8 @@ public: private: + static void decodeAndUnfold(const string::const_iterator& inStart, const string::const_iterator& inEnd, text& out); + std::vector m_words; }; diff --git a/tests/parser/textTest.cpp b/tests/parser/textTest.cpp index 87650811..6dfb5016 100644 --- a/tests/parser/textTest.cpp +++ b/tests/parser/textTest.cpp @@ -114,6 +114,8 @@ namespace assert_eq("2.7", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(2)->getCharset()); } + // TODO: tests for encodeAndFold() and decodeAndUnfold() + public: textTest() : suite("vmime::text")