about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- src/text.cpp 520
-rw-r--r-- src/word.cpp 582
-rw-r--r-- vmime/text.hpp 2
-rw-r--r-- vmime/word.hpp 20
5 files changed, 621 insertions, 508 deletions
diff --git a/ChangeLog b/ChangeLog
index 0fdeac69..566a105c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,11 @@
VERSION 0.6.4cvs
================
+2005-03-15 Vincent Richard <[email protected]>
+
+ * text.{cpp|hpp}, word.{cpp|hpp}: moved word parsing from 'text' class
+ to 'word' class, which now inherits from 'component'.
+
2005-03-14 Vincent Richard <[email protected]>
* removed singleton<> and singletonManager classes: useless and quite
diff --git a/src/text.cpp b/src/text.cpp
index e510da12..97a58647 100644
--- a/src/text.cpp
+++ b/src/text.cpp
@@ -19,13 +19,8 @@
#include "vmime/text.hpp"
-#include "vmime/utility/stringUtils.hpp"
#include "vmime/parserHelpers.hpp"
-#include "vmime/encoder.hpp"
-#include "vmime/encoderB64.hpp"
-#include "vmime/encoderQP.hpp"
-
namespace vmime
{
@@ -70,12 +65,18 @@ text::~text()
void text::parse(const string& buffer, const string::size_type position,
const string::size_type end, string::size_type* newPosition)
{
- decodeAndUnfold(buffer.begin() + position, buffer.begin() + end, *this);
+ removeAllWords();
+
+ string::size_type newPos;
+
+ const std::vector <word*> words = word::parseMultiple(buffer, position, end, &newPos);
- setParsedBounds(position, end);
+ copy_vector(words, m_words);
+
+ setParsedBounds(position, newPos);
if (newPosition)
- *newPosition = end;
+ *newPosition = newPos;
}
@@ -331,327 +332,8 @@ void text::encodeAndFold(utility::outputStream& os, const string::size_type maxL
for (int wi = 0 ; wi < getWordCount() ; ++wi)
{
- const word& w = *getWordAt(wi);
- const string& buffer = w.getBuffer();
-
- // Calculate the number of ASCII chars to check whether encoding is needed
- // and _which_ encoding to use.
- const string::size_type asciiCount =
- utility::stringUtils::countASCIIchars(buffer.begin(), buffer.end());
-
- bool noEncoding = (flags & FORCE_NO_ENCODING) ||
- (!(flags & FORCE_ENCODING) && asciiCount == buffer.length());
-
- if (noEncoding)
- {
- // We will fold lines without encoding them.
-
- string::const_iterator lastWSpos = buffer.end(); // last white-space position
- string::const_iterator curLineStart = buffer.begin(); // current line start
-
- string::const_iterator p = buffer.begin();
- const string::const_iterator end = buffer.end();
-
- bool finished = false;
- bool newLine = false;
-
- while (!finished)
- {
- for ( ; p != end ; ++p, ++curLineLength)
- {
- // Exceeded maximum line length, but we have found a white-space
- // where we can cut the line...
- if (curLineLength >= maxLineLength && lastWSpos != end)
- break;
-
- if (*p == ' ' || *p == '\t')
- {
- // Remember the position of this white-space character
- lastWSpos = p;
- }
- }
-
- if (p != end)
- ++curLineLength;
-
- //if (p == end || curLineLength >= maxLineLength)
- {
- if (p == end || lastWSpos == end)
- {
- // If we are here, it means that we have found no whitespace
- // before the first "maxLineLength" characters. In this case,
- // we write the full line no matter of the max line length...
-
- if (!newLine && p != end && lastWSpos == end &&
- wi != 0 && curLineStart == buffer.begin())
- {
- // Here, we are continuing on the line of previous encoded
- // word, but there is not even enough space to put the
- // first word of this line, so we start a new line.
- if (flags & NO_NEW_LINE_SEQUENCE)
- {
- os << CRLF;
- curLineLength = 0;
- }
- else
- {
- os << NEW_LINE_SEQUENCE;
- curLineLength = NEW_LINE_SEQUENCE_LENGTH;
- }
-
- p = curLineStart;
- lastWSpos = end;
- newLine = true;
- }
- else
- {
- os << string(curLineStart, p);
-
- if (p == end)
- {
- finished = true;
- }
- else
- {
- if (flags & NO_NEW_LINE_SEQUENCE)
- {
- os << CRLF;
- curLineLength = 0;
- }
- else
- {
- os << NEW_LINE_SEQUENCE;
- curLineLength = NEW_LINE_SEQUENCE_LENGTH;
- }
-
- curLineStart = p;
- lastWSpos = end;
- newLine = true;
- }
- }
- }
- else
- {
- // In this case, there will not be enough space on the line for all the
- // characters _after_ the last white-space; so we cut the line at this
- // last white-space.
-
-#if 1
- if (curLineLength != 1 && wi != 0)
- os << " "; // Separate from previous word
-#endif
-
- os << string(curLineStart, lastWSpos);
-
- if (flags & NO_NEW_LINE_SEQUENCE)
- {
- os << CRLF;
- curLineLength = 0;
- }
- else
- {
- os << NEW_LINE_SEQUENCE;
- curLineLength = NEW_LINE_SEQUENCE_LENGTH;
- }
-
- curLineStart = lastWSpos + 1;
-
- p = lastWSpos + 1;
- lastWSpos = end;
- newLine = true;
- }
- }
- }
- }
- /*
- RFC #2047:
- 4. Encodings
-
- Initially, the legal values for "encoding" are "Q" and "B". These
- encodings are described below. The "Q" encoding is recommended for
- use when most of the characters to be encoded are in the ASCII
- character set; otherwise, the "B" encoding should be used.
- Nevertheless, a mail reader which claims to recognize 'encoded-word's
- MUST be able to accept either encoding for any character set which it
- supports.
- */
- else
- {
- // We will encode _AND_ fold lines
-
- /*
- RFC #2047:
- 2. Syntax of encoded-words
-
- " While there is no limit to the length of a multiple-line header
- field, each line of a header field that contains one or more
- 'encoded-word's is limited to 76 characters. "
- */
-
- const string::size_type maxLineLength3 =
- (maxLineLength == lineLengthLimits::infinite)
- ? maxLineLength
- : std::min(maxLineLength, static_cast <string::size_type>(76));
-
- // Base64 if more than 60% non-ascii, quoted-printable else (default)
- const string::size_type asciiPercent = (100 * asciiCount) / buffer.length();
- const string::value_type encoding = (asciiPercent <= 40) ? 'B' : 'Q';
-
- string wordStart("=?" + w.getCharset().getName() + "?" + encoding + "?");
- string wordEnd("?=");
-
- const string::size_type minWordLength = wordStart.length() + wordEnd.length();
- const string::size_type maxLineLength2 = (maxLineLength3 < minWordLength + 1)
- ? maxLineLength3 + minWordLength + 1 : maxLineLength3;
-
- // Checks whether remaining space on this line is usable. If too few
- // characters can be encoded, start a new line.
- bool startNewLine = true;
-
- if (curLineLength + 2 < maxLineLength2)
- {
- const string::size_type remainingSpaceOnLine = maxLineLength2 - curLineLength - 2;
-
- if (remainingSpaceOnLine < minWordLength + 10)
- {
- // Space for no more than 10 encoded chars!
- // It is not worth while to continue on this line...
- startNewLine = true;
- }
- else
- {
- // OK, there is enough usable space on the current line.
- startNewLine = false;
- }
- }
-
- if (startNewLine)
- {
- os << NEW_LINE_SEQUENCE;
- curLineLength = NEW_LINE_SEQUENCE_LENGTH;
- }
-
- // Encode and fold input buffer
- string::const_iterator pos = buffer.begin();
- string::size_type remaining = buffer.length();
-
- encoder* theEncoder;
-
- if (encoding == 'B') theEncoder = new encoderB64;
- else theEncoder = new encoderQP;
-
- string qpEncodedBuffer;
-
- if (encoding == 'Q')
- {
- theEncoder->getProperties()["rfc2047"] = true;
-
- // In the case of Quoted-Printable encoding, we cannot simply encode input
- // buffer line by line. So, we encode the whole buffer and we will fold it
- // in the next loop...
- utility::inputStreamStringAdapter in(buffer);
- utility::outputStreamStringAdapter out(qpEncodedBuffer);
-
- theEncoder->encode(in, out);
-
- pos = qpEncodedBuffer.begin();
- remaining = qpEncodedBuffer.length();
- }
-
-#if 1
- if (curLineLength != 1 && wi != 0)
- {
- os << " "; // Separate from previous word
- ++curLineLength;
- }
-#endif
-
- for ( ; remaining ; )
- {
- // Start a new encoded word
- os << wordStart;
- curLineLength += minWordLength;
-
- // Compute the number of encoded chars that will fit on this line
- const string::size_type fit = maxLineLength2 - curLineLength;
-
- // Base-64 encoding
- if (encoding == 'B')
- {
- // TODO: WARNING! "Any encoded word which encodes a non-integral
- // number of characters or octets is incorrectly formed."
-
- // Here, we have a formula to compute the maximum number of source
- // characters to encode knowing the maximum number of encoded chars
- // (with Base64, 3 bytes of input provide 4 bytes of output).
- string::size_type count = (fit > 1) ? ((fit - 1) * 3) / 4 : 1;
- if (count > remaining) count = remaining;
-
- utility::inputStreamStringAdapter in
- (buffer, pos - buffer.begin(), pos - buffer.begin() + count);
-
- curLineLength += theEncoder->encode(in, os);
-
- pos += count;
- remaining -= count;
- }
- // Quoted-Printable encoding
- else
- {
- // TODO: WARNING! "Any encoded word which encodes a non-integral
- // number of characters or octets is incorrectly formed."
-
- // All we have to do here is to take a certain number of character
- // (that is less than or equal to "fit") from the QP encoded buffer,
- // but we also make sure not to fold a "=XY" encoded char.
- const string::const_iterator qpEnd = qpEncodedBuffer.end();
- string::const_iterator lastFoldPos = pos;
- string::const_iterator p = pos;
- string::size_type n = 0;
-
- while (n < fit && p != qpEnd)
- {
- if (*p == '=')
- {
- if (n + 3 >= fit)
- {
- lastFoldPos = p;
- break;
- }
-
- p += 3;
- n += 3;
- }
- else
- {
- ++p;
- ++n;
- }
- }
-
- if (lastFoldPos == pos)
- lastFoldPos = p;
-
- os << string(pos, lastFoldPos);
-
- curLineLength += (lastFoldPos - pos) + 1;
-
- pos += n;
- remaining -= n;
- }
-
- // End of the encoded word
- os << wordEnd;
-
- if (remaining)
- {
- os << NEW_LINE_SEQUENCE;
- curLineLength = NEW_LINE_SEQUENCE_LENGTH;
- }
- }
-
- delete (theEncoder);
- }
+ getWordAt(wi)->generate(os, maxLineLength, curLineLength,
+ &curLineLength, flags, (wi == 0));
}
if (lastLineLength)
@@ -665,187 +347,21 @@ text* text::decodeAndUnfold(const string& in, text* generateInExisting)
out->removeAllWords();
- decodeAndUnfold(in.begin(), in.end(), *out);
+ const std::vector <word*> words = word::parseMultiple(in, 0, in.length(), NULL);
+
+ copy_vector(words, out->m_words);
return (out);
}
-void text::decodeAndUnfold(const string::const_iterator& inStart, const string::const_iterator& inEnd, text& out)
+const std::vector <const component*> text::getChildComponents() const
{
- // NOTE: See RFC-2047, Pages 11-12 for knowing about handling
- // of white-spaces between encoded words.
-
- out.removeAllWords();
-
- string::const_iterator p = inStart;
- const string::const_iterator end = inEnd;
-
- const charset defaultCharset(charsets::US_ASCII);
- charset prevWordCharset(defaultCharset);
-
- bool prevIsEncoded = false;
-
- string::const_iterator prevPos = p;
-
- for ( ; ; )
- {
- if (p == end) // || *p == '\n')
- {
- string::const_iterator textEnd = p;
-
- if (textEnd != inStart && *(textEnd - 1) == '\r')
- --textEnd;
-
- if (textEnd != prevPos)
- {
- if (!out.isEmpty() && prevWordCharset == defaultCharset)
- {
- out.getWordAt(out.getWordCount() - 1)->getBuffer() += string(prevPos, textEnd);
- }
- else
- {
- prevWordCharset = defaultCharset;
- out.appendWord(new word(string(prevPos, textEnd), defaultCharset));
- prevIsEncoded = false;
- }
- }
-
- if (p == end)
- {
- // Finished
- break;
- }
-
- // Skip the new-line character
- prevPos = ++p;
- }
- else if (*p == '=' && (p + 1) != end && *(p + 1) == '?')
- {
- string::const_iterator wordPos = p;
- p += 2; // skip '=?'
+ std::vector <const component*> list;
- if (p != end)
- {
- const string::const_iterator charsetPos = p;
+ copy_vector(m_words, list);
- for ( ; p != end && *p != '?' ; ++p);
-
- if (p != end) // a charset is specified
- {
- const string::const_iterator charsetEnd = p;
- const string::const_iterator encPos = ++p; // skip '?'
-
- for ( ; p != end && *p != '?' ; ++p);
-
- if (p != end) // an encoding is specified
- {
- //const string::const_iterator encEnd = p;
- const string::const_iterator dataPos = ++p; // skip '?'
-
- for ( ; p != end && !(*p == '?' && *(p + 1) == '=') ; ++p);
-
- if (p != end) // some data is specified
- {
- const string::const_iterator dataEnd = p;
- p += 2; // skip '?='
-
- encoder* theEncoder = NULL;
-
- // Base-64 encoding
- if (*encPos == 'B' || *encPos == 'b')
- {
- theEncoder = new encoderB64;
- }
- // Quoted-Printable encoding
- else if (*encPos == 'Q' || *encPos == 'q')
- {
- theEncoder = new encoderQP;
- theEncoder->getProperties()["rfc2047"] = true;
- }
-
- if (theEncoder)
- {
- // Decode text
- string decodedBuffer;
-
- utility::inputStreamStringAdapter ein(string(dataPos, dataEnd));
- utility::outputStreamStringAdapter eout(decodedBuffer);
-
- theEncoder->decode(ein, eout);
- delete (theEncoder);
-
- // Append all the unencoded text before this word
- if (prevPos != wordPos)
- {
- string::const_iterator p = prevPos;
-
- if (prevIsEncoded)
- {
- // Check whether there are only white-spaces between
- // the two encoded words
- for ( ; (p != wordPos) && parserHelpers::isspace(*p) ; ++p);
- }
-
- if (p != wordPos) // if not empty
- {
- if (!out.isEmpty() && prevWordCharset == defaultCharset)
- {
- out.getWordAt(out.getWordCount() - 1)->
- getBuffer() += string(prevPos, wordPos);
- }
- else
- {
- out.appendWord(new word
- (string(prevPos, wordPos), defaultCharset));
-
- prevWordCharset = defaultCharset;
- }
- }
- }
-
- // Append this fresh decoded word to output text
- charset thisCharset(string(charsetPos, charsetEnd));
-
- if (!out.isEmpty() && prevWordCharset == thisCharset)
- {
- out.getWordAt(out.getWordCount() - 1)->
- getBuffer() += decodedBuffer;
- }
- else
- {
- prevWordCharset = thisCharset;
- out.appendWord(new word(decodedBuffer, thisCharset));
- }
-
- // This word has been decoded: we can advance in the input buffer
- prevPos = p;
- prevIsEncoded = true;
- }
- else
- {
- // Unknown encoding: can't decode this word, we will
- // treat this word as ordinary text (RFC-2047, Page 9).
- }
- }
- }
- }
- }
- }
- else
- {
- ++p;
- }
-
- for ( ; p != end && *p != '=' && *p != '\n' ; ++p);
- }
-}
-
-
-const std::vector <const component*> text::getChildComponents() const
-{
- // TODO: 'word' should inherit from 'component'
- return std::vector <const component*>();
+ return (list);
}
diff --git a/src/word.cpp b/src/word.cpp
index 6801fb47..7fde1fab 100644
--- a/src/word.cpp
+++ b/src/word.cpp
@@ -18,6 +18,15 @@
//
#include "vmime/word.hpp"
+#include "vmime/text.hpp"
+
+#include "vmime/utility/stringUtils.hpp"
+#include "vmime/utility/smartPtr.hpp"
+#include "vmime/parserHelpers.hpp"
+
+#include "vmime/encoder.hpp"
+#include "vmime/encoderB64.hpp"
+#include "vmime/encoderQP.hpp"
namespace vmime
@@ -31,7 +40,7 @@ word::word()
word::word(const word& w)
- : m_buffer(w.m_buffer), m_charset(w.m_charset)
+ : component(), m_buffer(w.m_buffer), m_charset(w.m_charset)
{
}
@@ -48,6 +57,567 @@ word::word(const string& buffer, const charset& charset)
}
+word* word::parseNext(const string& buffer, const string::size_type position,
+ const string::size_type end, string::size_type* newPosition,
+ bool prevIsEncoded, bool* isEncoded, bool isFirst)
+{
+ string::size_type pos = position;
+
+ // Ignore white-spaces:
+ // - before the first word
+ // - between two encoded words
+ // - after the last word
+ while (pos < end && parserHelpers::isspace(buffer[pos]))
+ ++pos;
+
+ string::size_type startPos = pos;
+ string unencoded;
+
+ while (pos < end)
+ {
+ // End of line: does not occur in the middle of an encoded word. This is
+ // used to remove folding white-spaces from unencoded text.
+ if (buffer[pos] == '\n')
+ {
+ string::size_type endPos = pos;
+
+ if (pos > position && buffer[pos - 1] == '\r')
+ --endPos;
+
+ while (pos != end && parserHelpers::isspace(buffer[pos]))
+ ++pos;
+
+ unencoded += string(buffer.begin() + startPos, buffer.begin() + endPos);
+ unencoded += ' ';
+
+ startPos = pos;
+ }
+ // Start of an encoded word
+ else if (pos + 6 < end && // 6 = "=?(.+)?(.*)?="
+ buffer[pos] == '=' && buffer[pos + 1] == '?')
+ {
+ // Check whether there is some unencoded text before
+ unencoded += string(buffer.begin() + startPos, buffer.begin() + pos);
+
+ if (!unencoded.empty())
+ {
+ word* w = new word(unencoded, charset(charsets::US_ASCII));
+ w->setParsedBounds(position, pos);
+
+ if (newPosition)
+ *newPosition = pos;
+
+ if (isEncoded)
+ *isEncoded = false;
+
+ return (w);
+ }
+
+ // ...else find the finish sequence '?=' and return an encoded word
+ const string::size_type wordStart = pos;
+
+ pos += 4;
+
+ while (pos < end)
+ {
+ if (buffer[pos] == '\n')
+ {
+ // End of line not allowed in the middle of an encoded word:
+ // treat this text as unencoded text (see *).
+ break;
+ }
+ else if (buffer[pos] == '?' && pos + 1 < end && buffer[pos + 1] == '=')
+ {
+ // Found the finish sequence
+ break;
+ }
+
+ ++pos;
+ }
+
+ if (pos == end) // not a valid word (no finish sequence)
+ continue;
+ else if (buffer[pos] == '\n') // (*)
+ continue;
+
+ pos += 2; // ?=
+
+ word* w = new word();
+ w->parse(buffer, wordStart, pos, NULL);
+
+ if (newPosition)
+ *newPosition = pos;
+
+ if (isEncoded)
+ *isEncoded = true;
+
+ return (w);
+ }
+
+ ++pos;
+ }
+
+ // Treat unencoded text at the end of the buffer
+ if (end != startPos)
+ {
+ if (startPos != pos && !isFirst && prevIsEncoded)
+ unencoded += ' ';
+
+ unencoded += string(buffer.begin() + startPos, buffer.begin() + end);
+
+ word* w = new word(unencoded, charset(charsets::US_ASCII));
+ w->setParsedBounds(position, end);
+
+ if (newPosition)
+ *newPosition = end;
+
+ if (isEncoded)
+ *isEncoded = false;
+
+ return (w);
+ }
+
+ return (NULL);
+}
+
+
+const std::vector <word*> word::parseMultiple(const string& buffer, const string::size_type position,
+ const string::size_type end, string::size_type* newPosition)
+{
+ std::vector <word*> res;
+ word* w = NULL;
+
+ string::size_type pos = position;
+
+ bool prevIsEncoded = false;
+
+ while ((w = word::parseNext(buffer, pos, end, &pos, prevIsEncoded, &prevIsEncoded, (w == NULL))) != NULL)
+ res.push_back(w);
+
+ if (newPosition)
+ *newPosition = pos;
+
+ return (res);
+}
+
+
+void word::parse(const string& buffer, const string::size_type position,
+ const string::size_type end, string::size_type* newPosition)
+{
+ if (position + 6 < end && // 6 = "=?(.+)?(.*)?="
+ buffer[position] == '=' && buffer[position + 1] == '?')
+ {
+ string::const_iterator p = buffer.begin() + position + 2;
+ const string::const_iterator pend = buffer.begin() + end;
+
+ const string::const_iterator charsetPos = p;
+
+ for ( ; p != pend && *p != '?' ; ++p);
+
+ if (p != pend) // a charset is specified
+ {
+ const string::const_iterator charsetEnd = p;
+ const string::const_iterator encPos = ++p; // skip '?'
+
+ for ( ; p != pend && *p != '?' ; ++p);
+
+ if (p != pend) // an encoding is specified
+ {
+ //const string::const_iterator encEnd = p;
+ const string::const_iterator dataPos = ++p; // skip '?'
+
+ for ( ; p != pend && !(*p == '?' && *(p + 1) == '=') ; ++p);
+
+ if (p != pend) // some data is specified
+ {
+ const string::const_iterator dataEnd = p;
+ p += 2; // skip '?='
+
+ encoder* theEncoder = NULL;
+
+ // Base-64 encoding
+ if (*encPos == 'B' || *encPos == 'b')
+ {
+ theEncoder = new encoderB64;
+ }
+ // Quoted-Printable encoding
+ else if (*encPos == 'Q' || *encPos == 'q')
+ {
+ theEncoder = new encoderQP;
+ theEncoder->getProperties()["rfc2047"] = true;
+ }
+
+ if (theEncoder)
+ {
+ // Decode text
+ string decodedBuffer;
+
+ utility::inputStreamStringAdapter ein(string(dataPos, dataEnd));
+ utility::outputStreamStringAdapter eout(decodedBuffer);
+
+ theEncoder->decode(ein, eout);
+ delete (theEncoder);
+
+ m_buffer = decodedBuffer;
+ m_charset = charset(string(charsetPos, charsetEnd));
+
+ setParsedBounds(position, p - buffer.begin());
+
+ if (newPosition)
+ *newPosition = (p - buffer.begin());
+
+ return;
+ }
+ }
+ }
+ }
+ }
+
+ // Unknown encoding or malformed encoded word: treat the buffer as ordinary text (RFC-2047, Page 9).
+ m_buffer = string(buffer.begin() + position, buffer.begin() + end);
+ m_charset = charsets::US_ASCII;
+
+ setParsedBounds(position, end);
+
+ if (newPosition)
+ *newPosition = end;
+}
+
+
+void word::generate(utility::outputStream& os, const string::size_type maxLineLength,
+ const string::size_type curLinePos, string::size_type* newLinePos) const
+{
+ generate(os, maxLineLength, curLinePos, newLinePos, 0, true);
+}
+
+
+void word::generate(utility::outputStream& os, const string::size_type maxLineLength,
+ const string::size_type curLinePos, string::size_type* newLinePos, const int flags,
+ const bool isFirstWord) const
+{
+ string::size_type curLineLength = curLinePos;
+
+ // Calculate the number of ASCII chars to check whether encoding is needed
+ // and _which_ encoding to use.
+ const string::size_type asciiCount =
+ utility::stringUtils::countASCIIchars(m_buffer.begin(), m_buffer.end());
+
+ bool noEncoding = (flags & text::FORCE_NO_ENCODING) ||
+ (!(flags & text::FORCE_ENCODING) && asciiCount == m_buffer.length());
+
+ if (noEncoding)
+ {
+ // We will fold lines without encoding them.
+
+ string::const_iterator lastWSpos = m_buffer.end(); // last white-space position
+ string::const_iterator curLineStart = m_buffer.begin(); // current line start
+
+ string::const_iterator p = m_buffer.begin();
+ const string::const_iterator end = m_buffer.end();
+
+ bool finished = false;
+ bool newLine = false;
+
+ while (!finished)
+ {
+ for ( ; p != end ; ++p, ++curLineLength)
+ {
+ // Exceeded maximum line length, but we have found a white-space
+ // where we can cut the line...
+ if (curLineLength >= maxLineLength && lastWSpos != end)
+ break;
+
+ if (*p == ' ' || *p == '\t')
+ {
+ // Remember the position of this white-space character
+ lastWSpos = p;
+ }
+ }
+
+ if (p != end)
+ ++curLineLength;
+
+ if (p == end || lastWSpos == end)
+ {
+ // If we are here, it means that we have found no whitespace
+ // before the first "maxLineLength" characters. In this case,
+ // we write the full line no matter of the max line length...
+
+ if (!newLine && p != end && lastWSpos == end &&
+ !isFirstWord && curLineStart == m_buffer.begin())
+ {
+ // Here, we are continuing on the line of previous encoded
+ // word, but there is not even enough space to put the
+ // first word of this line, so we start a new line.
+ if (flags & text::NO_NEW_LINE_SEQUENCE)
+ {
+ os << CRLF;
+ curLineLength = 0;
+ }
+ else
+ {
+ os << NEW_LINE_SEQUENCE;
+ curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+ }
+
+ p = curLineStart;
+ lastWSpos = end;
+ newLine = true;
+ }
+ else
+ {
+ os << string(curLineStart, p);
+
+ if (p == end)
+ {
+ finished = true;
+ }
+ else
+ {
+ if (flags & text::NO_NEW_LINE_SEQUENCE)
+ {
+ os << CRLF;
+ curLineLength = 0;
+ }
+ else
+ {
+ os << NEW_LINE_SEQUENCE;
+ curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+ }
+
+ curLineStart = p;
+ lastWSpos = end;
+ newLine = true;
+ }
+ }
+ }
+ else
+ {
+ // In this case, there will not be enough space on the line for all the
+ // characters _after_ the last white-space; so we cut the line at this
+ // last white-space.
+
+#if 1
+ if (curLineLength != 1 && !isFirstWord)
+ os << " "; // Separate from previous word
+#endif
+
+ os << string(curLineStart, lastWSpos);
+
+ if (flags & text::NO_NEW_LINE_SEQUENCE)
+ {
+ os << CRLF;
+ curLineLength = 0;
+ }
+ else
+ {
+ os << NEW_LINE_SEQUENCE;
+ curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+ }
+
+ curLineStart = lastWSpos + 1;
+
+ p = lastWSpos + 1;
+ lastWSpos = end;
+ newLine = true;
+ }
+ }
+ }
+ /*
+ RFC #2047:
+ 4. Encodings
+
+ Initially, the legal values for "encoding" are "Q" and "B". These
+ encodings are described below. The "Q" encoding is recommended for
+ use when most of the characters to be encoded are in the ASCII
+ character set; otherwise, the "B" encoding should be used.
+ Nevertheless, a mail reader which claims to recognize 'encoded-word's
+ MUST be able to accept either encoding for any character set which it
+ supports.
+ */
+ else
+ {
+ // We will encode _AND_ fold lines
+
+ /*
+ RFC #2047:
+ 2. Syntax of encoded-words
+
+ " While there is no limit to the length of a multiple-line header
+ field, each line of a header field that contains one or more
+ 'encoded-word's is limited to 76 characters. "
+ */
+
+ const string::size_type maxLineLength3 =
+ (maxLineLength == lineLengthLimits::infinite)
+ ? maxLineLength
+ : std::min(maxLineLength, static_cast <string::size_type>(76));
+
+ // Base64 if more than 60% non-ascii, quoted-printable else (default)
+ const string::size_type asciiPercent = (m_buffer.length() == 0 ? 100 : (100 * asciiCount) / m_buffer.length());
+ const string::value_type encoding = (asciiPercent <= 40) ? 'B' : 'Q';
+
+ string wordStart("=?" + m_charset.getName() + "?" + encoding + "?");
+ string wordEnd("?=");
+
+ const string::size_type minWordLength = wordStart.length() + wordEnd.length();
+ const string::size_type maxLineLength2 = (maxLineLength3 < minWordLength + 1)
+ ? maxLineLength3 + minWordLength + 1 : maxLineLength3;
+
+ // Checks whether remaining space on this line is usable. If too few
+ // characters can be encoded, start a new line.
+ bool startNewLine = true;
+
+ if (curLineLength + 2 < maxLineLength2)
+ {
+ const string::size_type remainingSpaceOnLine = maxLineLength2 - curLineLength - 2;
+
+ if (remainingSpaceOnLine < minWordLength + 10)
+ {
+ // Space for no more than 10 encoded chars!
+ // It is not worth while to continue on this line...
+ startNewLine = true;
+ }
+ else
+ {
+ // OK, there is enough usable space on the current line.
+ startNewLine = false;
+ }
+ }
+
+ if (startNewLine)
+ {
+ os << NEW_LINE_SEQUENCE;
+ curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+ }
+
+ // Encode and fold input buffer
+ string::const_iterator pos = m_buffer.begin();
+ string::size_type remaining = m_buffer.length();
+
+ encoder* theEncoder = NULL;
+
+ if (encoding == 'B') theEncoder = new encoderB64;
+ else theEncoder = new encoderQP;
+
+ string qpEncodedBuffer;
+
+ if (encoding == 'Q')
+ {
+ theEncoder->getProperties()["rfc2047"] = true;
+
+ // In the case of Quoted-Printable encoding, we cannot simply encode input
+ // buffer line by line. So, we encode the whole buffer and we will fold it
+ // in the next loop...
+ utility::inputStreamStringAdapter in(m_buffer);
+ utility::outputStreamStringAdapter out(qpEncodedBuffer);
+
+ theEncoder->encode(in, out);
+
+ pos = qpEncodedBuffer.begin();
+ remaining = qpEncodedBuffer.length();
+ }
+
+#if 1
+ if (curLineLength != 1 && !isFirstWord)
+ {
+ os << " "; // Separate from previous word
+ ++curLineLength;
+ }
+#endif
+
+ for ( ; remaining ; )
+ {
+ // Start a new encoded word
+ os << wordStart;
+ curLineLength += minWordLength;
+
+ // Compute the number of encoded chars that will fit on this line
+ const string::size_type fit = maxLineLength2 - curLineLength;
+
+ // Base-64 encoding
+ if (encoding == 'B')
+ {
+ // TODO: WARNING! "Any encoded word which encodes a non-integral
+ // number of characters or octets is incorrectly formed."
+
+ // Here, we have a formula to compute the maximum number of source
+ // characters to encode knowing the maximum number of encoded chars
+ // (with Base64, 3 bytes of input provide 4 bytes of output).
+ string::size_type count = (fit > 1) ? ((fit - 1) * 3) / 4 : 1;
+ if (count > remaining) count = remaining;
+
+ utility::inputStreamStringAdapter in
+ (m_buffer, pos - m_buffer.begin(), pos - m_buffer.begin() + count);
+
+ curLineLength += theEncoder->encode(in, os);
+
+ pos += count;
+ remaining -= count;
+ }
+ // Quoted-Printable encoding
+ else
+ {
+ // TODO: WARNING! "Any encoded word which encodes a non-integral
+ // number of characters or octets is incorrectly formed."
+
+ // All we have to do here is to take a certain number of character
+ // (that is less than or equal to "fit") from the QP encoded buffer,
+ // but we also make sure not to fold a "=XY" encoded char.
+ const string::const_iterator qpEnd = qpEncodedBuffer.end();
+ string::const_iterator lastFoldPos = pos;
+ string::const_iterator p = pos;
+ string::size_type n = 0;
+
+ while (n < fit && p != qpEnd)
+ {
+ if (*p == '=')
+ {
+ if (n + 3 >= fit)
+ {
+ lastFoldPos = p;
+ break;
+ }
+
+ p += 3;
+ n += 3;
+ }
+ else
+ {
+ ++p;
+ ++n;
+ }
+ }
+
+ if (lastFoldPos == pos)
+ lastFoldPos = p;
+
+ os << string(pos, lastFoldPos);
+
+ curLineLength += (lastFoldPos - pos) + 1;
+
+ pos += n;
+ remaining -= n;
+ }
+
+ // End of the encoded word
+ os << wordEnd;
+
+ if (remaining)
+ {
+ os << NEW_LINE_SEQUENCE;
+ curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+ }
+ }
+
+ delete (theEncoder);
+ }
+
+ if (newLinePos)
+ *newLinePos = curLineLength;
+}
+
+
#if VMIME_WIDE_CHAR_SUPPORT
const wstring word::getDecodedText() const
@@ -77,8 +647,10 @@ word& word::operator=(const string& s)
}
-void word::copyFrom(const word& w)
+void word::copyFrom(const component& other)
{
+ const word& w = dynamic_cast <const word&>(other);
+
m_buffer = w.m_buffer;
m_charset = w.m_charset;
}
@@ -142,4 +714,10 @@ void word::setBuffer(const string& buffer)
}
+const std::vector <const component*> word::getChildComponents() const
+{
+ return std::vector <const component*>();
+}
+
+
} // vmime
diff --git a/vmime/text.hpp b/vmime/text.hpp
index a7a6a793..4af9c495 100644
--- a/vmime/text.hpp
+++ b/vmime/text.hpp
@@ -205,8 +205,6 @@ public:
private:
- static void decodeAndUnfold(const string::const_iterator& inStart, const string::const_iterator& inEnd, text& out);
-
std::vector <word*> m_words;
};
diff --git a/vmime/word.hpp b/vmime/word.hpp
index c5e07423..b0a9a035 100644
--- a/vmime/word.hpp
+++ b/vmime/word.hpp
@@ -21,6 +21,7 @@
#define VMIME_WORD_HPP_INCLUDED
+#include "vmime/component.hpp"
#include "vmime/charset.hpp"
@@ -32,7 +33,7 @@ namespace vmime
* some text encoded into one specified charset.
*/
-class word
+class word : public component
{
public:
@@ -93,7 +94,7 @@ public:
*
* @param other other word to copy data from
*/
- void copyFrom(const word& other);
+ void copyFrom(const component& other);
/** Clone this word.
*
@@ -101,6 +102,21 @@ public:
*/
word* clone() const;
+
+ using component::parse;
+ using component::generate;
+
+ void parse(const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL);
+ void generate(utility::outputStream& os, const string::size_type maxLineLength = lineLengthLimits::infinite, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const;
+
+ void generate(utility::outputStream& os, const string::size_type maxLineLength, const string::size_type curLinePos, string::size_type* newLinePos, const int flags, const bool isFirstWord) const;
+
+ const std::vector <const component*> getChildComponents() const;
+
+ static word* parseNext(const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition, bool prevIsEncoded, bool* isEncoded, bool isFirst);
+
+ static const std::vector <word*> parseMultiple(const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition);
+
private:
// The "m_buffer" of this word holds the data, and this data is encoded