Moved word parsing from 'text' class to 'word' class.

2005-03-15 10:30:42 +00:00 · 2005-03-15 10:30:42 +00:00 · 1901c6fdb6
commit 1901c6fdb6
parent 5b638bbfdf
5 changed files with 622 additions and 509 deletions
--- a/5
+++ b/5
@ -2,6 +2,11 @@
 VERSION 0.6.4cvs
 ================

+2005-03-15  Vincent Richard  <vincent@vincent-richard.net>
+
+ * text.{cpp|hpp}, word.{cpp|hpp}: moved word parsing from 'text' class
+   to 'word' class, which now inherits from 'component'.
+
 2005-03-14  Vincent Richard  <vincent@vincent-richard.net>

 * removed singleton<> and singletonManager classes: useless and quite
--- a/src/text.cpp
+++ b/src/text.cpp
@ -19,13 +19,8 @@

 #include "vmime/text.hpp"

-#include "vmime/utility/stringUtils.hpp"
 #include "vmime/parserHelpers.hpp"

-#include "vmime/encoder.hpp"
-#include "vmime/encoderB64.hpp"
-#include "vmime/encoderQP.hpp"
-

 namespace vmime
 {
@ -70,12 +65,18 @@ text::~text()
 void text::parse(const string& buffer, const string::size_type position,
 	const string::size_type end, string::size_type* newPosition)
 {
-	decodeAndUnfold(buffer.begin() + position, buffer.begin() + end, *this);
+	removeAllWords(); // start from an empty word list before parsing (presumably also frees the current words -- confirm)

-	setParsedBounds(position, end);
+	string::size_type newPos; // written by word::parseMultiple() through the pointer passed below
+
+	const std::vector <word*> words = word::parseMultiple(buffer, position, end, &newPos); // word parsing is delegated to the 'word' class
+
+	copy_vector(words, m_words); // presumably appends the parsed word pointers to m_words -- helper not shown here
+
+	setParsedBounds(position, newPos); // bounds end where word parsing stopped, which is not necessarily 'end'

 	if (newPosition)
-		*newPosition = end;
+		*newPosition = newPos; // report the position reached by the word parser
 }


@ -331,327 +332,8 @@ void text::encodeAndFold(utility::outputStream& os, const string::size_type maxL

 	for (int wi = 0 ; wi < getWordCount() ; ++wi)
 	{
-		const word& w = *getWordAt(wi);
-		const string& buffer = w.getBuffer();
-
-		// Calculate the number of ASCII chars to check whether encoding is needed
-		// and _which_ encoding to use.
-		const string::size_type asciiCount =
-			utility::stringUtils::countASCIIchars(buffer.begin(), buffer.end());
-
-		bool noEncoding = (flags & FORCE_NO_ENCODING) ||
-		    (!(flags & FORCE_ENCODING) && asciiCount == buffer.length());
-
-		if (noEncoding)
-		{
-			// We will fold lines without encoding them.
-
-			string::const_iterator lastWSpos = buffer.end(); // last white-space position
-			string::const_iterator curLineStart = buffer.begin(); // current line start
-
-			string::const_iterator p = buffer.begin();
-			const string::const_iterator end = buffer.end();
-
-			bool finished = false;
-			bool newLine = false;
-
-			while (!finished)
-			{
-				for ( ; p != end ; ++p, ++curLineLength)
-				{
-					// Exceeded maximum line length, but we have found a white-space
-					// where we can cut the line...
-					if (curLineLength >= maxLineLength && lastWSpos != end)
-						break;
-
-					if (*p == ' ' || *p == '\t')
-					{
-						// Remember the position of this white-space character
-						lastWSpos = p;
-					}
-				}
-
-				if (p != end)
-					++curLineLength;
-
-				//if (p == end || curLineLength >= maxLineLength)
-				{
-					if (p == end || lastWSpos == end)
-					{
-						// If we are here, it means that we have found no whitespace
-						// before the first "maxLineLength" characters. In this case,
-						// we write the full line no matter of the max line length...
-
-						if (!newLine && p != end && lastWSpos == end &&
-						    wi != 0 && curLineStart == buffer.begin())
-						{
-							// Here, we are continuing on the line of previous encoded
-							// word, but there is not even enough space to put the
-							// first word of this line, so we start a new line.
-							if (flags & NO_NEW_LINE_SEQUENCE)
-							{
-								os << CRLF;
-								curLineLength = 0;
-							}
-							else
-							{
-								os << NEW_LINE_SEQUENCE;
-								curLineLength = NEW_LINE_SEQUENCE_LENGTH;
-							}
-
-							p = curLineStart;
-							lastWSpos = end;
-							newLine = true;
-						}
-						else
-						{
-							os << string(curLineStart, p);
-
-							if (p == end)
-							{
-								finished = true;
-							}
-							else
-							{
-								if (flags & NO_NEW_LINE_SEQUENCE)
-								{
-									os << CRLF;
-									curLineLength = 0;
-								}
-								else
-								{
-									os << NEW_LINE_SEQUENCE;
-									curLineLength = NEW_LINE_SEQUENCE_LENGTH;
-								}
-
-								curLineStart = p;
-								lastWSpos = end;
-								newLine = true;
-							}
-						}
-					}
-					else
-					{
-						// In this case, there will not be enough space on the line for all the
-						// characters _after_ the last white-space; so we cut the line at this
-						// last white-space.
-
-#if 1
-						if (curLineLength != 1 && wi != 0)
-							os << " "; // Separate from previous word
-#endif
-
-						os << string(curLineStart, lastWSpos);
-
-						if (flags & NO_NEW_LINE_SEQUENCE)
-						{
-							os << CRLF;
-							curLineLength = 0;
-						}
-						else
-						{
-							os << NEW_LINE_SEQUENCE;
-							curLineLength = NEW_LINE_SEQUENCE_LENGTH;
-						}
-
-						curLineStart = lastWSpos + 1;
-
-						p = lastWSpos + 1;
-						lastWSpos = end;
-						newLine = true;
-					}
-				}
-			}
-		}
-		/*
-			RFC #2047:
-			4. Encodings
-
-			Initially, the legal values for "encoding" are "Q" and "B".  These
-			encodings are described below.  The "Q" encoding is recommended for
-			use when most of the characters to be encoded are in the ASCII
-			character set; otherwise, the "B" encoding should be used.
-			Nevertheless, a mail reader which claims to recognize 'encoded-word's
-			MUST be able to accept either encoding for any character set which it
-			supports.
-		*/
-		else
-		{
-			// We will encode _AND_ fold lines
-
-			/*
-				RFC #2047:
-				2. Syntax of encoded-words
-
-				" While there is no limit to the length of a multiple-line header
-				  field, each line of a header field that contains one or more
-				  'encoded-word's is limited to 76 characters. "
-			*/
-
-			const string::size_type maxLineLength3 =
-				(maxLineLength == lineLengthLimits::infinite)
-					? maxLineLength
-					: std::min(maxLineLength, static_cast <string::size_type>(76));
-
-			// Base64 if more than 60% non-ascii, quoted-printable else (default)
-			const string::size_type asciiPercent = (100 * asciiCount) / buffer.length();
-			const string::value_type encoding = (asciiPercent <= 40) ? 'B' : 'Q';
-
-			string wordStart("=?" + w.getCharset().getName() + "?" + encoding + "?");
-			string wordEnd("?=");
-
-			const string::size_type minWordLength = wordStart.length() + wordEnd.length();
-			const string::size_type maxLineLength2 = (maxLineLength3 < minWordLength + 1)
-				? maxLineLength3 + minWordLength + 1 : maxLineLength3;
-
-			// Checks whether remaining space on this line is usable. If too few
-			// characters can be encoded, start a new line.
-			bool startNewLine = true;
-
-			if (curLineLength + 2 < maxLineLength2)
-			{
-				const string::size_type remainingSpaceOnLine = maxLineLength2 - curLineLength - 2;
-
-				if (remainingSpaceOnLine < minWordLength + 10)
-				{
-					// Space for no more than 10 encoded chars!
-					// It is not worth while to continue on this line...
-					startNewLine = true;
-				}
-				else
-				{
-					// OK, there is enough usable space on the current line.
-					startNewLine = false;
-				}
-			}
-
-			if (startNewLine)
-			{
-				os << NEW_LINE_SEQUENCE;
-				curLineLength = NEW_LINE_SEQUENCE_LENGTH;
-			}
-
-			// Encode and fold input buffer
-			string::const_iterator pos = buffer.begin();
-			string::size_type remaining = buffer.length();
-
-			encoder* theEncoder;
-
-			if (encoding == 'B') theEncoder = new encoderB64;
-			else theEncoder = new encoderQP;
-
-			string qpEncodedBuffer;
-
-			if (encoding == 'Q')
-			{
-				theEncoder->getProperties()["rfc2047"] = true;
-
-				// In the case of Quoted-Printable encoding, we cannot simply encode input
-				// buffer line by line. So, we encode the whole buffer and we will fold it
-				// in the next loop...
-				utility::inputStreamStringAdapter in(buffer);
-				utility::outputStreamStringAdapter out(qpEncodedBuffer);
-
-				theEncoder->encode(in, out);
-
-				pos = qpEncodedBuffer.begin();
-				remaining = qpEncodedBuffer.length();
-			}
-
-#if 1
-			if (curLineLength != 1 && wi != 0)
-			{
-				os << " "; // Separate from previous word
-				++curLineLength;
-			}
-#endif
-
-			for ( ; remaining ; )
-			{
-				// Start a new encoded word
-				os << wordStart;
-				curLineLength += minWordLength;
-
-				// Compute the number of encoded chars that will fit on this line
-				const string::size_type fit = maxLineLength2 - curLineLength;
-
-				// Base-64 encoding
-				if (encoding == 'B')
-				{
-					// TODO: WARNING! "Any encoded word which encodes a non-integral
-					// number of characters or octets is incorrectly formed."
-
-					// Here, we have a formula to compute the maximum number of source
-					// characters to encode knowing the maximum number of encoded chars
-					// (with Base64, 3 bytes of input provide 4 bytes of output).
-					string::size_type count = (fit > 1) ? ((fit - 1) * 3) / 4 : 1;
-					if (count > remaining) count = remaining;
-
-					utility::inputStreamStringAdapter in
-						(buffer, pos - buffer.begin(), pos - buffer.begin() + count);
-
-					curLineLength += theEncoder->encode(in, os);
-
-					pos += count;
-					remaining -= count;
-				}
-				// Quoted-Printable encoding
-				else
-				{
-					// TODO: WARNING! "Any encoded word which encodes a non-integral
-					// number of characters or octets is incorrectly formed."
-
-					// All we have to do here is to take a certain number of character
-					// (that is less than or equal to "fit") from the QP encoded buffer,
-					// but we also make sure not to fold a "=XY" encoded char.
-					const string::const_iterator qpEnd = qpEncodedBuffer.end();
-					string::const_iterator lastFoldPos = pos;
-					string::const_iterator p = pos;
-					string::size_type n = 0;
-
-					while (n < fit && p != qpEnd)
-					{
-						if (*p == '=')
-						{
-							if (n + 3 >= fit)
-							{
-								lastFoldPos = p;
-								break;
-							}
-
-							p += 3;
-							n += 3;
-						}
-						else
-						{
-							++p;
-							++n;
-						}
-					}
-
-					if (lastFoldPos == pos)
-						lastFoldPos = p;
-
-					os << string(pos, lastFoldPos);
-
-					curLineLength += (lastFoldPos - pos) + 1;
-
-					pos += n;
-					remaining -= n;
-				}
-
-				// End of the encoded word
-				os << wordEnd;
-
-				if (remaining)
-				{
-					os << NEW_LINE_SEQUENCE;
-					curLineLength = NEW_LINE_SEQUENCE_LENGTH;
-				}
-			}
-
-			delete (theEncoder);
-  		}
+		getWordAt(wi)->generate(os, maxLineLength, curLineLength,
+			&curLineLength, flags, (wi == 0));
 	}

 	if (lastLineLength)
@ -665,187 +347,21 @@ text* text::decodeAndUnfold(const string& in, text* generateInExisting)

 	out->removeAllWords();

-	decodeAndUnfold(in.begin(), in.end(), *out);
+	const std::vector <word*> words = word::parseMultiple(in, 0, in.length(), NULL);
+
+	copy_vector(words, out->m_words);

 	return (out);
 }


-void text::decodeAndUnfold(const string::const_iterator& inStart, const string::const_iterator& inEnd, text& out)
-{
-	// NOTE: See RFC-2047, Pages 11-12 for knowing about handling
-	// of white-spaces between encoded words.
-
-	out.removeAllWords();
-
-	string::const_iterator p = inStart;
-	const string::const_iterator end = inEnd;
-
-	const charset defaultCharset(charsets::US_ASCII);
-	charset prevWordCharset(defaultCharset);
-
-	bool prevIsEncoded = false;
-
-	string::const_iterator prevPos = p;
-
-	for ( ; ; )
-	{
-		if (p == end) // || *p == '\n')
-		{
-			string::const_iterator textEnd = p;
-
-			if (textEnd != inStart && *(textEnd - 1) == '\r')
-				--textEnd;
-
-			if (textEnd != prevPos)
-			{
-				if (!out.isEmpty() && prevWordCharset == defaultCharset)
-				{
-					out.getWordAt(out.getWordCount() - 1)->getBuffer() += string(prevPos, textEnd);
-				}
-				else
-				{
-					prevWordCharset = defaultCharset;
-					out.appendWord(new word(string(prevPos, textEnd), defaultCharset));
-					prevIsEncoded = false;
-				}
-			}
-
-			if (p == end)
-			{
-				// Finished
-				break;
-			}
-
-			// Skip the new-line character
-			prevPos = ++p;
-		}
-		else if (*p == '=' && (p + 1) != end && *(p + 1) == '?')
-		{
-			string::const_iterator wordPos = p;
-			p += 2; // skip '=?'
-
-			if (p != end)
-			{
-				const string::const_iterator charsetPos = p;
-
-				for ( ; p != end && *p != '?' ; ++p);
-
-				if (p != end) // a charset is specified
-				{
-					const string::const_iterator charsetEnd = p;
-					const string::const_iterator encPos = ++p; // skip '?'
-
-					for ( ; p != end && *p != '?' ; ++p);
-
-					if (p != end) // an encoding is specified
-					{
-						//const string::const_iterator encEnd = p;
-						const string::const_iterator dataPos = ++p; // skip '?'
-
-						for ( ; p != end && !(*p == '?' && *(p + 1) == '=') ; ++p);
-
-						if (p != end) // some data is specified
-						{
-							const string::const_iterator dataEnd = p;
-							p += 2; // skip '?='
-
-							encoder* theEncoder = NULL;
-
-							// Base-64 encoding
-							if (*encPos == 'B' || *encPos == 'b')
-							{
-								theEncoder = new encoderB64;
-							}
-							// Quoted-Printable encoding
-							else if (*encPos == 'Q' || *encPos == 'q')
-							{
-								theEncoder = new encoderQP;
-								theEncoder->getProperties()["rfc2047"] = true;
-							}
-
-							if (theEncoder)
-							{
-								// Decode text
-								string decodedBuffer;
-
-								utility::inputStreamStringAdapter ein(string(dataPos, dataEnd));
-								utility::outputStreamStringAdapter eout(decodedBuffer);
-
-								theEncoder->decode(ein, eout);
-								delete (theEncoder);
-
-								// Append all the unencoded text before this word
-								if (prevPos != wordPos)
-								{
-									string::const_iterator p = prevPos;
-
-									if (prevIsEncoded)
-									{
-										// Check whether there are only white-spaces between
-										// the two encoded words
-										for ( ; (p != wordPos) && parserHelpers::isspace(*p) ; ++p);
-									}
-
-									if (p != wordPos) // if not empty
-									{
-										if (!out.isEmpty() && prevWordCharset == defaultCharset)
-										{
-											out.getWordAt(out.getWordCount() - 1)->
-												getBuffer() += string(prevPos, wordPos);
-										}
-										else
-										{
-											out.appendWord(new word
-												(string(prevPos, wordPos), defaultCharset));
-
-											prevWordCharset = defaultCharset;
-										}
-									}
-								}
-
-								// Append this fresh decoded word to output text
-								charset thisCharset(string(charsetPos, charsetEnd));
-
-								if (!out.isEmpty() && prevWordCharset == thisCharset)
-								{
-									out.getWordAt(out.getWordCount() - 1)->
-										getBuffer() += decodedBuffer;
-								}
-								else
-								{
-									prevWordCharset = thisCharset;
-									out.appendWord(new word(decodedBuffer, thisCharset));
-								}
-
-								// This word has been decoded: we can advance in the input buffer
-								prevPos = p;
-								prevIsEncoded = true;
-							}
-							else
-							{
-								// Unknown encoding: can't decode this word, we will
-								// treat this word as ordinary text (RFC-2047, Page 9).
-							}
-						}
-					}
-				}
-			}
-		}
-		else
-		{
-			++p;
-		}
-
-		for ( ; p != end && *p != '=' && *p != '\n' ; ++p);
-	}
-}
-
-
 const std::vector <const component*> text::getChildComponents() const
 {
-	// TODO: 'word' should inherit from 'component'
-	return std::vector <const component*>();
+	std::vector <const component*> list; // children of this text, returned by value
+
+	copy_vector(m_words, list); // presumably appends every word of m_words to 'list' (helper not shown here)
+
+	return (list);
 }


--- a/src/word.cpp
+++ b/src/word.cpp
@ -18,6 +18,15 @@
 //

 #include "vmime/word.hpp"
+#include "vmime/text.hpp"
+
+#include "vmime/utility/stringUtils.hpp"
+#include "vmime/utility/smartPtr.hpp"
+#include "vmime/parserHelpers.hpp"
+
+#include "vmime/encoder.hpp"
+#include "vmime/encoderB64.hpp"
+#include "vmime/encoderQP.hpp"


 namespace vmime
@ -31,7 +40,7 @@ word::word()


 word::word(const word& w)
-	: m_buffer(w.m_buffer), m_charset(w.m_charset)
+	: component(), m_buffer(w.m_buffer), m_charset(w.m_charset) // base is default-constructed; only the buffer and the charset are copied from 'w'
 {
 }

@ -48,6 +57,567 @@ word::word(const string& buffer, const charset& charset)
 }


+word* word::parseNext(const string& buffer, const string::size_type position,
+	const string::size_type end, string::size_type* newPosition,
+	bool prevIsEncoded, bool* isEncoded, bool isFirst)
+{	/* Extracts the next word found in buffer[position, end).
+	 *
+	 * White-space at 'position' is skipped first. Then the text is scanned:
+	 *   - ordinary text is accumulated in 'unencoded'; each line break (an
+	 *     optional CR, the LF and the white-space that follows) is replaced
+	 *     by a single space;
+	 *   - when "=?" is met and enough characters remain (pos + 6 < end), the
+	 *     accumulated text, if any, is returned as an US-ASCII word ending
+	 *     at "=?"; otherwise the closing "?=" is searched on the same line
+	 *     and the word built by parse() is returned;
+	 *   - if no "?=" is found before the end of the line or of the range,
+	 *     scanning goes on and the characters are handled as ordinary text.
+	 * Text remaining when 'end' is reached is returned as an US-ASCII word.
+	 *
+	 * Returns a word allocated with 'new' (nothing in this function deletes
+	 * it), or NULL when no text is left. '*newPosition' and '*isEncoded' are
+	 * written (when the pointers are not NULL) only if a word is returned.
+	 */
+	string::size_type pos = position;
+
+	// Ignore white-spaces:
+	//   - before the first word
+	//   - between two encoded words
+	//   - after the last word
+	while (pos < end && parserHelpers::isspace(buffer[pos]))
+		++pos;
+
+	string::size_type startPos = pos; // start of the ordinary text not yet copied to 'unencoded'
+	string unencoded;
+
+	while (pos < end)
+	{
+		// End of line: does not occur in the middle of an encoded word. This is
+		// used to remove folding white-spaces from unencoded text.
+		if (buffer[pos] == '\n')
+		{
+			string::size_type endPos = pos;
+
+			if (pos > position && buffer[pos - 1] == '\r')
+				--endPos; // leave the CR of a CR LF pair out of the text
+
+			while (pos != end && parserHelpers::isspace(buffer[pos]))
+				++pos;
+
+			unencoded += string(buffer.begin() + startPos, buffer.begin() + endPos);
+			unencoded += ' ';
+
+			startPos = pos;
+		}
+		// Start of an encoded word
+		else if (pos + 6 < end &&  // 6 = "=?(.+)?(.*)?="
+		         buffer[pos] == '=' && buffer[pos + 1] == '?')
+		{
+			// Check whether there is some unencoded text before
+			unencoded += string(buffer.begin() + startPos, buffer.begin() + pos);
+
+			if (!unencoded.empty())
+			{
+				word* w = new word(unencoded, charset(charsets::US_ASCII));
+				w->setParsedBounds(position, pos);
+
+				if (newPosition)
+					*newPosition = pos; // the next call starts on "=?" with no pending text
+
+				if (isEncoded)
+					*isEncoded = false;
+
+				return (w);
+			}
+
+			// ...else find the finish sequence '?=' and return an encoded word
+			const string::size_type wordStart = pos;
+
+			pos += 4; // skips "=?" and the two characters that follow before looking for "?="
+
+			while (pos < end)
+			{
+				if (buffer[pos] == '\n')
+				{
+					// End of line not allowed in the middle of an encoded word:
+					// treat this text as unencoded text (see *).
+					break;
+				}
+				else if (buffer[pos] == '?' && pos + 1 < end && buffer[pos + 1] == '=')
+				{
+					// Found the finish sequence
+					break;
+				}
+
+				++pos;
+			}
+
+			if (pos == end) // not a valid word (no finish sequence)
+				continue; // the outer loop ends; text from 'startPos' is handled after the loop
+			else if (buffer[pos] == '\n')  // (*)
+				continue; // 'pos' stays on the LF, which is handled by the end-of-line branch
+
+			pos += 2; // ?=
+
+			word* w = new word();
+			w->parse(buffer, wordStart, pos, NULL);
+
+			if (newPosition)
+				*newPosition = pos;
+
+			if (isEncoded)
+				*isEncoded = true;
+
+			return (w);
+		}
+
+		++pos;
+	}
+
+	// Treat unencoded text at the end of the buffer
+	if (end != startPos)
+	{
+		if (startPos != pos && !isFirst && prevIsEncoded)
+			unencoded += ' '; // a space is put in front of text that follows an encoded word
+
+		unencoded += string(buffer.begin() + startPos, buffer.begin() + end);
+
+		word* w = new word(unencoded, charset(charsets::US_ASCII));
+		w->setParsedBounds(position, end);
+
+		if (newPosition)
+			*newPosition = end;
+
+		if (isEncoded)
+			*isEncoded = false;
+
+		return (w);
+	}
+
+	return (NULL);
+}
+
+
+const std::vector <word*> word::parseMultiple(const string& buffer, const string::size_type position,
+	const string::size_type end, string::size_type* newPosition)
+{
+	// Words are extracted one after the other with parseNext(), until it has
+	// nothing left to return. The position reached is reported to the caller.
+	std::vector <word*> words;
+
+	string::size_type cursor = position;
+	bool lastWasEncoded = false;
+	bool first = true;
+
+	for ( ; ; )
+	{
+		word* next = word::parseNext(buffer, cursor, end, &cursor, lastWasEncoded, &lastWasEncoded, first);
+
+		if (next == NULL)
+			break;
+
+		words.push_back(next);
+		first = false;
+	}
+
+	if (newPosition != NULL)
+		*newPosition = cursor;
+
+	return (words);
+}
+
+
+// Parses one encoded word ("=?charset?encoding?data?=") located in
+// buffer[position, end).
+//
+// When the range holds a well-formed encoded word whose encoding is 'B'/'b'
+// (Base64) or 'Q'/'q' (Quoted-Printable), the data is decoded into the word
+// buffer, the charset is taken from the encoded word, and the parsed bounds
+// and '*newPosition' (if not NULL) are set just after the closing "?=".
+//
+// In any other case (malformed word, unknown encoding), the whole range is
+// stored unchanged as US-ASCII text and '*newPosition' is set to 'end'.
+void word::parse(const string& buffer, const string::size_type position,
+	const string::size_type end, string::size_type* newPosition)
+{
+	if (position + 6 < end && // 6 = "=?(.+)?(.*)?="
+	    buffer[position] == '=' && buffer[position + 1] == '?')
+	{
+		string::const_iterator p = buffer.begin() + position + 2;
+		const string::const_iterator pend = buffer.begin() + end;
+
+		const string::const_iterator charsetPos = p;
+
+		for ( ; p != pend && *p != '?' ; ++p);
+
+		if (p != pend) // a charset is specified
+		{
+			const string::const_iterator charsetEnd = p;
+			const string::const_iterator encPos = ++p; // skip '?'
+
+			for ( ; p != pend && *p != '?' ; ++p);
+
+			if (p != pend) // an encoding is specified
+			{
+				//const string::const_iterator encEnd = p;
+				const string::const_iterator dataPos = ++p; // skip '?'
+
+				// Look for the closing "?=". The character after '?' is only
+				// read when it still lies inside the range: a '?' in the last
+				// position can not start the finish sequence, and reading
+				// past 'pend' must be avoided.
+				for ( ; p != pend && !(*p == '?' && (p + 1) != pend && *(p + 1) == '=') ; ++p);
+
+				if (p != pend) // some data is specified
+				{
+					const string::const_iterator dataEnd = p;
+					p += 2; // skip '?='
+
+					encoder* theEncoder = NULL;
+
+					// Base-64 encoding
+					if (*encPos == 'B' || *encPos == 'b')
+					{
+						theEncoder = new encoderB64;
+					}
+					// Quoted-Printable encoding
+					else if (*encPos == 'Q' || *encPos == 'q')
+					{
+						theEncoder = new encoderQP;
+						theEncoder->getProperties()["rfc2047"] = true;
+					}
+
+					if (theEncoder)
+					{
+						// Decode text
+						string decodedBuffer;
+
+						utility::inputStreamStringAdapter ein(string(dataPos, dataEnd));
+						utility::outputStreamStringAdapter eout(decodedBuffer);
+
+						theEncoder->decode(ein, eout);
+						delete (theEncoder);
+
+						m_buffer = decodedBuffer;
+						m_charset = charset(string(charsetPos, charsetEnd));
+
+						setParsedBounds(position, p - buffer.begin());
+
+						if (newPosition)
+							*newPosition = (p - buffer.begin());
+
+						return;
+					}
+				}
+			}
+		}
+	}
+
+	// Unknown encoding or malformed encoded word: treat the buffer as ordinary text (RFC-2047, Page 9).
+	m_buffer = string(buffer.begin() + position, buffer.begin() + end);
+	m_charset = charsets::US_ASCII;
+
+	setParsedBounds(position, end);
+
+	if (newPosition)
+		*newPosition = end;
+}
+
+
+void word::generate(utility::outputStream& os, const string::size_type maxLineLength,
+	const string::size_type curLinePos, string::size_type* newLinePos) const
+{
+	// Forward to the complete overload: no flag is set and the word is
+	// handled as the first word.
+	const int noFlags = 0;
+	const bool firstWord = true;
+
+	generate(os, maxLineLength, curLinePos, newLinePos, noFlags, firstWord);
+}
+
+
+void word::generate(utility::outputStream& os, const string::size_type maxLineLength,
+	const string::size_type curLinePos, string::size_type* newLinePos, const int flags,
+	const bool isFirstWord) const
+{	/* Writes this word to 'os', folding lines and, when needed, encoding
+	 * the text as one or more "=?charset?X?...?=" sequences.
+	 *
+	 * The text is written as is when text::FORCE_NO_ENCODING is set, or when
+	 * text::FORCE_ENCODING is not set and countASCIIchars() counts every
+	 * character of the buffer. Otherwise it is encoded: 'B' when at most 40%
+	 * of the characters are counted as ASCII, 'Q' in all other cases.
+	 *
+	 * maxLineLength: line length used to decide where lines are folded.
+	 * curLinePos:    length of the current output line on entry.
+	 * newLinePos:    if not NULL, receives the line length on exit.
+	 * flags:         text::FORCE_NO_ENCODING, text::FORCE_ENCODING and
+	 *                text::NO_NEW_LINE_SEQUENCE are tested in this function.
+	 * isFirstWord:   when false, a separating space may be written before
+	 *                the word, and a new line may be started before it.
+	 */
+	string::size_type curLineLength = curLinePos;
+
+	// Calculate the number of ASCII chars to check whether encoding is needed
+	// and _which_ encoding to use.
+	const string::size_type asciiCount =
+		utility::stringUtils::countASCIIchars(m_buffer.begin(), m_buffer.end());
+
+	bool noEncoding = (flags & text::FORCE_NO_ENCODING) ||
+	    (!(flags & text::FORCE_ENCODING) && asciiCount == m_buffer.length());
+
+	if (noEncoding)
+	{
+		// We will fold lines without encoding them.
+
+		string::const_iterator lastWSpos = m_buffer.end(); // last white-space position
+		string::const_iterator curLineStart = m_buffer.begin(); // current line start
+
+		string::const_iterator p = m_buffer.begin();
+		const string::const_iterator end = m_buffer.end();
+
+		bool finished = false;
+		bool newLine = false;
+
+		while (!finished)
+		{
+			for ( ; p != end ; ++p, ++curLineLength)
+			{
+				// Exceeded maximum line length, but we have found a white-space
+				// where we can cut the line...
+				if (curLineLength >= maxLineLength && lastWSpos != end)
+					break;
+
+				if (*p == ' ' || *p == '\t')
+				{
+					// Remember the position of this white-space character
+					lastWSpos = p;
+				}
+			}
+
+			if (p != end)
+				++curLineLength;
+
+			if (p == end || lastWSpos == end)
+			{
+				// If we are here, it means that we have found no whitespace
+				// before the first "maxLineLength" characters. In this case,
+				// we write the full line no matter of the max line length...
+
+				if (!newLine && p != end && lastWSpos == end &&
+				    !isFirstWord && curLineStart == m_buffer.begin())
+				{
+					// Here, we are continuing on the line of previous encoded
+					// word, but there is not even enough space to put the
+					// first word of this line, so we start a new line.
+					if (flags & text::NO_NEW_LINE_SEQUENCE)
+					{
+						os << CRLF;
+						curLineLength = 0;
+					}
+					else
+					{
+						os << NEW_LINE_SEQUENCE;
+						curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+					}
+
+					p = curLineStart; // nothing was written yet: scan the same text again on the new line
+					lastWSpos = end;
+					newLine = true;
+				}
+				else
+				{
+					os << string(curLineStart, p);
+
+					if (p == end)
+					{
+						finished = true;
+					}
+					else
+					{
+						if (flags & text::NO_NEW_LINE_SEQUENCE)
+						{
+							os << CRLF;
+							curLineLength = 0;
+						}
+						else
+						{
+							os << NEW_LINE_SEQUENCE;
+							curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+						}
+
+						curLineStart = p;
+						lastWSpos = end;
+						newLine = true;
+					}
+				}
+			}
+			else
+			{
+				// In this case, there will not be enough space on the line for all the
+				// characters _after_ the last white-space; so we cut the line at this
+				// last white-space.
+
+#if 1
+				if (curLineLength != 1 && !isFirstWord)
+					os << " "; // Separate from previous word
+#endif
+
+				os << string(curLineStart, lastWSpos);
+
+				if (flags & text::NO_NEW_LINE_SEQUENCE)
+				{
+					os << CRLF;
+					curLineLength = 0;
+				}
+				else
+				{
+					os << NEW_LINE_SEQUENCE;
+					curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+				}
+
+				curLineStart = lastWSpos + 1; // the white-space used as cut point is not written
+
+				p = lastWSpos + 1;
+				lastWSpos = end;
+				newLine = true;
+			}
+		}
+	}
+	/*
+		RFC #2047:
+		4. Encodings
+
+		Initially, the legal values for "encoding" are "Q" and "B".  These
+		encodings are described below.  The "Q" encoding is recommended for
+		use when most of the characters to be encoded are in the ASCII
+		character set; otherwise, the "B" encoding should be used.
+		Nevertheless, a mail reader which claims to recognize 'encoded-word's
+		MUST be able to accept either encoding for any character set which it
+		supports.
+	*/
+	else
+	{
+		// We will encode _AND_ fold lines
+
+		/*
+			RFC #2047:
+			2. Syntax of encoded-words
+
+			" While there is no limit to the length of a multiple-line header
+			  field, each line of a header field that contains one or more
+			  'encoded-word's is limited to 76 characters. "
+		*/
+
+		const string::size_type maxLineLength3 =
+			(maxLineLength == lineLengthLimits::infinite)
+				? maxLineLength
+				: std::min(maxLineLength, static_cast <string::size_type>(76));
+
+		// Base64 if more than 60% non-ascii, quoted-printable else (default)
+		const string::size_type asciiPercent = (m_buffer.length() == 0 ? 100 : (100 * asciiCount) / m_buffer.length());
+		const string::value_type encoding = (asciiPercent <= 40) ? 'B' : 'Q';
+
+		string wordStart("=?" + m_charset.getName() + "?" + encoding + "?");
+		string wordEnd("?=");
+
+		const string::size_type minWordLength = wordStart.length() + wordEnd.length();
+		const string::size_type maxLineLength2 = (maxLineLength3 < minWordLength + 1)
+			? maxLineLength3 + minWordLength + 1 : maxLineLength3; // limit is raised when it can not even hold an empty encoded word
+
+		// Checks whether remaining space on this line is usable. If too few
+		// characters can be encoded, start a new line.
+		bool startNewLine = true;
+
+		if (curLineLength + 2 < maxLineLength2)
+		{
+			const string::size_type remainingSpaceOnLine = maxLineLength2 - curLineLength - 2;
+
+			if (remainingSpaceOnLine < minWordLength + 10)
+			{
+				// Space for no more than 10 encoded chars!
+				// It is not worth while to continue on this line...
+				startNewLine = true;
+			}
+			else
+			{
+				// OK, there is enough usable space on the current line.
+				startNewLine = false;
+			}
+		}
+
+		if (startNewLine)
+		{
+			os << NEW_LINE_SEQUENCE;
+			curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+		}
+
+		// Encode and fold input buffer
+		string::const_iterator pos = m_buffer.begin();
+		string::size_type remaining = m_buffer.length();
+
+		encoder* theEncoder = NULL;
+
+		if (encoding == 'B') theEncoder = new encoderB64;
+		else theEncoder = new encoderQP;
+
+		string qpEncodedBuffer;
+
+		if (encoding == 'Q')
+		{
+			theEncoder->getProperties()["rfc2047"] = true;
+
+			// In the case of Quoted-Printable encoding, we cannot simply encode input
+			// buffer line by line. So, we encode the whole buffer and we will fold it
+			// in the next loop...
+			utility::inputStreamStringAdapter in(m_buffer);
+			utility::outputStreamStringAdapter out(qpEncodedBuffer);
+
+			theEncoder->encode(in, out);
+
+			pos = qpEncodedBuffer.begin(); // from here, 'pos' and 'remaining' refer to the encoded text
+			remaining = qpEncodedBuffer.length();
+		}
+
+#if 1
+		if (curLineLength != 1 && !isFirstWord)
+		{
+			os << " "; // Separate from previous word
+			++curLineLength;
+		}
+#endif
+
+		for ( ; remaining ; )
+		{
+			// Start a new encoded word
+			os << wordStart;
+			curLineLength += minWordLength;
+
+			// Compute the number of encoded chars that will fit on this line
+			const string::size_type fit = maxLineLength2 - curLineLength; // NOTE(review): looks like this can wrap if curLineLength exceeds maxLineLength2 (size_type presumably unsigned) -- confirm
+
+			// Base-64 encoding
+			if (encoding == 'B')
+			{
+				// TODO: WARNING! "Any encoded word which encodes a non-integral
+				// number of characters or octets is incorrectly formed."
+
+				// Here, we have a formula to compute the maximum number of source
+				// characters to encode knowing the maximum number of encoded chars
+				// (with Base64, 3 bytes of input provide 4 bytes of output).
+				string::size_type count = (fit > 1) ? ((fit - 1) * 3) / 4 : 1;
+				if (count > remaining) count = remaining;
+
+				utility::inputStreamStringAdapter in
+					(m_buffer, pos - m_buffer.begin(), pos - m_buffer.begin() + count);
+
+				curLineLength += theEncoder->encode(in, os);
+
+				pos += count;
+				remaining -= count;
+			}
+			// Quoted-Printable encoding
+			else
+			{
+				// TODO: WARNING! "Any encoded word which encodes a non-integral
+				// number of characters or octets is incorrectly formed."
+
+				// All we have to do here is to take a certain number of character
+				// (that is less than or equal to "fit") from the QP encoded buffer,
+				// but we also make sure not to fold a "=XY" encoded char.
+				const string::const_iterator qpEnd = qpEncodedBuffer.end();
+				string::const_iterator lastFoldPos = pos;
+				string::const_iterator p = pos;
+				string::size_type n = 0;
+
+				while (n < fit && p != qpEnd)
+				{
+					if (*p == '=')
+					{
+						if (n + 3 >= fit)
+						{
+							lastFoldPos = p;
+							break;
+						}
+
+						p += 3;
+						n += 3;
+					}
+					else
+					{
+						++p;
+						++n;
+					}
+				}
+
+				if (lastFoldPos == pos)
+					lastFoldPos = p; // no cut was forced in front of a "=XY" sequence: take everything scanned
+
+				os << string(pos, lastFoldPos);
+
+				curLineLength += (lastFoldPos - pos) + 1;
+
+				pos += n;
+				remaining -= n; // NOTE(review): if n is 0 here (e.g. fit == 0), 'remaining' never decreases -- verify this can not happen
+			}
+
+			// End of the encoded word
+			os << wordEnd;
+
+			if (remaining)
+			{
+				os << NEW_LINE_SEQUENCE;
+				curLineLength = NEW_LINE_SEQUENCE_LENGTH;
+			}
+		}
+
+		delete (theEncoder);
+	}
+
+	if (newLinePos)
+		*newLinePos = curLineLength;
+}
+
+
 #if VMIME_WIDE_CHAR_SUPPORT

 const wstring word::getDecodedText() const
@ -77,8 +647,10 @@ word& word::operator=(const string& s)
 }


-void word::copyFrom(const word& w)
+void word::copyFrom(const component& other) // copies the buffer and the charset of 'other' into this word
 {
+	const word& w = dynamic_cast <const word&>(other); // reference cast: throws std::bad_cast if 'other' is not a word
+
 	m_buffer = w.m_buffer;
 	m_charset = w.m_charset;
 }
@ -142,4 +714,10 @@ void word::setBuffer(const string& buffer)
 }


+const std::vector <const component*> word::getChildComponents() const
+{
+	// A word has no child component: the list returned is always empty.
+	const std::vector <const component*> noChildren;
+
+	return (noChildren);
+}
+
+
 } // vmime
--- a/vmime/text.hpp
+++ b/vmime/text.hpp
@ -205,8 +205,6 @@ public:

 private:

-	static void decodeAndUnfold(const string::const_iterator& inStart, const string::const_iterator& inEnd, text& out);
-
 	std::vector <word*> m_words;
 };

--- a/vmime/word.hpp
+++ b/vmime/word.hpp
@ -21,6 +21,7 @@
 #define VMIME_WORD_HPP_INCLUDED


+#include "vmime/component.hpp"
 #include "vmime/charset.hpp"


@ -32,7 +33,7 @@ namespace vmime
  * some text encoded into one specified charset.
  */

-class word
+class word : public component
 {
 public:

@ -93,7 +94,7 @@ public:
 	  *
 	  * @param other other word to copy data from
 	  */
-	void copyFrom(const word& other);
+	void copyFrom(const component& other);

 	/** Clone this word.
 	  *
@ -101,6 +102,21 @@ public:
 	  */
 	word* clone() const;

+
+	using component::parse;
+	using component::generate;
+
+	void parse(const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL);
+	void generate(utility::outputStream& os, const string::size_type maxLineLength = lineLengthLimits::infinite, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const;
+
+	void generate(utility::outputStream& os, const string::size_type maxLineLength, const string::size_type curLinePos, string::size_type* newLinePos, const int flags, const bool isFirstWord) const;
+
+	const std::vector <const component*> getChildComponents() const;
+
+	static word* parseNext(const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition, bool prevIsEncoded, bool* isEncoded, bool isFirst);
+
+	static const std::vector <word*> parseMultiple(const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition);
+
 private:

 	// The "m_buffer" of this word holds the data, and this data is encoded