3 files changed, 90 insertions, 22 deletions
diff --git a/src/utility/stringUtils.cpp b/src/utility/stringUtils.cpp
index abae8f9a..7f61a7d3 100644
--- a/src/utility/stringUtils.cpp
+++ b/src/utility/stringUtils.cpp
@@ -151,6 +151,24 @@ string::size_type stringUtils::countASCIIchars
 }
 
 
+// Returns the offset (from 'begin') of the first character rejected by
+// parserHelpers::isAscii(), or string::npos when there is none.
+string::size_type stringUtils::findFirstNonASCIIchar
+	(const string::const_iterator begin, const string::const_iterator end)
+{
+	string::const_iterator cur = begin;
+
+	while (cur != end && parserHelpers::isAscii(*cur))
+		++cur;
+
+	// Reaching 'end' means every character passed the ASCII test
+	if (cur == end)
+		return string::npos;
+
+	return cur - begin;
+}
+
+
 const string stringUtils::unquote(const string& str)
 {
 	if (str.length() < 2)
diff --git a/src/word.cpp b/src/word.cpp
index 667f1fbb..9d0177fa 100644
--- a/src/word.cpp
+++ b/src/word.cpp
@@ -336,30 +336,22 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
 	if (state == NULL)
 		state = &defaultGeneratorState;
 
-	// Calculate the number of ASCII chars to check whether encoding is needed
-	// and _which_ encoding to use.
-	const string::size_type asciiCount =
-		utility::stringUtils::countASCIIchars(m_buffer.begin(), m_buffer.end());
+	// Find out if encoding is forced or required by contents + charset
+	bool encodingNeeded = (flags & text::FORCE_ENCODING) != 0;
 
-	bool noEncoding = (flags & text::FORCE_NO_ENCODING) ||
-	    (!(flags & text::FORCE_ENCODING) && asciiCount == m_buffer.length());
-
-	if (!(flags & text::FORCE_NO_ENCODING) &&
-	    m_buffer.find_first_of("\n\r") != string::npos)
-	{
-		// Force encoding when there are only ASCII chars, but there is
-		// also at least one of '\n' or '\r' (header fields)
-		noEncoding = false;
-	}
+	if ((flags & text::FORCE_NO_ENCODING) != 0)  // explicit opt-out overrides everything else
+		encodingNeeded = false;
+	else if (!encodingNeeded)  // not forced either way: decide from contents + charset
+		encodingNeeded = wordEncoder::isEncodingNeeded(m_buffer, m_charset);
 
 	// If possible and requested (with flag), quote the buffer (no folding is performed).
 	// Quoting is possible if and only if:
-	//  - the whole buffer is ASCII-only
+	//  - the buffer does not need to be encoded
 	//  - the buffer does not contain quoting character (")
 	//  - there is enough remaining space on the current line to hold the whole buffer
-	if (!noEncoding &&
+	if (!encodingNeeded &&
 	    (flags & text::QUOTE_IF_POSSIBLE) &&
-	    asciiCount == m_buffer.length() &&
+	    !encodingNeeded &&
 	    m_buffer.find('"') == string::npos &&
 	    (curLineLength + 2 /* 2 x " */ + m_buffer.length()) < maxLineLength)
 	{
@@ -367,7 +359,7 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
 		curLineLength += 2 + m_buffer.length();
 	}
 	// We will fold lines without encoding them.
-	else if (noEncoding)
+	else if (!encodingNeeded)
 	{
 		string::const_iterator lastWSpos = m_buffer.end(); // last white-space position
 		string::const_iterator curLineStart = m_buffer.begin(); // current line start
diff --git a/src/wordEncoder.cpp b/src/wordEncoder.cpp
index 154b4efc..cc8292f8 100644
--- a/src/wordEncoder.cpp
+++ b/src/wordEncoder.cpp
@@ -260,17 +260,75 @@ wordEncoder::Encoding wordEncoder::getEncoding() const
 }
 
 
+// One row of the table that associates an encoding with a family of charsets
+struct CharsetEncodingEntry
+{
+	std::string charset;             // fragment searched for in the lower-cased charset name
+	wordEncoder::Encoding encoding;  // encoding associated with that fragment
+
+	CharsetEncodingEntry(const std::string& charset_, const wordEncoder::Encoding encoding_)
+		: charset(charset_), encoding(encoding_)
+	{
+	}
+};
+
+static const CharsetEncodingEntry g_charsetEncodingMap[] =  // charset-name fragment -> encoding
+{
+	// Use QP encoding for ISO-8859-x charsets
+	CharsetEncodingEntry("iso-8859",     wordEncoder::ENCODING_QP),
+	CharsetEncodingEntry("iso8859",      wordEncoder::ENCODING_QP),
+
+	// RFC-1468 states:
+	//   " ISO-2022-JP may also be used in MIME Part 2 headers.  The "B"
+	//     encoding should be used with ISO-2022-JP text. "
+	// Use Base64 encoding for all ISO-2022 charsets.
+	CharsetEncodingEntry("iso-2022",     wordEncoder::ENCODING_B64),
+	CharsetEncodingEntry("iso2022",      wordEncoder::ENCODING_B64),
+
+	// Sentinel: the lookup loops stop one entry before the end, so this is never matched
+	CharsetEncodingEntry("", wordEncoder::ENCODING_AUTO)
+};
+
+
+// static
+bool wordEncoder::isEncodingNeeded(const string& buffer, const charset& charset)
+{
+	// Charsets listed in the table with an explicit encoding are always encoded
+	const string loweredName = utility::stringUtils::toLower(charset.getName());
+	const string::size_type entryCount = (sizeof(g_charsetEncodingMap) / sizeof(g_charsetEncodingMap[0])) - 1;
+
+	for (string::size_type idx = 0 ; idx < entryCount ; ++idx)
+	{
+		const CharsetEncodingEntry& entry = g_charsetEncodingMap[idx];
+
+		if (entry.encoding != wordEncoder::ENCODING_AUTO &&
+		    loweredName.find(entry.charset) != string::npos)
+		{
+			return true;
+		}
+	}
+
+	// Otherwise encoding is needed when the buffer holds a non-ASCII char,
+	// or is ASCII-only but contains '\n' or '\r' (header fields)
+	const bool hasNonAscii =
+		utility::stringUtils::findFirstNonASCIIchar(buffer.begin(), buffer.end()) != string::npos;
+	const bool hasLineBreak = buffer.find_first_of("\n\r") != string::npos;
+
+	return hasNonAscii || hasLineBreak;
+}
+
+
 // static
 wordEncoder::Encoding wordEncoder::guessBestEncoding
 	(const string& buffer, const charset& charset)
 {
-	// If the charset is ISO-8859-x, set to QP encoding
+	// Special treatment for some charsets
 	const string cset = utility::stringUtils::toLower(charset.getName());
 
-	if (cset.find("iso-8859") != string::npos ||
-	    cset.find("iso8859") != string::npos)
+	for (unsigned int i = 0 ; i < (sizeof(g_charsetEncodingMap) / sizeof(g_charsetEncodingMap[0])) - 1 ; ++i)
 	{
-		return ENCODING_QP;
+		if (cset.find(g_charsetEncodingMap[i].charset) != string::npos)
+			return g_charsetEncodingMap[i].encoding;
 	}
 
 	// Use Base64 if more than 40% non-ASCII, or Quoted-Printable else (default)