aboutsummaryrefslogtreecommitdiffstats
path: root/src/wordEncoder.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/wordEncoder.cpp')
-rw-r--r-- src/wordEncoder.cpp 66
1 files changed, 62 insertions, 4 deletions
diff --git a/src/wordEncoder.cpp b/src/wordEncoder.cpp
index 154b4efc..cc8292f8 100644
--- a/src/wordEncoder.cpp
+++ b/src/wordEncoder.cpp
@@ -260,17 +260,75 @@ wordEncoder::Encoding wordEncoder::getEncoding() const
}
+// Explicitly force encoding for some charsets.
+//
+// One entry pairs a charset name fragment with the word encoding that is
+// imposed on charsets whose lower-cased name contains that fragment.
+struct CharsetEncodingEntry
+{
+    // Fragment searched for inside the lower-cased charset name
+    std::string charset;
+
+    // Encoding associated with the matching charsets
+    wordEncoder::Encoding encoding;
+
+    CharsetEncodingEntry(const std::string& charset_, const wordEncoder::Encoding encoding_)
+        : charset(charset_),
+          encoding(encoding_)
+    {
+    }
+};
+
+// Table of charsets for which the word encoding is imposed instead of
+// being guessed from the text. Each 'charset' value is a fragment that is
+// searched for inside the lower-cased charset name.
+//
+// Declared 'static const': the table is private to this translation unit
+// and is only ever read after its initialization.
+static const CharsetEncodingEntry g_charsetEncodingMap[] =
+{
+    // Use QP encoding for ISO-8859-x charsets
+    CharsetEncodingEntry("iso-8859", wordEncoder::ENCODING_QP),
+    CharsetEncodingEntry("iso8859", wordEncoder::ENCODING_QP),
+
+    // RFC-1468 states:
+    // " ISO-2022-JP may also be used in MIME Part 2 headers. The "B"
+    // encoding should be used with ISO-2022-JP text. "
+    // Use Base64 encoding for all ISO-2022 charsets.
+    CharsetEncodingEntry("iso-2022", wordEncoder::ENCODING_B64),
+    CharsetEncodingEntry("iso2022", wordEncoder::ENCODING_B64),
+
+    // Sentinel: never examined, because the loops over this table stop one
+    // entry before the end. Its empty fragment would otherwise be found in
+    // any charset name.
+    CharsetEncodingEntry("", wordEncoder::ENCODING_AUTO)
+};
+
+
+// static
+//
+// Tells whether 'buffer', expressed in 'charset', has to be encoded.
+//
+// Returns true when one of the following holds (checked in this order):
+//   - the lower-cased charset name contains a fragment listed in
+//     g_charsetEncodingMap with an encoding other than ENCODING_AUTO;
+//   - the buffer contains at least one non-ASCII character;
+//   - the buffer contains a '\n' or a '\r' (header fields).
+// Returns false otherwise.
+bool wordEncoder::isEncodingNeeded(const string& buffer, const charset& charset)
+{
+    // Special treatment for some charsets
+    const string lowerName = utility::stringUtils::toLower(charset.getName());
+
+    // The final table entry is a placeholder and is deliberately left out
+    const CharsetEncodingEntry* const tableEnd = g_charsetEncodingMap
+        + ((sizeof(g_charsetEncodingMap) / sizeof(g_charsetEncodingMap[0])) - 1);
+
+    for (const CharsetEncodingEntry* entry = g_charsetEncodingMap ; entry != tableEnd ; ++entry)
+    {
+        const bool nameMatches = (lowerName.find(entry->charset) != string::npos);
+
+        if (nameMatches && entry->encoding != wordEncoder::ENCODING_AUTO)
+            return true;
+    }
+
+    // A buffer made only of ASCII chars does not need encoding, unless it
+    // also holds at least one of '\n' or '\r' (header fields), in which
+    // case encoding is forced
+    return utility::stringUtils::findFirstNonASCIIchar(buffer.begin(), buffer.end()) != string::npos
+        || buffer.find_first_of("\n\r") != string::npos;
+}
+
+
// static
wordEncoder::Encoding wordEncoder::guessBestEncoding
(const string& buffer, const charset& charset)
{
- // If the charset is ISO-8859-x, set to QP encoding
+ // Special treatment for some charsets
const string cset = utility::stringUtils::toLower(charset.getName());
- if (cset.find("iso-8859") != string::npos ||
- cset.find("iso8859") != string::npos)
+ for (unsigned int i = 0 ; i < (sizeof(g_charsetEncodingMap) / sizeof(g_charsetEncodingMap[0])) - 1 ; ++i)
{
- return ENCODING_QP;
+ if (cset.find(g_charsetEncodingMap[i].charset) != string::npos)
+ return g_charsetEncodingMap[i].encoding;
}
// Use Base64 if more than 40% non-ASCII, or Quoted-Printable else (default)