diff options
Diffstat (limited to 'src/wordEncoder.cpp')
-rw-r--r-- | src/wordEncoder.cpp | 66 |
1 files changed, 62 insertions, 4 deletions
diff --git a/src/wordEncoder.cpp b/src/wordEncoder.cpp
index 154b4efc..cc8292f8 100644
--- a/src/wordEncoder.cpp
+++ b/src/wordEncoder.cpp
@@ -260,17 +260,75 @@ wordEncoder::Encoding wordEncoder::getEncoding() const
 }
 
 
+// Explicitly force encoding for some charsets
+struct CharsetEncodingEntry
+{
+	CharsetEncodingEntry(const std::string& charset_, const wordEncoder::Encoding encoding_)
+		: charset(charset_), encoding(encoding_)
+	{
+	}
+
+	std::string charset;
+	wordEncoder::Encoding encoding;
+};
+
+CharsetEncodingEntry g_charsetEncodingMap[] =
+{
+	// Use QP encoding for ISO-8859-x charsets
+	CharsetEncodingEntry("iso-8859", wordEncoder::ENCODING_QP),
+	CharsetEncodingEntry("iso8859", wordEncoder::ENCODING_QP),
+
+	// RFC-1468 states:
+	// " ISO-2022-JP may also be used in MIME Part 2 headers. The "B"
+	// encoding should be used with ISO-2022-JP text. "
+	// Use Base64 encoding for all ISO-2022 charsets.
+	CharsetEncodingEntry("iso-2022", wordEncoder::ENCODING_B64),
+	CharsetEncodingEntry("iso2022", wordEncoder::ENCODING_B64),
+
+	// Last entry is not used
+	CharsetEncodingEntry("", wordEncoder::ENCODING_AUTO)
+};
+
+
+// static
+bool wordEncoder::isEncodingNeeded(const string& buffer, const charset& charset)
+{
+	// Special treatment for some charsets
+	const string cset = utility::stringUtils::toLower(charset.getName());
+
+	for (unsigned int i = 0 ; i < (sizeof(g_charsetEncodingMap) / sizeof(g_charsetEncodingMap[0])) - 1 ; ++i)
+	{
+		if (cset.find(g_charsetEncodingMap[i].charset) != string::npos)
+		{
+			if (g_charsetEncodingMap[i].encoding != wordEncoder::ENCODING_AUTO)
+				return true;
+		}
+	}
+
+	// No encoding is needed if the buffer only contains ASCII chars
+	if (utility::stringUtils::findFirstNonASCIIchar(buffer.begin(), buffer.end()) != string::npos)
+		return true;
+
+	// Force encoding when there are only ASCII chars, but there is
+	// also at least one of '\n' or '\r' (header fields)
+	if (buffer.find_first_of("\n\r") != string::npos)
+		return true;
+
+	return false;
+}
+
+
 // static
 wordEncoder::Encoding wordEncoder::guessBestEncoding
 	(const string& buffer, const charset& charset)
 {
-	// If the charset is ISO-8859-x, set to QP encoding
+	// Special treatment for some charsets
 	const string cset = utility::stringUtils::toLower(charset.getName());
 
-	if (cset.find("iso-8859") != string::npos ||
-	    cset.find("iso8859") != string::npos)
+	for (unsigned int i = 0 ; i < (sizeof(g_charsetEncodingMap) / sizeof(g_charsetEncodingMap[0])) - 1 ; ++i)
 	{
-		return ENCODING_QP;
+		if (cset.find(g_charsetEncodingMap[i].charset) != string::npos)
+			return g_charsetEncodingMap[i].encoding;
 	}
 
 	// Use Base64 if more than 40% non-ASCII, or Quoted-Printable else (default)