aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/utility/stringUtils.cpp 18
-rw-r--r-- src/word.cpp 28
-rw-r--r-- src/wordEncoder.cpp 66
-rw-r--r-- tests/parser/textTest.cpp 33
-rw-r--r-- vmime/utility/stringUtils.hpp 8
-rw-r--r-- vmime/wordEncoder.hpp 17
6 files changed, 144 insertions, 26 deletions
diff --git a/src/utility/stringUtils.cpp b/src/utility/stringUtils.cpp
index abae8f9a..7f61a7d3 100644
--- a/src/utility/stringUtils.cpp
+++ b/src/utility/stringUtils.cpp
@@ -151,6 +151,24 @@ string::size_type stringUtils::countASCIIchars
}
+string::size_type stringUtils::findFirstNonASCIIchar
+	(const string::const_iterator begin, const string::const_iterator end)
+{
+	// Advance past every leading 7-bit US-ASCII character of the range
+	string::const_iterator cur = begin;
+
+	while (cur != end && parserHelpers::isAscii(*cur))
+		++cur;
+
+	// The whole range was consumed: there is no non-ASCII character
+	if (cur == end)
+		return string::npos;
+
+	// Offset of the first non-ASCII character, counted from 'begin'
+	return cur - begin;
+}
+
+
const string stringUtils::unquote(const string& str)
{
if (str.length() < 2)
diff --git a/src/word.cpp b/src/word.cpp
index 667f1fbb..9d0177fa 100644
--- a/src/word.cpp
+++ b/src/word.cpp
@@ -336,30 +336,22 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
 	if (state == NULL)
 		state = &defaultGeneratorState;
-	// Calculate the number of ASCII chars to check whether encoding is needed
-	// and _which_ encoding to use.
-	const string::size_type asciiCount =
-		utility::stringUtils::countASCIIchars(m_buffer.begin(), m_buffer.end());
+	// Decide whether the word must be encoded. FORCE_NO_ENCODING always wins (as it did
+	// before this change); otherwise encode if forced, or if contents + charset require it.
+	bool encodingNeeded = (flags & text::FORCE_ENCODING) != 0;
-	bool noEncoding = (flags & text::FORCE_NO_ENCODING) ||
-		(!(flags & text::FORCE_ENCODING) && asciiCount == m_buffer.length());
-
-	if (!(flags & text::FORCE_NO_ENCODING) &&
-		m_buffer.find_first_of("\n\r") != string::npos)
-	{
-		// Force encoding when there are only ASCII chars, but there is
-		// also at least one of '\n' or '\r' (header fields)
-		noEncoding = false;
-	}
+	if ((flags & text::FORCE_NO_ENCODING) != 0)
+		encodingNeeded = false;
+	else if (!encodingNeeded)
+		encodingNeeded = wordEncoder::isEncodingNeeded(m_buffer, m_charset);
 	// If possible and requested (with flag), quote the buffer (no folding is performed).
 	// Quoting is possible if and only if:
-	// - the whole buffer is ASCII-only
+	// - the buffer does not need to be encoded
 	// - the buffer does not contain quoting character (")
 	// - there is enough remaining space on the current line to hold the whole buffer
-	if (!noEncoding &&
+	if (!encodingNeeded &&
 		(flags & text::QUOTE_IF_POSSIBLE) &&
-		asciiCount == m_buffer.length() &&
 		m_buffer.find('"') == string::npos &&
 		(curLineLength + 2 /* 2 x " */ + m_buffer.length()) < maxLineLength)
 	{
@@ -367,7 +359,7 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe
 		curLineLength += 2 + m_buffer.length();
 	}
 	// We will fold lines without encoding them.
-	else if (noEncoding)
+	else if (!encodingNeeded)
 	{
 		string::const_iterator lastWSpos = m_buffer.end(); // last white-space position
 		string::const_iterator curLineStart = m_buffer.begin(); // current line start
diff --git a/src/wordEncoder.cpp b/src/wordEncoder.cpp
index 154b4efc..cc8292f8 100644
--- a/src/wordEncoder.cpp
+++ b/src/wordEncoder.cpp
@@ -260,17 +260,75 @@ wordEncoder::Encoding wordEncoder::getEncoding() const
}
+// One rule of the forced-encoding table: charset name fragment -> RFC-2047 encoding
+struct CharsetEncodingEntry
+{
+	std::string charset;             // fragment searched for in the lower-cased charset name
+	wordEncoder::Encoding encoding;  // encoding associated with that fragment
+
+	CharsetEncodingEntry(const std::string& name, const wordEncoder::Encoding enc)
+		: charset(name), encoding(enc)
+	{
+	}
+};
+
+static const CharsetEncodingEntry g_charsetEncodingMap[] =  // file-local, read-only table
+{
+	// Use QP encoding for ISO-8859-x charsets
+	CharsetEncodingEntry("iso-8859", wordEncoder::ENCODING_QP),
+	CharsetEncodingEntry("iso8859", wordEncoder::ENCODING_QP),
+
+	// RFC-1468 states:
+	// " ISO-2022-JP may also be used in MIME Part 2 headers. The "B"
+	// encoding should be used with ISO-2022-JP text. "
+	// Use Base64 encoding for all ISO-2022 charsets.
+	CharsetEncodingEntry("iso-2022", wordEncoder::ENCODING_B64),
+	CharsetEncodingEntry("iso2022", wordEncoder::ENCODING_B64),
+
+	// Sentinel: never searched (lookups stop one entry early); "" would match any charset
+	CharsetEncodingEntry("", wordEncoder::ENCODING_AUTO)
+};
+
+
+// static
+bool wordEncoder::isEncodingNeeded(const string& buffer, const charset& charset)
+{
+	// Charsets listed in g_charsetEncodingMap are always encoded, whatever the
+	// buffer holds (the last table entry is a placeholder and is not searched)
+	const string lowerName = utility::stringUtils::toLower(charset.getName());
+
+	const unsigned int entryCount =
+		(sizeof(g_charsetEncodingMap) / sizeof(g_charsetEncodingMap[0])) - 1;
+
+	for (unsigned int idx = 0 ; idx < entryCount ; ++idx)
+	{
+		const bool nameMatches =
+			lowerName.find(g_charsetEncodingMap[idx].charset) != string::npos;
+
+		if (nameMatches && g_charsetEncodingMap[idx].encoding != wordEncoder::ENCODING_AUTO)
+			return true;
+	}
+
+	// Any character outside 7-bit US-ASCII requires encoding...
+	const bool hasNonAscii =
+		utility::stringUtils::findFirstNonASCIIchar(buffer.begin(), buffer.end()) != string::npos;
+
+	// ...and so does a CR or LF in otherwise pure-ASCII text (header fields)
+	return hasNonAscii || buffer.find_first_of("\n\r") != string::npos;
+}
+
+
// static
wordEncoder::Encoding wordEncoder::guessBestEncoding
(const string& buffer, const charset& charset)
{
- // If the charset is ISO-8859-x, set to QP encoding
+ // Special treatment for some charsets
const string cset = utility::stringUtils::toLower(charset.getName());
- if (cset.find("iso-8859") != string::npos ||
- cset.find("iso8859") != string::npos)
+ for (unsigned int i = 0 ; i < (sizeof(g_charsetEncodingMap) / sizeof(g_charsetEncodingMap[0])) - 1 ; ++i)
{
- return ENCODING_QP;
+ if (cset.find(g_charsetEncodingMap[i].charset) != string::npos)
+ return g_charsetEncodingMap[i].encoding;
}
// Use Base64 if more than 40% non-ASCII, or Quoted-Printable else (default)
diff --git a/tests/parser/textTest.cpp b/tests/parser/textTest.cpp
index 5c9b5213..4a7e394f 100644
--- a/tests/parser/textTest.cpp
+++ b/tests/parser/textTest.cpp
@@ -44,6 +44,8 @@ VMIME_TEST_SUITE_BEGIN
VMIME_TEST(testWordGenerateSpace)
VMIME_TEST(testWordGenerateSpace2)
VMIME_TEST(testWordGenerateMultiBytes)
+ VMIME_TEST(testWordGenerateQuote)
+ VMIME_TEST(testWordGenerateSpecialCharsets)
VMIME_TEST_LIST_END
@@ -335,9 +337,38 @@ VMIME_TEST_SUITE_BEGIN
VASSERT_EQ("1", "=?utf-8?Q?aaa?==?utf-8?Q?=C3=A9?==?utf-8?Q?zzz?=",
cleanGeneratedWords(vmime::word("aaa\xc3\xa9zzz", vmime::charset("utf-8")).generate(16)));
- VASSERT_EQ("1", "=?utf-8?Q?aaa=C3=A9?==?utf-8?Q?zzz?=",
+ VASSERT_EQ("2", "=?utf-8?Q?aaa=C3=A9?==?utf-8?Q?zzz?=",
cleanGeneratedWords(vmime::word("aaa\xc3\xa9zzz", vmime::charset("utf-8")).generate(17)));
}
+	void testWordGenerateQuote()
+	{
+		std::string str;
+		vmime::utility::outputStreamStringAdapter os(str);
+
+		// ASCII-only text is quotable (charset is explicit so the result does not depend on the default one)
+		str.clear();
+		vmime::word("Quoted text", vmime::charset("UTF-8")).generate(os, 1000, 0, NULL, vmime::text::QUOTE_IF_POSSIBLE, NULL);
+		VASSERT_EQ("1", "\"Quoted text\"", cleanGeneratedWords(str));
+
+		// Text with CR/LF is not quotable
+		str.clear();
+		vmime::word("Non-quotable\ntext", "us-ascii").generate(os, 1000, 0, NULL, vmime::text::QUOTE_IF_POSSIBLE, NULL);
+		VASSERT_EQ("2", "=?us-ascii?Q?Non-quotable=0Atext?=", cleanGeneratedWords(str));
+
+		// Text with non-ASCII chars is not quotable (explicit UTF-8, matching the expected "=?UTF-8?" prefix)
+		str.clear();
+		vmime::word("Non-quotable text \xc3\xa9", vmime::charset("UTF-8")).generate(os, 1000, 0, NULL, vmime::text::QUOTE_IF_POSSIBLE, NULL);
+		VASSERT_EQ("3", "=?UTF-8?Q?Non-quotable_text_=C3=A9?=", cleanGeneratedWords(str));
+	}
+
+	void testWordGenerateSpecialCharsets()
+	{
+		// The sample holds 7-bit chars only, yet ISO-2022-JP text must come out Base64-encoded
+		vmime::word sample("^[$B%Q!<%=%J%k!&%=%U%H%&%'%\"^[(B", vmime::charset("iso-2022-jp"));
+		VASSERT_EQ("1", "=?iso-2022-jp?B?XlskQiVRITwlPSVKJWshJiU9JVUlSCUmJSclIl5bKEI=?=",
+			cleanGeneratedWords(sample.generate(100)));
+	}
+
VMIME_TEST_SUITE_END
diff --git a/vmime/utility/stringUtils.hpp b/vmime/utility/stringUtils.hpp
index b6589dbc..a8270d32 100644
--- a/vmime/utility/stringUtils.hpp
+++ b/vmime/utility/stringUtils.hpp
@@ -104,6 +104,14 @@ public:
*/
static string::size_type countASCIIchars(const string::const_iterator begin, const string::const_iterator end);
+ /** Returns the position of the first non 7-bit US-ASCII character in a string.
+ * The range [begin, end) is scanned from begin and the scan stops at the first match.
+ * @param begin start position (first character to examine)
+ * @param end end position (one past the last character to examine)
+ * @return offset of that character counted from begin, or string::npos if none is found
+ */
+ static string::size_type findFirstNonASCIIchar(const string::const_iterator begin, const string::const_iterator end);
+
/** Convert the specified value to a string value.
*
* @param value to convert
diff --git a/vmime/wordEncoder.hpp b/vmime/wordEncoder.hpp
index 17ca8081..1a492ea6 100644
--- a/vmime/wordEncoder.hpp
+++ b/vmime/wordEncoder.hpp
@@ -73,12 +73,23 @@ public:
*/
Encoding getEncoding() const;
-private:
+ /** Test whether RFC-2047 encoding is needed.
+ * ISO-8859-x and ISO-2022-x charsets are always reported as needing encoding.
+ * @param buffer buffer to analyze
+ * @param charset charset of the buffer
+ * @return true if the charset forces encoding or the buffer holds a non-ASCII char, CR or LF; false otherwise.
+ */
+ static bool isEncodingNeeded(const string& buffer, const charset& charset);
+ /** Guess the best RFC-2047 encoding to use for the specified buffer.
+ * Charsets with a forced encoding (ISO-8859-x: QP, ISO-2022-x: Base64) take precedence over the contents.
+ * @param buffer buffer to analyze
+ * @param charset charset of the buffer
+ * @return RFC-2047 encoding (the forced one if any; otherwise presumably chosen from the share of non-ASCII chars)
+ */
static Encoding guessBestEncoding(const string& buffer, const charset& charset);
- void guessBestEncoding();
-
+private:
string m_buffer;
string::size_type m_pos;