From 03a0e36e91070a1bcfb13babaaefec4ea258723e Mon Sep 17 00:00:00 2001 From: Vincent Richard Date: Mon, 30 Jun 2014 22:48:42 +0200 Subject: [PATCH] Added support for language specification in RFC-2047 encoded words and RFC-2231 parameter values. --- src/vmime/parameter.cpp | 8 +++-- src/vmime/word.cpp | 54 +++++++++++++++++++++++++++----- src/vmime/word.hpp | 16 ++++++++++ src/vmime/wordEncoder.cpp | 7 ++++- src/vmime/wordEncoder.hpp | 4 ++- tests/parser/parameterTest.cpp | 31 ++++++++++++++++++ tests/parser/textTest.cpp | 11 ++++++- tests/parser/wordEncoderTest.cpp | 17 +++++++--- tests/testUtils.hpp | 9 +++++- 9 files changed, 139 insertions(+), 18 deletions(-) diff --git a/src/vmime/parameter.cpp b/src/vmime/parameter.cpp index 45e68ad6..8773e857 100644 --- a/src/vmime/parameter.cpp +++ b/src/vmime/parameter.cpp @@ -134,6 +134,7 @@ void parameter::parse(const parsingContext& ctx, const std::vector & bool foundCharsetChunk = false; charset ch(charsets::US_ASCII); + string lang; std::ostringstream value; value.imbue(std::locale::classic()); @@ -170,7 +171,9 @@ void parameter::parse(const parsingContext& ctx, const std::vector & if (q != string::npos) { - // Ignore language + // Extract language + lang = chunk.data.substr(pos, q - pos); + ++q; pos = q; } @@ -268,6 +271,7 @@ void parameter::parse(const parsingContext& ctx, const std::vector & m_value->setBuffer(value.str()); m_value->setCharset(ch); + m_value->setLanguage(lang); } @@ -372,7 +376,7 @@ void parameter::generateImpl const bool alwaysEncode = m_value->getCharset().getRecommendedEncoding(recommendedEnc); bool extended = alwaysEncode; - if ((needQuotedPrintable || cutValue) && + if ((needQuotedPrintable || cutValue || !m_value->getLanguage().empty()) && genMode != generationContext::PARAMETER_VALUE_NO_ENCODING) { // Send the name in quoted-printable, so outlook express et.al. diff --git a/src/vmime/word.cpp b/src/vmime/word.cpp index 7944fc77..2229857c 100644 --- a/src/vmime/word.cpp +++ b/src/vmime/word.cpp @@ -48,7 +48,8 @@ word::word() word::word(const word& w) - : headerFieldValue(), m_buffer(w.m_buffer), m_charset(w.m_charset) + : headerFieldValue(), m_buffer(w.m_buffer), + m_charset(w.m_charset), m_lang(w.m_lang) { } @@ -65,6 +66,12 @@ word::word(const string& buffer, const charset& charset) } +word::word(const string& buffer, const charset& charset, const string& lang) + : m_buffer(buffer), m_charset(charset), m_lang(lang) +{ +} + + shared_ptr word::parseNext (const parsingContext& ctx, const string& buffer, const size_t position, const size_t end, size_t* newPosition, @@ -296,6 +303,21 @@ void word::parseImpl if (theEncoder) { + // Extract charset and language + const string charsetAndLang(charsetPos, charsetEnd); + const string::size_type asteriskPos = charsetAndLang.find('*'); + + if (asteriskPos != string::npos) + { + m_charset = charset(string(charsetAndLang.begin(), charsetAndLang.begin() + asteriskPos)); + m_lang = string(charsetAndLang.begin() + asteriskPos + 1, charsetAndLang.end()); + } + else + { + m_charset = charset(charsetAndLang); + m_lang.clear(); + } + // Decode text string decodedBuffer; @@ -306,7 +328,6 @@ void word::parseImpl delete (theEncoder); m_buffer = decodedBuffer; - m_charset = charset(string(charsetPos, charsetEnd)); setParsedBounds(position, p - buffer.begin()); @@ -358,7 +379,7 @@ void word::generate(const generationContext& ctx, utility::outputStream& os, else if ((flags & text::FORCE_ENCODING) != 0) encodingNeeded = true; else // auto-detect - encodingNeeded = wordEncoder::isEncodingNeeded(ctx, m_buffer, m_charset); + encodingNeeded = wordEncoder::isEncodingNeeded(ctx, m_buffer, m_charset, m_lang); // If text does not need to be encoded, quote the buffer (no folding is performed). if (!encodingNeeded && @@ -600,8 +621,12 @@ void word::generate(const generationContext& ctx, utility::outputStream& os, wordEncoder wordEnc(m_buffer, m_charset); - const string wordStart("=?" + m_charset.getName() + "?" + - (wordEnc.getEncoding() == wordEncoder::ENCODING_B64 ? 'B' : 'Q') + "?"); + const string wordStart("=?" + + m_charset.getName() + + (m_lang.empty() ? "" : string("*") + m_lang) + + "?" + + (wordEnc.getEncoding() == wordEncoder::ENCODING_B64 ? 'B' : 'Q') + + "?"); const string wordEnd("?="); const size_t minWordLength = wordStart.length() + wordEnd.length(); @@ -690,6 +715,7 @@ word& word::operator=(const word& w) { m_buffer = w.m_buffer; m_charset = w.m_charset; + m_lang = w.m_lang; return (*this); } @@ -698,6 +724,7 @@ word& word::operator=(const string& s) { m_buffer = s; m_charset = charset::getLocalCharset(); + m_lang.clear(); return (*this); } @@ -708,18 +735,19 @@ void word::copyFrom(const component& other) m_buffer = w.m_buffer; m_charset = w.m_charset; + m_lang = w.m_lang; } bool word::operator==(const word& w) const { - return (m_charset == w.m_charset && m_buffer == w.m_buffer); + return (m_charset == w.m_charset && m_buffer == w.m_buffer && m_lang == w.m_lang); } bool word::operator!=(const word& w) const { - return (m_charset != w.m_charset || m_buffer != w.m_buffer); + return (m_charset != w.m_charset || m_buffer != w.m_buffer || m_lang != w.m_lang); } @@ -769,6 +797,18 @@ void word::setCharset(const charset& ch) } +const string word::getLanguage() const +{ + return m_lang; +} + + +void word::setLanguage(const string& lang) +{ + m_lang = lang; +} + + const string& word::getBuffer() const { return (m_buffer); diff --git a/src/vmime/word.hpp b/src/vmime/word.hpp index 4122228d..0e60225e 100644 --- a/src/vmime/word.hpp +++ b/src/vmime/word.hpp @@ -48,6 +48,7 @@ public: word(const word& w); word(const string& buffer); // Defaults to local charset word(const string& buffer, const charset& charset); + word(const string& buffer, const charset& charset, const string& lang); /** Return the raw data for this encoded word. * @@ -85,6 +86,20 @@ public: */ void setCharset(const charset& ch); + /** Return the language used in this word (optional). + * If not specified, the value is empty. + * + * @return language tag for this word, in the format specified + * by RFC-1766 + */ + const string getLanguage() const; + + /** Set the language used in this word (optional). + * + * @param lang language tag, in the format specified by RFC-1766 + */ + void setLanguage(const string& lang); + /** Returns whether two words actually represent the same text, * regardless of their charset. * @@ -194,6 +209,7 @@ private: // in the specified "m_charset". string m_buffer; charset m_charset; + string m_lang; }; diff --git a/src/vmime/wordEncoder.cpp b/src/vmime/wordEncoder.cpp index 421a9ecd..b40f5371 100644 --- a/src/vmime/wordEncoder.cpp +++ b/src/vmime/wordEncoder.cpp @@ -226,7 +226,8 @@ wordEncoder::Encoding wordEncoder::getEncoding() const // static bool wordEncoder::isEncodingNeeded - (const generationContext& ctx, const string& buffer, const charset& charset) + (const generationContext& ctx, const string& buffer, + const charset& charset, const string& lang) { if (!ctx.getInternationalizedEmailSupport()) { @@ -250,6 +251,10 @@ bool wordEncoder::isEncodingNeeded if (buffer.find("=?") != string::npos || buffer.find("?=") != string::npos) return true; + // If a language is specified, force encoding + if (!lang.empty()) + return true; + return false; } diff --git a/src/vmime/wordEncoder.hpp b/src/vmime/wordEncoder.hpp index 6f652fa2..8abd3d3c 100644 --- a/src/vmime/wordEncoder.hpp +++ b/src/vmime/wordEncoder.hpp @@ -78,9 +78,11 @@ public: * @param ctx generation context * @param buffer buffer to analyze * @param charset charset of the buffer + * @param lang language code, in the format specified by RFC-1766 * @return true if encoding is needed, false otherwise. */ - static bool isEncodingNeeded(const generationContext& ctx, const string& buffer, const charset& charset); + static bool isEncodingNeeded(const generationContext& ctx, const string& buffer, + const charset& charset, const string& lang); /** Guess the best RFC-2047 encoding to use for the specified buffer. * diff --git a/tests/parser/parameterTest.cpp b/tests/parser/parameterTest.cpp index 12c17444..cf8f8477 100644 --- a/tests/parser/parameterTest.cpp +++ b/tests/parser/parameterTest.cpp @@ -81,6 +81,8 @@ VMIME_TEST_SUITE_BEGIN(parameterTest) #define PARAM_NAME(p, n) (p.getParameterAt(n)->getName()) #define PARAM_CHARSET(p, n) \ (p.getParameterAt(n)->getValue().getCharset().generate()) +#define PARAM_LANG(p, n) \ + (p.getParameterAt(n)->getValue().getLanguage()) #define PARAM_BUFFER(p, n) \ (p.getParameterAt(n)->getValue().getBuffer()) @@ -235,6 +237,16 @@ VMIME_TEST_SUITE_BEGIN(parameterTest) VASSERT_EQ("5.2", "param1", PARAM_NAME(p5, 0)); VASSERT_EQ("5.3", "us-ascii", PARAM_CHARSET(p5, 0)); VASSERT_EQ("5.4", "value1", PARAM_BUFFER(p5, 0)); + + // Language specification + parameterizedHeaderField p6; + p6.parse("X; param1*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A"); + + VASSERT_EQ("6.1", 1, p6.getParameterCount()); + VASSERT_EQ("6.2", "param1", PARAM_NAME(p6, 0)); + VASSERT_EQ("6.3", "us-ascii", PARAM_CHARSET(p6, 0)); + VASSERT_EQ("6.4", "en-us", PARAM_LANG(p6, 0)); + VASSERT_EQ("6.5", "This is ***fun***", PARAM_BUFFER(p6, 0)); } void testGenerate() @@ -370,6 +382,25 @@ VMIME_TEST_SUITE_BEGIN(parameterTest) VASSERT_EQ("4.both", "F: X; param1=\"va lue\"", p4.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047)); + + // Language specification + parameterizedHeaderField p5; + p5.appendParameter(vmime::make_shared ("param1", + vmime::word("This is ***fun***", vmime::charset("us-ascii"), "en-us"))); + + VASSERT_EQ("5.no-encoding", "F: X; param1=\"This is ***fun***\"", + p5.generate(vmime::generationContext::PARAMETER_VALUE_NO_ENCODING)); + + VASSERT_EQ("5.rfc2047", "F: X; param1=\"=?us-ascii*en-us?Q?This_is_***fun***?=\"", + p5.generate(vmime::generationContext::PARAMETER_VALUE_RFC2047_ONLY)); + + VASSERT_EQ("5.rfc2231", "F: X; param1*=us-ascii''This%20is%20***fun***", + p5.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_ONLY)); + + VASSERT_EQ("5.both", "F: X; " + "param1=\"=?us-ascii*en-us?Q?This_is_***fun***?=\";\r\n " + "param1*=us-ascii''This%20is%20***fun***", + p5.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047)); } void testNonStandardEncodedParam() diff --git a/tests/parser/textTest.cpp b/tests/parser/textTest.cpp index f56f972d..58d5bff5 100644 --- a/tests/parser/textTest.cpp +++ b/tests/parser/textTest.cpp @@ -221,12 +221,21 @@ VMIME_TEST_SUITE_BEGIN(textTest) VASSERT_EQ("6", "[text: [[word: charset=iso-8859-1, buffer=Know wh\xe4t? It works!]]]", parseText("=?iso-8859-1?Q?Know_wh=E4t?_It_works!?=")); - // TODO: add more + // With language specifier + VASSERT_EQ("7", "[text: [[word: charset=US-ASCII, buffer=Keith Moore, lang=EN]]]", + parseText("=?US-ASCII*EN?Q?Keith_Moore?=")); } void testGenerate() { // TODO + + // With language specifier + vmime::word wlang1("Émeline", vmime::charset("UTF-8"), "FR"); + VASSERT_EQ("lang1", "=?UTF-8*FR?Q?=C3=89meline?=", wlang1.generate()); + + vmime::word wlang2("Keith Moore", vmime::charset("US-ASCII"), "EN"); + VASSERT_EQ("lang2", "=?US-ASCII*EN?Q?Keith_Moore?=", wlang2.generate()); } void testDisplayForm() diff --git a/tests/parser/wordEncoderTest.cpp b/tests/parser/wordEncoderTest.cpp index 9bc4dcfd..c3c44a87 100644 --- a/tests/parser/wordEncoderTest.cpp +++ b/tests/parser/wordEncoderTest.cpp @@ -32,6 +32,7 @@ VMIME_TEST_SUITE_BEGIN(wordEncoderTest) VMIME_TEST(testGetNextChunk) VMIME_TEST(testGetNextChunk_integral) VMIME_TEST(testIsEncodingNeeded_ascii) + VMIME_TEST(testIsEncodingNeeded_withLanguage) VMIME_TEST(testIsEncodingNeeded_specialChars) VMIME_TEST(testGuessBestEncoding_QP) VMIME_TEST(testGuessBestEncoding_B64) @@ -70,25 +71,31 @@ VMIME_TEST_SUITE_BEGIN(wordEncoderTest) ctx.setInternationalizedEmailSupport(false); VASSERT_FALSE("ascii", vmime::wordEncoder::isEncodingNeeded - (ctx, "ASCII-only buffer", vmime::charset("utf-8"))); + (ctx, "ASCII-only buffer", vmime::charset("utf-8"), "")); VASSERT_TRUE("non-ascii", vmime::wordEncoder::isEncodingNeeded - (ctx, "Buffer with some UTF-8 '\xc3\xa0'", vmime::charset("utf-8"))); + (ctx, "Buffer with some UTF-8 '\xc3\xa0'", vmime::charset("utf-8"), "")); + } + + void testIsEncodingNeeded_withLanguage() + { + VASSERT_TRUE("ascii", vmime::wordEncoder::isEncodingNeeded + (vmime::generationContext::getDefaultContext(), "ASCII-only buffer", vmime::charset("utf-8"), "en")); } void testIsEncodingNeeded_specialChars() { VASSERT_TRUE("rfc2047", vmime::wordEncoder::isEncodingNeeded (vmime::generationContext::getDefaultContext(), - "foo bar =? foo bar", vmime::charset("us-ascii"))); + "foo bar =? foo bar", vmime::charset("us-ascii"), "")); VASSERT_TRUE("new line 1", vmime::wordEncoder::isEncodingNeeded (vmime::generationContext::getDefaultContext(), - "foo bar \n foo bar", vmime::charset("us-ascii"))); + "foo bar \n foo bar", vmime::charset("us-ascii"), "")); VASSERT_TRUE("new line 2", vmime::wordEncoder::isEncodingNeeded (vmime::generationContext::getDefaultContext(), - "foo bar \r foo bar", vmime::charset("us-ascii"))); + "foo bar \r foo bar", vmime::charset("us-ascii"), "")); } void testGuessBestEncoding_QP() diff --git a/tests/testUtils.hpp b/tests/testUtils.hpp index 2476787b..367be623 100644 --- a/tests/testUtils.hpp +++ b/tests/testUtils.hpp @@ -119,7 +119,14 @@ inline std::ostream& operator<<(std::ostream& os, const vmime::charset& ch) inline std::ostream& operator<<(std::ostream& os, const vmime::word& w) { - os << "[word: charset=" << w.getCharset().getName() << ", buffer=" << w.getBuffer() << "]"; + os << "[word: charset=" << w.getCharset().getName() + << ", buffer=" << w.getBuffer(); + + if (!w.getLanguage().empty()) + os << ", lang=" << w.getLanguage(); + + os << "]"; + return (os); }