Added support for language specification in RFC-2047 encoded words and RFC-2231 parameter values.

This commit is contained in:
Vincent Richard 2014-06-30 22:48:42 +02:00
parent 0863f50c26
commit 03a0e36e91
9 changed files with 139 additions and 18 deletions

View File

@ -134,6 +134,7 @@ void parameter::parse(const parsingContext& ctx, const std::vector <valueChunk>&
bool foundCharsetChunk = false; bool foundCharsetChunk = false;
charset ch(charsets::US_ASCII); charset ch(charsets::US_ASCII);
string lang;
std::ostringstream value; std::ostringstream value;
value.imbue(std::locale::classic()); value.imbue(std::locale::classic());
@ -170,7 +171,9 @@ void parameter::parse(const parsingContext& ctx, const std::vector <valueChunk>&
if (q != string::npos) if (q != string::npos)
{ {
// Ignore language // Extract language
lang = chunk.data.substr(pos, q - pos);
++q; ++q;
pos = q; pos = q;
} }
@ -268,6 +271,7 @@ void parameter::parse(const parsingContext& ctx, const std::vector <valueChunk>&
m_value->setBuffer(value.str()); m_value->setBuffer(value.str());
m_value->setCharset(ch); m_value->setCharset(ch);
m_value->setLanguage(lang);
} }
@ -372,7 +376,7 @@ void parameter::generateImpl
const bool alwaysEncode = m_value->getCharset().getRecommendedEncoding(recommendedEnc); const bool alwaysEncode = m_value->getCharset().getRecommendedEncoding(recommendedEnc);
bool extended = alwaysEncode; bool extended = alwaysEncode;
if ((needQuotedPrintable || cutValue) && if ((needQuotedPrintable || cutValue || !m_value->getLanguage().empty()) &&
genMode != generationContext::PARAMETER_VALUE_NO_ENCODING) genMode != generationContext::PARAMETER_VALUE_NO_ENCODING)
{ {
// Send the name in quoted-printable, so outlook express et.al. // Send the name in quoted-printable, so outlook express et.al.

View File

@ -48,7 +48,8 @@ word::word()
word::word(const word& w) word::word(const word& w)
: headerFieldValue(), m_buffer(w.m_buffer), m_charset(w.m_charset) : headerFieldValue(), m_buffer(w.m_buffer),
m_charset(w.m_charset), m_lang(w.m_lang)
{ {
} }
@ -65,6 +66,12 @@ word::word(const string& buffer, const charset& charset)
} }
// Build a word from its raw text, the charset that text is expressed in,
// and a language tag (an empty tag means no language is specified).
word::word(const string& buffer, const charset& charset, const string& lang)
	: m_buffer(buffer),
	  m_charset(charset),
	  m_lang(lang)
{
}
shared_ptr <word> word::parseNext shared_ptr <word> word::parseNext
(const parsingContext& ctx, const string& buffer, const size_t position, (const parsingContext& ctx, const string& buffer, const size_t position,
const size_t end, size_t* newPosition, const size_t end, size_t* newPosition,
@ -296,6 +303,21 @@ void word::parseImpl
if (theEncoder) if (theEncoder)
{ {
// Extract charset and language
const string charsetAndLang(charsetPos, charsetEnd);
const string::size_type asteriskPos = charsetAndLang.find('*');
if (asteriskPos != string::npos)
{
m_charset = charset(string(charsetAndLang.begin(), charsetAndLang.begin() + asteriskPos));
m_lang = string(charsetAndLang.begin() + asteriskPos + 1, charsetAndLang.end());
}
else
{
m_charset = charset(charsetAndLang);
m_lang.clear();
}
// Decode text // Decode text
string decodedBuffer; string decodedBuffer;
@ -306,7 +328,6 @@ void word::parseImpl
delete (theEncoder); delete (theEncoder);
m_buffer = decodedBuffer; m_buffer = decodedBuffer;
m_charset = charset(string(charsetPos, charsetEnd));
setParsedBounds(position, p - buffer.begin()); setParsedBounds(position, p - buffer.begin());
@ -358,7 +379,7 @@ void word::generate(const generationContext& ctx, utility::outputStream& os,
else if ((flags & text::FORCE_ENCODING) != 0) else if ((flags & text::FORCE_ENCODING) != 0)
encodingNeeded = true; encodingNeeded = true;
else // auto-detect else // auto-detect
encodingNeeded = wordEncoder::isEncodingNeeded(ctx, m_buffer, m_charset); encodingNeeded = wordEncoder::isEncodingNeeded(ctx, m_buffer, m_charset, m_lang);
// If text does not need to be encoded, quote the buffer (no folding is performed). // If text does not need to be encoded, quote the buffer (no folding is performed).
if (!encodingNeeded && if (!encodingNeeded &&
@ -600,8 +621,12 @@ void word::generate(const generationContext& ctx, utility::outputStream& os,
wordEncoder wordEnc(m_buffer, m_charset); wordEncoder wordEnc(m_buffer, m_charset);
const string wordStart("=?" + m_charset.getName() + "?" + const string wordStart("=?"
(wordEnc.getEncoding() == wordEncoder::ENCODING_B64 ? 'B' : 'Q') + "?"); + m_charset.getName()
+ (m_lang.empty() ? "" : string("*") + m_lang)
+ "?"
+ (wordEnc.getEncoding() == wordEncoder::ENCODING_B64 ? 'B' : 'Q')
+ "?");
const string wordEnd("?="); const string wordEnd("?=");
const size_t minWordLength = wordStart.length() + wordEnd.length(); const size_t minWordLength = wordStart.length() + wordEnd.length();
@ -690,6 +715,7 @@ word& word::operator=(const word& w)
{ {
m_buffer = w.m_buffer; m_buffer = w.m_buffer;
m_charset = w.m_charset; m_charset = w.m_charset;
m_lang = w.m_lang;
return (*this); return (*this);
} }
@ -698,6 +724,7 @@ word& word::operator=(const string& s)
{ {
m_buffer = s; m_buffer = s;
m_charset = charset::getLocalCharset(); m_charset = charset::getLocalCharset();
m_lang.clear();
return (*this); return (*this);
} }
@ -708,18 +735,19 @@ void word::copyFrom(const component& other)
m_buffer = w.m_buffer; m_buffer = w.m_buffer;
m_charset = w.m_charset; m_charset = w.m_charset;
m_lang = w.m_lang;
} }
bool word::operator==(const word& w) const bool word::operator==(const word& w) const
{ {
return (m_charset == w.m_charset && m_buffer == w.m_buffer); return (m_charset == w.m_charset && m_buffer == w.m_buffer && m_lang == w.m_lang);
} }
bool word::operator!=(const word& w) const bool word::operator!=(const word& w) const
{ {
return (m_charset != w.m_charset || m_buffer != w.m_buffer); return (m_charset != w.m_charset || m_buffer != w.m_buffer || m_lang != w.m_lang);
} }
@ -769,6 +797,18 @@ void word::setCharset(const charset& ch)
} }
// Language tag attached to this word; empty when none has been specified.
// The tag is handed back as a copy.
const string word::getLanguage() const
{
	return (m_lang);
}
void word::setLanguage(const string& lang)
{
m_lang = lang;
}
const string& word::getBuffer() const const string& word::getBuffer() const
{ {
return (m_buffer); return (m_buffer);

View File

@ -48,6 +48,7 @@ public:
word(const word& w); word(const word& w);
word(const string& buffer); // Defaults to local charset word(const string& buffer); // Defaults to local charset
word(const string& buffer, const charset& charset); word(const string& buffer, const charset& charset);
word(const string& buffer, const charset& charset, const string& lang);
/** Return the raw data for this encoded word. /** Return the raw data for this encoded word.
* *
@ -85,6 +86,20 @@ public:
*/ */
void setCharset(const charset& ch); void setCharset(const charset& ch);
/** Return the language used in this word (optional).
* If not specified, the value is empty. When an RFC-2047
* encoded word is generated, a non-empty tag is written right
* after the charset name, separated by an asterisk
* ("=?charset*lang?...").
*
* @return language tag for this word, in the format specified
* by RFC-1766 (returned by value, not by reference)
*/
const string getLanguage() const;
/** Set the language used in this word (optional).
* An empty string means that no language is specified.
* The tag takes part in word comparison (operator== and
* operator!=) and is handed to wordEncoder::isEncodingNeeded()
* when the need for encoding is auto-detected.
*
* @param lang language tag, in the format specified by RFC-1766
*/
void setLanguage(const string& lang);
/** Returns whether two words actually represent the same text, /** Returns whether two words actually represent the same text,
* regardless of their charset. * regardless of their charset.
* *
@ -194,6 +209,7 @@ private:
// in the specified "m_charset". // in the specified "m_charset".
string m_buffer; string m_buffer;
charset m_charset; charset m_charset;
string m_lang;
}; };

View File

@ -226,7 +226,8 @@ wordEncoder::Encoding wordEncoder::getEncoding() const
// static // static
bool wordEncoder::isEncodingNeeded bool wordEncoder::isEncodingNeeded
(const generationContext& ctx, const string& buffer, const charset& charset) (const generationContext& ctx, const string& buffer,
const charset& charset, const string& lang)
{ {
if (!ctx.getInternationalizedEmailSupport()) if (!ctx.getInternationalizedEmailSupport())
{ {
@ -250,6 +251,10 @@ bool wordEncoder::isEncodingNeeded
if (buffer.find("=?") != string::npos || buffer.find("?=") != string::npos) if (buffer.find("=?") != string::npos || buffer.find("?=") != string::npos)
return true; return true;
// If a language is specified, force encoding
if (!lang.empty())
return true;
return false; return false;
} }

View File

@ -78,9 +78,11 @@ public:
* @param ctx generation context * @param ctx generation context
* @param buffer buffer to analyze * @param buffer buffer to analyze
* @param charset charset of the buffer * @param charset charset of the buffer
* @param lang language code, in the format specified by RFC-1766
* @return true if encoding is needed, false otherwise. * @return true if encoding is needed, false otherwise.
*/ */
static bool isEncodingNeeded(const generationContext& ctx, const string& buffer, const charset& charset); static bool isEncodingNeeded(const generationContext& ctx, const string& buffer,
const charset& charset, const string& lang);
/** Guess the best RFC-2047 encoding to use for the specified buffer. /** Guess the best RFC-2047 encoding to use for the specified buffer.
* *

View File

@ -81,6 +81,8 @@ VMIME_TEST_SUITE_BEGIN(parameterTest)
#define PARAM_NAME(p, n) (p.getParameterAt(n)->getName()) #define PARAM_NAME(p, n) (p.getParameterAt(n)->getName())
#define PARAM_CHARSET(p, n) \ #define PARAM_CHARSET(p, n) \
(p.getParameterAt(n)->getValue().getCharset().generate()) (p.getParameterAt(n)->getValue().getCharset().generate())
#define PARAM_LANG(p, n) \
(p.getParameterAt(n)->getValue().getLanguage())
#define PARAM_BUFFER(p, n) \ #define PARAM_BUFFER(p, n) \
(p.getParameterAt(n)->getValue().getBuffer()) (p.getParameterAt(n)->getValue().getBuffer())
@ -235,6 +237,16 @@ VMIME_TEST_SUITE_BEGIN(parameterTest)
VASSERT_EQ("5.2", "param1", PARAM_NAME(p5, 0)); VASSERT_EQ("5.2", "param1", PARAM_NAME(p5, 0));
VASSERT_EQ("5.3", "us-ascii", PARAM_CHARSET(p5, 0)); VASSERT_EQ("5.3", "us-ascii", PARAM_CHARSET(p5, 0));
VASSERT_EQ("5.4", "value1", PARAM_BUFFER(p5, 0)); VASSERT_EQ("5.4", "value1", PARAM_BUFFER(p5, 0));
// Language specification
parameterizedHeaderField p6;
p6.parse("X; param1*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A");
VASSERT_EQ("6.1", 1, p6.getParameterCount());
VASSERT_EQ("6.2", "param1", PARAM_NAME(p6, 0));
VASSERT_EQ("6.3", "us-ascii", PARAM_CHARSET(p6, 0));
VASSERT_EQ("6.4", "en-us", PARAM_LANG(p6, 0));
VASSERT_EQ("6.5", "This is ***fun***", PARAM_BUFFER(p6, 0));
} }
void testGenerate() void testGenerate()
@ -370,6 +382,25 @@ VMIME_TEST_SUITE_BEGIN(parameterTest)
VASSERT_EQ("4.both", "F: X; param1=\"va lue\"", VASSERT_EQ("4.both", "F: X; param1=\"va lue\"",
p4.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047)); p4.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047));
// Language specification
parameterizedHeaderField p5;
p5.appendParameter(vmime::make_shared <vmime::parameter>("param1",
vmime::word("This is ***fun***", vmime::charset("us-ascii"), "en-us")));
VASSERT_EQ("5.no-encoding", "F: X; param1=\"This is ***fun***\"",
p5.generate(vmime::generationContext::PARAMETER_VALUE_NO_ENCODING));
VASSERT_EQ("5.rfc2047", "F: X; param1=\"=?us-ascii*en-us?Q?This_is_***fun***?=\"",
p5.generate(vmime::generationContext::PARAMETER_VALUE_RFC2047_ONLY));
VASSERT_EQ("5.rfc2231", "F: X; param1*=us-ascii''This%20is%20***fun***",
p5.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_ONLY));
VASSERT_EQ("5.both", "F: X; "
"param1=\"=?us-ascii*en-us?Q?This_is_***fun***?=\";\r\n "
"param1*=us-ascii''This%20is%20***fun***",
p5.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047));
} }
void testNonStandardEncodedParam() void testNonStandardEncodedParam()

View File

@ -221,12 +221,21 @@ VMIME_TEST_SUITE_BEGIN(textTest)
VASSERT_EQ("6", "[text: [[word: charset=iso-8859-1, buffer=Know wh\xe4t? It works!]]]", VASSERT_EQ("6", "[text: [[word: charset=iso-8859-1, buffer=Know wh\xe4t? It works!]]]",
parseText("=?iso-8859-1?Q?Know_wh=E4t?_It_works!?=")); parseText("=?iso-8859-1?Q?Know_wh=E4t?_It_works!?="));
// TODO: add more // With language specifier
VASSERT_EQ("7", "[text: [[word: charset=US-ASCII, buffer=Keith Moore, lang=EN]]]",
parseText("=?US-ASCII*EN?Q?Keith_Moore?="));
} }
void testGenerate() void testGenerate()
{ {
// TODO // TODO
// With language specifier
vmime::word wlang1("Émeline", vmime::charset("UTF-8"), "FR");
VASSERT_EQ("lang1", "=?UTF-8*FR?Q?=C3=89meline?=", wlang1.generate());
vmime::word wlang2("Keith Moore", vmime::charset("US-ASCII"), "EN");
VASSERT_EQ("lang2", "=?US-ASCII*EN?Q?Keith_Moore?=", wlang2.generate());
} }
void testDisplayForm() void testDisplayForm()

View File

@ -32,6 +32,7 @@ VMIME_TEST_SUITE_BEGIN(wordEncoderTest)
VMIME_TEST(testGetNextChunk) VMIME_TEST(testGetNextChunk)
VMIME_TEST(testGetNextChunk_integral) VMIME_TEST(testGetNextChunk_integral)
VMIME_TEST(testIsEncodingNeeded_ascii) VMIME_TEST(testIsEncodingNeeded_ascii)
VMIME_TEST(testIsEncodingNeeded_withLanguage)
VMIME_TEST(testIsEncodingNeeded_specialChars) VMIME_TEST(testIsEncodingNeeded_specialChars)
VMIME_TEST(testGuessBestEncoding_QP) VMIME_TEST(testGuessBestEncoding_QP)
VMIME_TEST(testGuessBestEncoding_B64) VMIME_TEST(testGuessBestEncoding_B64)
@ -70,25 +71,31 @@ VMIME_TEST_SUITE_BEGIN(wordEncoderTest)
ctx.setInternationalizedEmailSupport(false); ctx.setInternationalizedEmailSupport(false);
VASSERT_FALSE("ascii", vmime::wordEncoder::isEncodingNeeded VASSERT_FALSE("ascii", vmime::wordEncoder::isEncodingNeeded
(ctx, "ASCII-only buffer", vmime::charset("utf-8"))); (ctx, "ASCII-only buffer", vmime::charset("utf-8"), ""));
VASSERT_TRUE("non-ascii", vmime::wordEncoder::isEncodingNeeded VASSERT_TRUE("non-ascii", vmime::wordEncoder::isEncodingNeeded
(ctx, "Buffer with some UTF-8 '\xc3\xa0'", vmime::charset("utf-8"))); (ctx, "Buffer with some UTF-8 '\xc3\xa0'", vmime::charset("utf-8"), ""));
}
// A non-empty language tag ("en") must make isEncodingNeeded() return true
// even though the buffer itself contains only ASCII characters.
void testIsEncodingNeeded_withLanguage()
{
VASSERT_TRUE("ascii", vmime::wordEncoder::isEncodingNeeded
(vmime::generationContext::getDefaultContext(), "ASCII-only buffer", vmime::charset("utf-8"), "en"));
}
void testIsEncodingNeeded_specialChars() void testIsEncodingNeeded_specialChars()
{ {
VASSERT_TRUE("rfc2047", vmime::wordEncoder::isEncodingNeeded VASSERT_TRUE("rfc2047", vmime::wordEncoder::isEncodingNeeded
(vmime::generationContext::getDefaultContext(), (vmime::generationContext::getDefaultContext(),
"foo bar =? foo bar", vmime::charset("us-ascii"))); "foo bar =? foo bar", vmime::charset("us-ascii"), ""));
VASSERT_TRUE("new line 1", vmime::wordEncoder::isEncodingNeeded VASSERT_TRUE("new line 1", vmime::wordEncoder::isEncodingNeeded
(vmime::generationContext::getDefaultContext(), (vmime::generationContext::getDefaultContext(),
"foo bar \n foo bar", vmime::charset("us-ascii"))); "foo bar \n foo bar", vmime::charset("us-ascii"), ""));
VASSERT_TRUE("new line 2", vmime::wordEncoder::isEncodingNeeded VASSERT_TRUE("new line 2", vmime::wordEncoder::isEncodingNeeded
(vmime::generationContext::getDefaultContext(), (vmime::generationContext::getDefaultContext(),
"foo bar \r foo bar", vmime::charset("us-ascii"))); "foo bar \r foo bar", vmime::charset("us-ascii"), ""));
} }
void testGuessBestEncoding_QP() void testGuessBestEncoding_QP()

View File

@ -119,7 +119,14 @@ inline std::ostream& operator<<(std::ostream& os, const vmime::charset& ch)
inline std::ostream& operator<<(std::ostream& os, const vmime::word& w) inline std::ostream& operator<<(std::ostream& os, const vmime::word& w)
{ {
os << "[word: charset=" << w.getCharset().getName() << ", buffer=" << w.getBuffer() << "]"; os << "[word: charset=" << w.getCharset().getName()
<< ", buffer=" << w.getBuffer();
if (!w.getLanguage().empty())
os << ", lang=" << w.getLanguage();
os << "]";
return (os); return (os);
} }