Added support for language specification in RFC-2047 encoded words and RFC-2231 parameter values.

This commit is contained in:
Vincent Richard 2014-06-30 22:48:42 +02:00
parent 0863f50c26
commit 03a0e36e91
9 changed files with 139 additions and 18 deletions

View File

@ -134,6 +134,7 @@ void parameter::parse(const parsingContext& ctx, const std::vector <valueChunk>&
bool foundCharsetChunk = false;
charset ch(charsets::US_ASCII);
string lang;
std::ostringstream value;
value.imbue(std::locale::classic());
@ -170,7 +171,9 @@ void parameter::parse(const parsingContext& ctx, const std::vector <valueChunk>&
if (q != string::npos)
{
// Ignore language
// Extract language
lang = chunk.data.substr(pos, q - pos);
++q;
pos = q;
}
@ -268,6 +271,7 @@ void parameter::parse(const parsingContext& ctx, const std::vector <valueChunk>&
m_value->setBuffer(value.str());
m_value->setCharset(ch);
m_value->setLanguage(lang);
}
@ -372,7 +376,7 @@ void parameter::generateImpl
const bool alwaysEncode = m_value->getCharset().getRecommendedEncoding(recommendedEnc);
bool extended = alwaysEncode;
if ((needQuotedPrintable || cutValue) &&
if ((needQuotedPrintable || cutValue || !m_value->getLanguage().empty()) &&
genMode != generationContext::PARAMETER_VALUE_NO_ENCODING)
{
// Send the name in quoted-printable, so Outlook Express et al.

View File

@ -48,7 +48,8 @@ word::word()
// Copy constructor: duplicates the buffer, the charset and the language
// tag of 'w'. The language tag has to be copied too, otherwise a copied
// word would silently lose its language information.
word::word(const word& w)
	: headerFieldValue(), m_buffer(w.m_buffer),
	  m_charset(w.m_charset), m_lang(w.m_lang)
{
}
@ -65,6 +66,12 @@ word::word(const string& buffer, const charset& charset)
}
// Builds a word from raw text, the charset that text is expressed in,
// and a language tag (which may be empty).
word::word(const string& buffer, const charset& charset, const string& lang)
	: m_buffer(buffer),
	  m_charset(charset),
	  m_lang(lang)
{
}
shared_ptr <word> word::parseNext
(const parsingContext& ctx, const string& buffer, const size_t position,
const size_t end, size_t* newPosition,
@ -296,6 +303,21 @@ void word::parseImpl
if (theEncoder)
{
// Extract charset and language
const string charsetAndLang(charsetPos, charsetEnd);
const string::size_type asteriskPos = charsetAndLang.find('*');
if (asteriskPos != string::npos)
{
m_charset = charset(string(charsetAndLang.begin(), charsetAndLang.begin() + asteriskPos));
m_lang = string(charsetAndLang.begin() + asteriskPos + 1, charsetAndLang.end());
}
else
{
m_charset = charset(charsetAndLang);
m_lang.clear();
}
// Decode text
string decodedBuffer;
@ -306,7 +328,6 @@ void word::parseImpl
delete (theEncoder);
m_buffer = decodedBuffer;
m_charset = charset(string(charsetPos, charsetEnd));
setParsedBounds(position, p - buffer.begin());
@ -358,7 +379,7 @@ void word::generate(const generationContext& ctx, utility::outputStream& os,
else if ((flags & text::FORCE_ENCODING) != 0)
encodingNeeded = true;
else // auto-detect
encodingNeeded = wordEncoder::isEncodingNeeded(ctx, m_buffer, m_charset);
encodingNeeded = wordEncoder::isEncodingNeeded(ctx, m_buffer, m_charset, m_lang);
// If text does not need to be encoded, quote the buffer (no folding is performed).
if (!encodingNeeded &&
@ -600,8 +621,12 @@ void word::generate(const generationContext& ctx, utility::outputStream& os,
wordEncoder wordEnc(m_buffer, m_charset);
const string wordStart("=?" + m_charset.getName() + "?" +
(wordEnc.getEncoding() == wordEncoder::ENCODING_B64 ? 'B' : 'Q') + "?");
const string wordStart("=?"
+ m_charset.getName()
+ (m_lang.empty() ? "" : string("*") + m_lang)
+ "?"
+ (wordEnc.getEncoding() == wordEncoder::ENCODING_B64 ? 'B' : 'Q')
+ "?");
const string wordEnd("?=");
const size_t minWordLength = wordStart.length() + wordEnd.length();
@ -690,6 +715,7 @@ word& word::operator=(const word& w)
{
m_buffer = w.m_buffer;
m_charset = w.m_charset;
m_lang = w.m_lang;
return (*this);
}
@ -698,6 +724,7 @@ word& word::operator=(const string& s)
{
m_buffer = s;
m_charset = charset::getLocalCharset();
m_lang.clear();
return (*this);
}
@ -708,18 +735,19 @@ void word::copyFrom(const component& other)
m_buffer = w.m_buffer;
m_charset = w.m_charset;
m_lang = w.m_lang;
}
// Two words are equal only when charset, buffer and language tag all match.
bool word::operator==(const word& w) const
{
	return (m_charset == w.m_charset && m_buffer == w.m_buffer && m_lang == w.m_lang);
}
// Two words differ as soon as charset, buffer or language tag differs.
bool word::operator!=(const word& w) const
{
	return (m_charset != w.m_charset || m_buffer != w.m_buffer || m_lang != w.m_lang);
}
@ -769,6 +797,18 @@ void word::setCharset(const charset& ch)
}
// Accessor for the language tag attached to this word; the tag is
// returned by value and is empty when no language has been set.
const string word::getLanguage() const
{
	return (m_lang);
}
// Replaces the language tag attached to this word. Passing an empty
// string leaves the word without any language information.
void word::setLanguage(const string& lang)
{
m_lang = lang;
}
const string& word::getBuffer() const
{
return (m_buffer);

View File

@ -48,6 +48,7 @@ public:
word(const word& w);
word(const string& buffer); // Defaults to local charset
word(const string& buffer, const charset& charset);
/** Construct a word from a buffer, its charset and a language tag.
*
* @param buffer raw text of the word, expressed in 'charset'
* @param charset charset of the buffer
* @param lang language tag for the word (may be empty)
*/
word(const string& buffer, const charset& charset, const string& lang);
/** Return the raw data for this encoded word.
*
@ -85,6 +86,20 @@ public:
*/
void setCharset(const charset& ch);
/** Return the language used in this word (optional).
* If not specified, the value is empty.
*
* The tag can be set explicitly with setLanguage(), and is also filled
* in when an encoded word of the form "=?charset*language?..." is parsed.
*
* @return language tag for this word, in the format specified
* by RFC-1766
*/
const string getLanguage() const;
/** Set the language used in this word (optional).
*
* When generating in auto-detect mode, a non-empty language tag makes
* wordEncoder::isEncodingNeeded() request encoding, so that the tag
* can be emitted as "=?charset*language?...".
*
* @param lang language tag, in the format specified by RFC-1766
* (an empty string means no language is specified)
*/
void setLanguage(const string& lang);
/** Returns whether two words actually represent the same text,
* regardless of their charset.
*
@ -194,6 +209,7 @@ private:
// in the specified "m_charset".
string m_buffer;
charset m_charset;
string m_lang;
};

View File

@ -226,7 +226,8 @@ wordEncoder::Encoding wordEncoder::getEncoding() const
// static
bool wordEncoder::isEncodingNeeded
(const generationContext& ctx, const string& buffer, const charset& charset)
(const generationContext& ctx, const string& buffer,
const charset& charset, const string& lang)
{
if (!ctx.getInternationalizedEmailSupport())
{
@ -250,6 +251,10 @@ bool wordEncoder::isEncodingNeeded
if (buffer.find("=?") != string::npos || buffer.find("?=") != string::npos)
return true;
// If a language is specified, force encoding
if (!lang.empty())
return true;
return false;
}

View File

@ -78,9 +78,11 @@ public:
* @param ctx generation context
* @param buffer buffer to analyze
* @param charset charset of the buffer
* @param lang language code, in the format specified by RFC-1766
* @return true if encoding is needed, false otherwise.
*/
static bool isEncodingNeeded(const generationContext& ctx, const string& buffer, const charset& charset);
static bool isEncodingNeeded(const generationContext& ctx, const string& buffer,
const charset& charset, const string& lang);
/** Guess the best RFC-2047 encoding to use for the specified buffer.
*

View File

@ -81,6 +81,8 @@ VMIME_TEST_SUITE_BEGIN(parameterTest)
#define PARAM_NAME(p, n) (p.getParameterAt(n)->getName())
#define PARAM_CHARSET(p, n) \
(p.getParameterAt(n)->getValue().getCharset().generate())
#define PARAM_LANG(p, n) \
(p.getParameterAt(n)->getValue().getLanguage())
#define PARAM_BUFFER(p, n) \
(p.getParameterAt(n)->getValue().getBuffer())
@ -235,6 +237,16 @@ VMIME_TEST_SUITE_BEGIN(parameterTest)
VASSERT_EQ("5.2", "param1", PARAM_NAME(p5, 0));
VASSERT_EQ("5.3", "us-ascii", PARAM_CHARSET(p5, 0));
VASSERT_EQ("5.4", "value1", PARAM_BUFFER(p5, 0));
// Language specification
parameterizedHeaderField p6;
p6.parse("X; param1*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A");
VASSERT_EQ("6.1", 1, p6.getParameterCount());
VASSERT_EQ("6.2", "param1", PARAM_NAME(p6, 0));
VASSERT_EQ("6.3", "us-ascii", PARAM_CHARSET(p6, 0));
VASSERT_EQ("6.4", "en-us", PARAM_LANG(p6, 0));
VASSERT_EQ("6.5", "This is ***fun***", PARAM_BUFFER(p6, 0));
}
void testGenerate()
@ -370,6 +382,25 @@ VMIME_TEST_SUITE_BEGIN(parameterTest)
VASSERT_EQ("4.both", "F: X; param1=\"va lue\"",
p4.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047));
// Language specification
parameterizedHeaderField p5;
p5.appendParameter(vmime::make_shared <vmime::parameter>("param1",
vmime::word("This is ***fun***", vmime::charset("us-ascii"), "en-us")));
VASSERT_EQ("5.no-encoding", "F: X; param1=\"This is ***fun***\"",
p5.generate(vmime::generationContext::PARAMETER_VALUE_NO_ENCODING));
VASSERT_EQ("5.rfc2047", "F: X; param1=\"=?us-ascii*en-us?Q?This_is_***fun***?=\"",
p5.generate(vmime::generationContext::PARAMETER_VALUE_RFC2047_ONLY));
VASSERT_EQ("5.rfc2231", "F: X; param1*=us-ascii''This%20is%20***fun***",
p5.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_ONLY));
VASSERT_EQ("5.both", "F: X; "
"param1=\"=?us-ascii*en-us?Q?This_is_***fun***?=\";\r\n "
"param1*=us-ascii''This%20is%20***fun***",
p5.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047));
}
void testNonStandardEncodedParam()

View File

@ -221,12 +221,21 @@ VMIME_TEST_SUITE_BEGIN(textTest)
VASSERT_EQ("6", "[text: [[word: charset=iso-8859-1, buffer=Know wh\xe4t? It works!]]]",
parseText("=?iso-8859-1?Q?Know_wh=E4t?_It_works!?="));
// TODO: add more
// With language specifier
VASSERT_EQ("7", "[text: [[word: charset=US-ASCII, buffer=Keith Moore, lang=EN]]]",
parseText("=?US-ASCII*EN?Q?Keith_Moore?="));
}
void testGenerate()
{
// TODO
// With language specifier
vmime::word wlang1("Émeline", vmime::charset("UTF-8"), "FR");
VASSERT_EQ("lang1", "=?UTF-8*FR?Q?=C3=89meline?=", wlang1.generate());
vmime::word wlang2("Keith Moore", vmime::charset("US-ASCII"), "EN");
VASSERT_EQ("lang2", "=?US-ASCII*EN?Q?Keith_Moore?=", wlang2.generate());
}
void testDisplayForm()

View File

@ -32,6 +32,7 @@ VMIME_TEST_SUITE_BEGIN(wordEncoderTest)
VMIME_TEST(testGetNextChunk)
VMIME_TEST(testGetNextChunk_integral)
VMIME_TEST(testIsEncodingNeeded_ascii)
VMIME_TEST(testIsEncodingNeeded_withLanguage)
VMIME_TEST(testIsEncodingNeeded_specialChars)
VMIME_TEST(testGuessBestEncoding_QP)
VMIME_TEST(testGuessBestEncoding_B64)
@ -70,25 +71,31 @@ VMIME_TEST_SUITE_BEGIN(wordEncoderTest)
ctx.setInternationalizedEmailSupport(false);
VASSERT_FALSE("ascii", vmime::wordEncoder::isEncodingNeeded
(ctx, "ASCII-only buffer", vmime::charset("utf-8")));
(ctx, "ASCII-only buffer", vmime::charset("utf-8"), ""));
VASSERT_TRUE("non-ascii", vmime::wordEncoder::isEncodingNeeded
(ctx, "Buffer with some UTF-8 '\xc3\xa0'", vmime::charset("utf-8")));
(ctx, "Buffer with some UTF-8 '\xc3\xa0'", vmime::charset("utf-8"), ""));
}
// A non-empty language tag must force encoding even when the buffer
// itself is plain ASCII and would otherwise need no encoding.
void testIsEncodingNeeded_withLanguage()
{
VASSERT_TRUE("ascii", vmime::wordEncoder::isEncodingNeeded
(vmime::generationContext::getDefaultContext(), "ASCII-only buffer", vmime::charset("utf-8"), "en"));
}
// ASCII-only buffers that contain an encoded-word delimiter ("=?") or a
// line break must still be reported as needing encoding. No language tag
// is passed (empty string), so the result depends on the buffer alone.
void testIsEncodingNeeded_specialChars()
{
	VASSERT_TRUE("rfc2047", vmime::wordEncoder::isEncodingNeeded
		(vmime::generationContext::getDefaultContext(),
		 "foo bar =? foo bar", vmime::charset("us-ascii"), ""));

	VASSERT_TRUE("new line 1", vmime::wordEncoder::isEncodingNeeded
		(vmime::generationContext::getDefaultContext(),
		 "foo bar \n foo bar", vmime::charset("us-ascii"), ""));

	VASSERT_TRUE("new line 2", vmime::wordEncoder::isEncodingNeeded
		(vmime::generationContext::getDefaultContext(),
		 "foo bar \r foo bar", vmime::charset("us-ascii"), ""));
}
void testGuessBestEncoding_QP()

View File

@ -119,7 +119,14 @@ inline std::ostream& operator<<(std::ostream& os, const vmime::charset& ch)
// Pretty-printer used by test assertions. The output has the form
//   [word: charset=<name>, buffer=<text>]
// with ", lang=<tag>" inserted before the closing bracket only when the
// word carries a non-empty language tag.
inline std::ostream& operator<<(std::ostream& os, const vmime::word& w)
{
	os << "[word: charset=" << w.getCharset().getName()
	   << ", buffer=" << w.getBuffer();

	if (!w.getLanguage().empty())
		os << ", lang=" << w.getLanguage();

	os << "]";

	return (os);
}