Added support for language specification in RFC-2047 encoded words and RFC-2231 parameter values.
This commit is contained in:
parent
0863f50c26
commit
03a0e36e91
@ -134,6 +134,7 @@ void parameter::parse(const parsingContext& ctx, const std::vector <valueChunk>&
|
||||
bool foundCharsetChunk = false;
|
||||
|
||||
charset ch(charsets::US_ASCII);
|
||||
string lang;
|
||||
|
||||
std::ostringstream value;
|
||||
value.imbue(std::locale::classic());
|
||||
@ -170,7 +171,9 @@ void parameter::parse(const parsingContext& ctx, const std::vector <valueChunk>&
|
||||
|
||||
if (q != string::npos)
|
||||
{
|
||||
// Ignore language
|
||||
// Extract language
|
||||
lang = chunk.data.substr(pos, q - pos);
|
||||
|
||||
++q;
|
||||
pos = q;
|
||||
}
|
||||
@ -268,6 +271,7 @@ void parameter::parse(const parsingContext& ctx, const std::vector <valueChunk>&
|
||||
|
||||
m_value->setBuffer(value.str());
|
||||
m_value->setCharset(ch);
|
||||
m_value->setLanguage(lang);
|
||||
}
|
||||
|
||||
|
||||
@ -372,7 +376,7 @@ void parameter::generateImpl
|
||||
const bool alwaysEncode = m_value->getCharset().getRecommendedEncoding(recommendedEnc);
|
||||
bool extended = alwaysEncode;
|
||||
|
||||
if ((needQuotedPrintable || cutValue) &&
|
||||
if ((needQuotedPrintable || cutValue || !m_value->getLanguage().empty()) &&
|
||||
genMode != generationContext::PARAMETER_VALUE_NO_ENCODING)
|
||||
{
|
||||
// Send the name in quoted-printable, so outlook express et al.
|
||||
|
@ -48,7 +48,8 @@ word::word()
|
||||
|
||||
|
||||
// Copy constructor: initializes this word from the members of 'w'.
// NOTE(review): this span is a rendered diff, so two initializer lists are
// shown. The single-line list appears to be the removed version; the two-line
// list that follows appears to be its replacement, which also copies m_lang.
word::word(const word& w)
|
||||
: headerFieldValue(), m_buffer(w.m_buffer), m_charset(w.m_charset)
|
||||
: headerFieldValue(), m_buffer(w.m_buffer),
|
||||
m_charset(w.m_charset), m_lang(w.m_lang)
|
||||
{
|
||||
}
|
||||
|
||||
@ -65,6 +66,12 @@ word::word(const string& buffer, const charset& charset)
|
||||
}
|
||||
|
||||
|
||||
// Construct a word from a buffer, its charset and a language tag.
// Each argument is copied into the matching member (m_buffer, m_charset,
// m_lang); the constructor body itself does nothing else.
word::word(const string& buffer, const charset& charset, const string& lang)
|
||||
: m_buffer(buffer), m_charset(charset), m_lang(lang)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
shared_ptr <word> word::parseNext
|
||||
(const parsingContext& ctx, const string& buffer, const size_t position,
|
||||
const size_t end, size_t* newPosition,
|
||||
@ -296,6 +303,21 @@ void word::parseImpl
|
||||
|
||||
if (theEncoder)
|
||||
{
|
||||
// Extract charset and language
|
||||
const string charsetAndLang(charsetPos, charsetEnd);
|
||||
const string::size_type asteriskPos = charsetAndLang.find('*');
|
||||
|
||||
if (asteriskPos != string::npos)
|
||||
{
|
||||
m_charset = charset(string(charsetAndLang.begin(), charsetAndLang.begin() + asteriskPos));
|
||||
m_lang = string(charsetAndLang.begin() + asteriskPos + 1, charsetAndLang.end());
|
||||
}
|
||||
else
|
||||
{
|
||||
m_charset = charset(charsetAndLang);
|
||||
m_lang.clear();
|
||||
}
|
||||
|
||||
// Decode text
|
||||
string decodedBuffer;
|
||||
|
||||
@ -306,7 +328,6 @@ void word::parseImpl
|
||||
delete (theEncoder);
|
||||
|
||||
m_buffer = decodedBuffer;
|
||||
m_charset = charset(string(charsetPos, charsetEnd));
|
||||
|
||||
setParsedBounds(position, p - buffer.begin());
|
||||
|
||||
@ -358,7 +379,7 @@ void word::generate(const generationContext& ctx, utility::outputStream& os,
|
||||
else if ((flags & text::FORCE_ENCODING) != 0)
|
||||
encodingNeeded = true;
|
||||
else // auto-detect
|
||||
encodingNeeded = wordEncoder::isEncodingNeeded(ctx, m_buffer, m_charset);
|
||||
encodingNeeded = wordEncoder::isEncodingNeeded(ctx, m_buffer, m_charset, m_lang);
|
||||
|
||||
// If text does not need to be encoded, quote the buffer (no folding is performed).
|
||||
if (!encodingNeeded &&
|
||||
@ -600,8 +621,12 @@ void word::generate(const generationContext& ctx, utility::outputStream& os,
|
||||
|
||||
wordEncoder wordEnc(m_buffer, m_charset);
|
||||
|
||||
const string wordStart("=?" + m_charset.getName() + "?" +
|
||||
(wordEnc.getEncoding() == wordEncoder::ENCODING_B64 ? 'B' : 'Q') + "?");
|
||||
const string wordStart("=?"
|
||||
+ m_charset.getName()
|
||||
+ (m_lang.empty() ? "" : string("*") + m_lang)
|
||||
+ "?"
|
||||
+ (wordEnc.getEncoding() == wordEncoder::ENCODING_B64 ? 'B' : 'Q')
|
||||
+ "?");
|
||||
const string wordEnd("?=");
|
||||
|
||||
const size_t minWordLength = wordStart.length() + wordEnd.length();
|
||||
@ -690,6 +715,7 @@ word& word::operator=(const word& w)
|
||||
{
|
||||
m_buffer = w.m_buffer;
|
||||
m_charset = w.m_charset;
|
||||
m_lang = w.m_lang;
|
||||
return (*this);
|
||||
}
|
||||
|
||||
@ -698,6 +724,7 @@ word& word::operator=(const string& s)
|
||||
{
|
||||
m_buffer = s;
|
||||
m_charset = charset::getLocalCharset();
|
||||
m_lang.clear();
|
||||
return (*this);
|
||||
}
|
||||
|
||||
@ -708,18 +735,19 @@ void word::copyFrom(const component& other)
|
||||
|
||||
m_buffer = w.m_buffer;
|
||||
m_charset = w.m_charset;
|
||||
m_lang = w.m_lang;
|
||||
}
|
||||
|
||||
|
||||
// Member-wise equality test against another word.
// NOTE(review): this span is a rendered diff, so two return statements are
// shown. The first (charset and buffer only) appears to be the removed line;
// the second, which additionally requires m_lang to match, its replacement.
bool word::operator==(const word& w) const
|
||||
{
|
||||
return (m_charset == w.m_charset && m_buffer == w.m_buffer);
|
||||
return (m_charset == w.m_charset && m_buffer == w.m_buffer && m_lang == w.m_lang);
|
||||
}
|
||||
|
||||
|
||||
// Member-wise inequality test against another word.
// NOTE(review): this span is a rendered diff, so two return statements are
// shown. The first (charset and buffer only) appears to be the removed line;
// the second, which also reports a differing m_lang, its replacement.
bool word::operator!=(const word& w) const
|
||||
{
|
||||
return (m_charset != w.m_charset || m_buffer != w.m_buffer);
|
||||
return (m_charset != w.m_charset || m_buffer != w.m_buffer || m_lang != w.m_lang);
|
||||
}
|
||||
|
||||
|
||||
@ -769,6 +797,18 @@ void word::setCharset(const charset& ch)
|
||||
}
|
||||
|
||||
|
||||
// Return the language tag stored in m_lang.
// The tag is returned by value, so the caller receives a copy.
const string word::getLanguage() const
|
||||
{
|
||||
return m_lang;
|
||||
}
|
||||
|
||||
|
||||
// Replace the language tag stored in m_lang with 'lang'.
// No validation of the tag is performed here.
void word::setLanguage(const string& lang)
|
||||
{
|
||||
m_lang = lang;
|
||||
}
|
||||
|
||||
|
||||
const string& word::getBuffer() const
|
||||
{
|
||||
return (m_buffer);
|
||||
|
@ -48,6 +48,7 @@ public:
|
||||
word(const word& w);
|
||||
word(const string& buffer); // Defaults to local charset
|
||||
word(const string& buffer, const charset& charset);
|
||||
word(const string& buffer, const charset& charset, const string& lang);
|
||||
|
||||
/** Return the raw data for this encoded word.
|
||||
*
|
||||
@ -85,6 +86,20 @@ public:
|
||||
*/
|
||||
void setCharset(const charset& ch);
|
||||
|
||||
/** Return the language used in this word (optional).
|
||||
* If not specified, the value is empty.
|
||||
*
|
||||
* @return language tag for this word, in the format specified
|
||||
* by RFC-1766
|
||||
*/
|
||||
const string getLanguage() const;
|
||||
|
||||
/** Set the language used in this word (optional).
|
||||
*
|
||||
* @param lang language tag, in the format specified by RFC-1766
|
||||
*/
|
||||
void setLanguage(const string& lang);
|
||||
|
||||
/** Returns whether two words actually represent the same text,
|
||||
* regardless of their charset.
|
||||
*
|
||||
@ -194,6 +209,7 @@ private:
|
||||
// in the specified "m_charset".
|
||||
string m_buffer;
|
||||
charset m_charset;
|
||||
string m_lang;
|
||||
};
|
||||
|
||||
|
||||
|
@ -226,7 +226,8 @@ wordEncoder::Encoding wordEncoder::getEncoding() const
|
||||
|
||||
// static
|
||||
bool wordEncoder::isEncodingNeeded
|
||||
(const generationContext& ctx, const string& buffer, const charset& charset)
|
||||
(const generationContext& ctx, const string& buffer,
|
||||
const charset& charset, const string& lang)
|
||||
{
|
||||
if (!ctx.getInternationalizedEmailSupport())
|
||||
{
|
||||
@ -250,6 +251,10 @@ bool wordEncoder::isEncodingNeeded
|
||||
if (buffer.find("=?") != string::npos || buffer.find("?=") != string::npos)
|
||||
return true;
|
||||
|
||||
// If a language is specified, force encoding
|
||||
if (!lang.empty())
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -78,9 +78,11 @@ public:
|
||||
* @param ctx generation context
|
||||
* @param buffer buffer to analyze
|
||||
* @param charset charset of the buffer
|
||||
* @param lang language code, in the format specified by RFC-1766
|
||||
* @return true if encoding is needed, false otherwise.
|
||||
*/
|
||||
static bool isEncodingNeeded(const generationContext& ctx, const string& buffer, const charset& charset);
|
||||
static bool isEncodingNeeded(const generationContext& ctx, const string& buffer,
|
||||
const charset& charset, const string& lang);
|
||||
|
||||
/** Guess the best RFC-2047 encoding to use for the specified buffer.
|
||||
*
|
||||
|
@ -81,6 +81,8 @@ VMIME_TEST_SUITE_BEGIN(parameterTest)
|
||||
#define PARAM_NAME(p, n) (p.getParameterAt(n)->getName())
|
||||
#define PARAM_CHARSET(p, n) \
|
||||
(p.getParameterAt(n)->getValue().getCharset().generate())
|
||||
#define PARAM_LANG(p, n) \
|
||||
(p.getParameterAt(n)->getValue().getLanguage())
|
||||
#define PARAM_BUFFER(p, n) \
|
||||
(p.getParameterAt(n)->getValue().getBuffer())
|
||||
|
||||
@ -235,6 +237,16 @@ VMIME_TEST_SUITE_BEGIN(parameterTest)
|
||||
VASSERT_EQ("5.2", "param1", PARAM_NAME(p5, 0));
|
||||
VASSERT_EQ("5.3", "us-ascii", PARAM_CHARSET(p5, 0));
|
||||
VASSERT_EQ("5.4", "value1", PARAM_BUFFER(p5, 0));
|
||||
|
||||
// Language specification
|
||||
parameterizedHeaderField p6;
|
||||
p6.parse("X; param1*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A");
|
||||
|
||||
VASSERT_EQ("6.1", 1, p6.getParameterCount());
|
||||
VASSERT_EQ("6.2", "param1", PARAM_NAME(p6, 0));
|
||||
VASSERT_EQ("6.3", "us-ascii", PARAM_CHARSET(p6, 0));
|
||||
VASSERT_EQ("6.4", "en-us", PARAM_LANG(p6, 0));
|
||||
VASSERT_EQ("6.5", "This is ***fun***", PARAM_BUFFER(p6, 0));
|
||||
}
|
||||
|
||||
void testGenerate()
|
||||
@ -370,6 +382,25 @@ VMIME_TEST_SUITE_BEGIN(parameterTest)
|
||||
|
||||
VASSERT_EQ("4.both", "F: X; param1=\"va lue\"",
|
||||
p4.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047));
|
||||
|
||||
// Language specification
|
||||
parameterizedHeaderField p5;
|
||||
p5.appendParameter(vmime::make_shared <vmime::parameter>("param1",
|
||||
vmime::word("This is ***fun***", vmime::charset("us-ascii"), "en-us")));
|
||||
|
||||
VASSERT_EQ("5.no-encoding", "F: X; param1=\"This is ***fun***\"",
|
||||
p5.generate(vmime::generationContext::PARAMETER_VALUE_NO_ENCODING));
|
||||
|
||||
VASSERT_EQ("5.rfc2047", "F: X; param1=\"=?us-ascii*en-us?Q?This_is_***fun***?=\"",
|
||||
p5.generate(vmime::generationContext::PARAMETER_VALUE_RFC2047_ONLY));
|
||||
|
||||
VASSERT_EQ("5.rfc2231", "F: X; param1*=us-ascii''This%20is%20***fun***",
|
||||
p5.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_ONLY));
|
||||
|
||||
VASSERT_EQ("5.both", "F: X; "
|
||||
"param1=\"=?us-ascii*en-us?Q?This_is_***fun***?=\";\r\n "
|
||||
"param1*=us-ascii''This%20is%20***fun***",
|
||||
p5.generate(vmime::generationContext::PARAMETER_VALUE_RFC2231_AND_RFC2047));
|
||||
}
|
||||
|
||||
void testNonStandardEncodedParam()
|
||||
|
@ -221,12 +221,21 @@ VMIME_TEST_SUITE_BEGIN(textTest)
|
||||
VASSERT_EQ("6", "[text: [[word: charset=iso-8859-1, buffer=Know wh\xe4t? It works!]]]",
|
||||
parseText("=?iso-8859-1?Q?Know_wh=E4t?_It_works!?="));
|
||||
|
||||
// TODO: add more
|
||||
// With language specifier
|
||||
VASSERT_EQ("7", "[text: [[word: charset=US-ASCII, buffer=Keith Moore, lang=EN]]]",
|
||||
parseText("=?US-ASCII*EN?Q?Keith_Moore?="));
|
||||
}
|
||||
|
||||
// Generation test for words that carry a language tag.
// Both cases build a vmime::word from (buffer, charset, language) and compare
// the output of generate() with an encoded word in which "*<language>"
// follows the charset name.
void testGenerate()
|
||||
{
|
||||
// TODO
|
||||
|
||||
// With language specifier
|
||||
vmime::word wlang1("Émeline", vmime::charset("UTF-8"), "FR");
|
||||
VASSERT_EQ("lang1", "=?UTF-8*FR?Q?=C3=89meline?=", wlang1.generate());
|
||||
|
||||
vmime::word wlang2("Keith Moore", vmime::charset("US-ASCII"), "EN");
|
||||
VASSERT_EQ("lang2", "=?US-ASCII*EN?Q?Keith_Moore?=", wlang2.generate());
|
||||
}
|
||||
|
||||
void testDisplayForm()
|
||||
|
@ -32,6 +32,7 @@ VMIME_TEST_SUITE_BEGIN(wordEncoderTest)
|
||||
VMIME_TEST(testGetNextChunk)
|
||||
VMIME_TEST(testGetNextChunk_integral)
|
||||
VMIME_TEST(testIsEncodingNeeded_ascii)
|
||||
VMIME_TEST(testIsEncodingNeeded_withLanguage)
|
||||
VMIME_TEST(testIsEncodingNeeded_specialChars)
|
||||
VMIME_TEST(testGuessBestEncoding_QP)
|
||||
VMIME_TEST(testGuessBestEncoding_B64)
|
||||
@ -70,25 +71,31 @@ VMIME_TEST_SUITE_BEGIN(wordEncoderTest)
|
||||
ctx.setInternationalizedEmailSupport(false);
|
||||
|
||||
VASSERT_FALSE("ascii", vmime::wordEncoder::isEncodingNeeded
|
||||
(ctx, "ASCII-only buffer", vmime::charset("utf-8")));
|
||||
(ctx, "ASCII-only buffer", vmime::charset("utf-8"), ""));
|
||||
|
||||
VASSERT_TRUE("non-ascii", vmime::wordEncoder::isEncodingNeeded
|
||||
(ctx, "Buffer with some UTF-8 '\xc3\xa0'", vmime::charset("utf-8")));
|
||||
(ctx, "Buffer with some UTF-8 '\xc3\xa0'", vmime::charset("utf-8"), ""));
|
||||
}
|
||||
|
||||
// Checks that wordEncoder::isEncodingNeeded() reports true for an ASCII-only
// buffer when a non-empty language ("en") is passed, using the default
// generation context.
void testIsEncodingNeeded_withLanguage()
|
||||
{
|
||||
VASSERT_TRUE("ascii", vmime::wordEncoder::isEncodingNeeded
|
||||
(vmime::generationContext::getDefaultContext(), "ASCII-only buffer", vmime::charset("utf-8"), "en"));
|
||||
}
|
||||
|
||||
// Checks that wordEncoder::isEncodingNeeded() reports true for us-ascii
// buffers containing "=?", a line feed, or a carriage return, using the
// default generation context.
// NOTE(review): this span is a rendered diff. In each assertion the argument
// line ending in `vmime::charset("us-ascii")));` appears to be the removed
// version, and the following line, which passes an extra empty language
// string, its replacement.
void testIsEncodingNeeded_specialChars()
|
||||
{
|
||||
VASSERT_TRUE("rfc2047", vmime::wordEncoder::isEncodingNeeded
|
||||
(vmime::generationContext::getDefaultContext(),
|
||||
"foo bar =? foo bar", vmime::charset("us-ascii")));
|
||||
"foo bar =? foo bar", vmime::charset("us-ascii"), ""));
|
||||
|
||||
VASSERT_TRUE("new line 1", vmime::wordEncoder::isEncodingNeeded
|
||||
(vmime::generationContext::getDefaultContext(),
|
||||
"foo bar \n foo bar", vmime::charset("us-ascii")));
|
||||
"foo bar \n foo bar", vmime::charset("us-ascii"), ""));
|
||||
|
||||
VASSERT_TRUE("new line 2", vmime::wordEncoder::isEncodingNeeded
|
||||
(vmime::generationContext::getDefaultContext(),
|
||||
"foo bar \r foo bar", vmime::charset("us-ascii")));
|
||||
"foo bar \r foo bar", vmime::charset("us-ascii"), ""));
|
||||
}
|
||||
|
||||
void testGuessBestEncoding_QP()
|
||||
|
@ -119,7 +119,14 @@ inline std::ostream& operator<<(std::ostream& os, const vmime::charset& ch)
|
||||
|
||||
// Stream a vmime::word in a readable form for test output:
// "[word: charset=<name>, buffer=<buffer>]", with ", lang=<language>" inserted
// before the closing bracket when the word's language is non-empty.
// Returns the stream that was passed in.
// NOTE(review): this span is a rendered diff. The first single-line output
// statement appears to be the removed version; the statements after it appear
// to be its replacement.
inline std::ostream& operator<<(std::ostream& os, const vmime::word& w)
|
||||
{
|
||||
os << "[word: charset=" << w.getCharset().getName() << ", buffer=" << w.getBuffer() << "]";
|
||||
os << "[word: charset=" << w.getCharset().getName()
|
||||
<< ", buffer=" << w.getBuffer();
|
||||
|
||||
// The language is optional, so it is only printed when present.
if (!w.getLanguage().empty())
|
||||
os << ", lang=" << w.getLanguage();
|
||||
|
||||
os << "]";
|
||||
|
||||
return (os);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user