Fixed parsing of UTF8 email addresses (RFC-2047 local part + IDNA domain name).
This commit is contained in:
parent
3848556372
commit
e88b8eeac2
@ -35,6 +35,98 @@ namespace vmime
|
||||
{
|
||||
|
||||
|
||||
/** Decode an IDNA-encoded domain name ("xn--5rtw95l.xn--wgv71a")
|
||||
* to a fully decoded domain name in UTF-8 ("黒川.日本").
|
||||
*
|
||||
* @param idnaDomain domain name encoded with IDNA
|
||||
* @return decoded domain name in UTF-8
|
||||
*/
|
||||
static const string domainNameFromIDNA(const string& idnaDomain)
|
||||
{
|
||||
std::ostringstream domainName;
|
||||
size_t p = 0;
|
||||
|
||||
for (size_t n = idnaDomain.find('.', p) ;
|
||||
(n = idnaDomain.find('.', p)) != string::npos ; p = n + 1)
|
||||
{
|
||||
const string encodedPart(idnaDomain.begin() + p, idnaDomain.begin() + n);
|
||||
|
||||
if (encodedPart.length() > 4 &&
|
||||
encodedPart[0] == 'x' && encodedPart[1] == 'n' &&
|
||||
encodedPart[2] == '-' && encodedPart[3] == '-')
|
||||
{
|
||||
string decodedPart;
|
||||
charset::convert(encodedPart, decodedPart,
|
||||
vmime::charsets::IDNA, vmime::charsets::UTF_8);
|
||||
|
||||
domainName << decodedPart << '.';
|
||||
}
|
||||
else
|
||||
{
|
||||
domainName << encodedPart << '.'; // not encoded
|
||||
}
|
||||
}
|
||||
|
||||
if (p < idnaDomain.length())
|
||||
{
|
||||
const string encodedPart(idnaDomain.begin() + p, idnaDomain.end());
|
||||
|
||||
if (encodedPart.length() > 4 &&
|
||||
encodedPart[0] == 'x' && encodedPart[1] == 'n' &&
|
||||
encodedPart[2] == '-' && encodedPart[3] == '-')
|
||||
{
|
||||
string decodedPart;
|
||||
charset::convert(encodedPart, decodedPart,
|
||||
vmime::charsets::IDNA, vmime::charsets::UTF_8);
|
||||
|
||||
domainName << decodedPart;
|
||||
}
|
||||
else
|
||||
{
|
||||
domainName << encodedPart; // not encoded
|
||||
}
|
||||
}
|
||||
|
||||
return domainName.str();
|
||||
}
|
||||
|
||||
|
||||
/** Encode an UTF-8 domain name ("黒川.日本") to an IDNA-encoded
|
||||
* domain name ("xn--5rtw95l.xn--wgv71a").
|
||||
*
|
||||
* @param domainName domain name in UTF-8
|
||||
* @return domain name encoded with IDNA
|
||||
*/
|
||||
static const string domainNameToIDNA(const string& domainName)
|
||||
{
|
||||
std::ostringstream idnaDomain;
|
||||
size_t p = 0;
|
||||
|
||||
for (size_t n = domainName.find('.', p) ;
|
||||
(n = domainName.find('.', p)) != string::npos ; p = n + 1)
|
||||
{
|
||||
string idnaPart;
|
||||
charset::convert(string(domainName.begin() + p, domainName.begin() + n),
|
||||
idnaPart, vmime::charsets::UTF_8, vmime::charsets::IDNA);
|
||||
|
||||
idnaDomain << idnaPart << '.';
|
||||
}
|
||||
|
||||
if (p < domainName.length())
|
||||
{
|
||||
string idnaPart;
|
||||
charset::convert(string(domainName.begin() + p, domainName.end()),
|
||||
idnaPart, vmime::charsets::UTF_8, vmime::charsets::IDNA);
|
||||
|
||||
idnaDomain << idnaPart;
|
||||
}
|
||||
|
||||
return idnaDomain.str();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
emailAddress::emailAddress()
|
||||
{
|
||||
}
|
||||
@ -86,6 +178,10 @@ void emailAddress::parseImpl
|
||||
State_LocalPartMiddle,
|
||||
State_LocalPartComment,
|
||||
State_LocalPartQuoted,
|
||||
State_LocalPartRFC2047Start,
|
||||
State_LocalPartRFC2047Middle,
|
||||
State_LocalPartRFC2047MiddleQM,
|
||||
State_LocalPartRFC2047End,
|
||||
State_DomainPartStart,
|
||||
State_DomainPartMiddle,
|
||||
State_DomainPartComment,
|
||||
@ -101,6 +197,7 @@ void emailAddress::parseImpl
|
||||
bool atFound = false;
|
||||
bool stop = false;
|
||||
int commentLevel = 0;
|
||||
bool localPartIsRFC2047 = false;
|
||||
|
||||
while (p < pend && !stop)
|
||||
{
|
||||
@ -128,6 +225,11 @@ void emailAddress::parseImpl
|
||||
state = State_LocalPartQuoted;
|
||||
++p;
|
||||
}
|
||||
else if (c == '=')
|
||||
{
|
||||
state = State_LocalPartRFC2047Start;
|
||||
++p;
|
||||
}
|
||||
else if (c == '(')
|
||||
{
|
||||
state = State_LocalPartComment;
|
||||
@ -214,6 +316,25 @@ void emailAddress::parseImpl
|
||||
|
||||
break;
|
||||
|
||||
case State_LocalPartRFC2047Start:
|
||||
|
||||
if (c == '?')
|
||||
{
|
||||
state = State_LocalPartRFC2047Middle;
|
||||
localPart << "=?";
|
||||
localPartIsRFC2047 = true;
|
||||
++p;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = State_LocalPartMiddle;
|
||||
localPart << '=';
|
||||
localPart << c;
|
||||
++p;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case State_LocalPartMiddle:
|
||||
|
||||
if (c == '.')
|
||||
@ -256,6 +377,55 @@ void emailAddress::parseImpl
|
||||
|
||||
break;
|
||||
|
||||
case State_LocalPartRFC2047Middle:
|
||||
|
||||
if (c == '?')
|
||||
{
|
||||
state = State_LocalPartRFC2047MiddleQM;
|
||||
++p;
|
||||
}
|
||||
else
|
||||
{
|
||||
localPart << c;
|
||||
++p;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case State_LocalPartRFC2047MiddleQM:
|
||||
|
||||
if (c == '=')
|
||||
{
|
||||
// End of RFC-2047 encoded word
|
||||
state = State_LocalPartRFC2047End;
|
||||
localPart << "?=";
|
||||
++p;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = State_LocalPartRFC2047Middle;
|
||||
localPart << '?';
|
||||
localPart << c;
|
||||
++p;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case State_LocalPartRFC2047End:
|
||||
|
||||
if (c == '@')
|
||||
{
|
||||
atFound = true;
|
||||
state = State_DomainPartStart;
|
||||
++p;
|
||||
}
|
||||
else
|
||||
{
|
||||
state = State_End;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case State_DomainPartStart:
|
||||
|
||||
if (c == '(')
|
||||
@ -357,8 +527,12 @@ void emailAddress::parseImpl
|
||||
if (domainPart.str().empty() && !atFound)
|
||||
domainPart << platform::getHandler()->getHostName();
|
||||
|
||||
m_localName = word(localPart.str(), vmime::charsets::UTF_8);
|
||||
m_domainName = word(domainPart.str(), vmime::charsets::UTF_8);
|
||||
if (localPartIsRFC2047)
|
||||
m_localName.parse(localPart.str());
|
||||
else
|
||||
m_localName = word(localPart.str(), vmime::charsets::UTF_8);
|
||||
|
||||
m_domainName = word(domainNameFromIDNA(domainPart.str()), vmime::charsets::UTF_8);
|
||||
}
|
||||
|
||||
setParsedBounds(position, p - pend);
|
||||
@ -368,34 +542,6 @@ void emailAddress::parseImpl
|
||||
}
|
||||
|
||||
|
||||
static const string domainNameToIDNA(const string& domainName)
|
||||
{
|
||||
std::ostringstream idnaDomain;
|
||||
size_t p = 0;
|
||||
|
||||
for (size_t n = domainName.find('.', p) ;
|
||||
(n = domainName.find('.', p)) != string::npos ; p = n + 1)
|
||||
{
|
||||
string idnaPart;
|
||||
charset::convert(string(domainName.begin() + p, domainName.begin() + n),
|
||||
idnaPart, vmime::charsets::UTF_8, vmime::charsets::IDNA);
|
||||
|
||||
idnaDomain << idnaPart << '.';
|
||||
}
|
||||
|
||||
if (p < domainName.length())
|
||||
{
|
||||
string idnaPart;
|
||||
charset::convert(string(domainName.begin() + p, domainName.end()),
|
||||
idnaPart, vmime::charsets::UTF_8, vmime::charsets::IDNA);
|
||||
|
||||
idnaDomain << idnaPart;
|
||||
}
|
||||
|
||||
return idnaDomain.str();
|
||||
}
|
||||
|
||||
|
||||
void emailAddress::generateImpl
|
||||
(const generationContext& ctx, utility::outputStream& os,
|
||||
const size_t curLinePos, size_t* newLinePos) const
|
||||
|
@ -40,6 +40,7 @@ VMIME_TEST_SUITE_BEGIN(emailAddressTest)
|
||||
VMIME_TEST(testParseSpecialChars)
|
||||
VMIME_TEST(testParseCommentInLocalPart)
|
||||
VMIME_TEST(testParseCommentInDomainPart)
|
||||
VMIME_TEST(testParseRFC2047EncodedLocalPart)
|
||||
VMIME_TEST(testGenerateSpecialChars)
|
||||
VMIME_TEST_LIST_END
|
||||
|
||||
@ -198,6 +199,21 @@ VMIME_TEST_SUITE_BEGIN(emailAddressTest)
|
||||
VASSERT_EQ("4/domain", "example.com", eml4.getDomainName());
|
||||
}
|
||||
|
||||
void testParseRFC2047EncodedLocalPart()
|
||||
{
|
||||
vmime::emailAddress eml1("=?utf-8?Q?Pel=C3=A9?=@example.com");
|
||||
VASSERT_EQ("1/local", "Pelé", eml1.getLocalName());
|
||||
VASSERT_EQ("1/domain", "example.com", eml1.getDomainName());
|
||||
|
||||
vmime::emailAddress eml2("=?utf-8?B?55Sy5paQ?=@xn--5rtw95l.xn--wgv71a");
|
||||
VASSERT_EQ("2/local", "甲斐", eml2.getLocalName());
|
||||
VASSERT_EQ("2/domain", "黒川.日本", eml2.getDomainName());
|
||||
|
||||
vmime::emailAddress eml3("=?utf-8?B?55Sy5paQ?=@xn--5rtw95l.com");
|
||||
VASSERT_EQ("3/local", "甲斐", eml3.getLocalName());
|
||||
VASSERT_EQ("3/domain", "黒川.com", eml3.getDomainName());
|
||||
}
|
||||
|
||||
void testGenerateASCII()
|
||||
{
|
||||
VASSERT_EQ("email 1", "local@domain", vmime::emailAddress("local", "domain").generate());
|
||||
|
Loading…
Reference in New Issue
Block a user