Fixed bug in modified UTF-7 encoding.

This commit is contained in:
Vincent Richard 2007-07-09 22:05:54 +00:00
parent 5e3a99710b
commit e984682e26
2 changed files with 101 additions and 48 deletions

View File

@ -2,6 +2,10 @@
VERSION 0.8.2cvs
================
2007-07-09 Vincent Richard <vincent@vincent-richard.net>
* IMAPUtils.cpp: fixed bug in modified UTF-7 encoding (IMAP).
2007-05-22 Vincent Richard <vincent@vincent-richard.net>
* Implemented thread-safe reference counting for smart pointers,

View File

@ -90,8 +90,7 @@ const string IMAPUtils::quoteString(const string& text)
quoted += '"'; quoted += '"';
for (string::const_iterator it = text.begin() ; for (string::const_iterator it = text.begin() ; it != text.end() ; ++it)
!needQuoting && it != text.end() ; ++it)
{ {
const unsigned char c = *it; const unsigned char c = *it;
@ -168,76 +167,126 @@ const string IMAPUtils::toModifiedUTF7
hsUTF7[1] = base64alphabet[(hs & 0xF0) >> 4]; hsUTF7[1] = base64alphabet[(hs & 0xF0) >> 4];
hsUTF7[2] = base64alphabet[(hs & 0x0F) << 2]; hsUTF7[2] = base64alphabet[(hs & 0x0F) << 2];
// Transcode path component to UTF-7 charset. // iconv() is buggy with UTF-8 to UTF-7 conversion, so we do it "by hand".
// This code is largely inspired from "imap/utf7.c", in mutt 1.4.
// Copyright (C) 2000 Edmund Grimley Evans <edmundo@rano.org>
// WARNING: This may throw "exceptions::charset_conv_error" // WARNING: This may throw "exceptions::charset_conv_error"
const string cvt = text.getConvertedText(charset(charsets::UTF_7)); const string cvt = text.getConvertedText(charset(charsets::UTF_8));
// Transcode to modified UTF-7 (RFC-2060). // In the worst case we convert 2 chars to 7 chars.
// For example: "\x10&\x10&..." -> "&ABA-&-&ABA-&-...".
string out; string out;
out.reserve((cvt.length() * 3) / 2); out.reserve((cvt.length() / 2) * 7 + 6);
bool inB64sequence = false; int b = 0, k = 0;
bool base64 = false;
for (string::const_iterator it = cvt.begin() ; it != cvt.end() ; ++it) string::size_type remaining = cvt.length();
for (string::size_type i = 0, len = cvt.length() ; i < len ; )
{ {
const unsigned char c = *it; const unsigned char c = cvt[i];
// Replace hierarchy separator with an equivalent UTF-7 Base64 sequence // Replace hierarchy separator with an equivalent UTF-7 Base64 sequence
if (!inB64sequence && c == hierarchySeparator) if (!base64 && c == hierarchySeparator)
{ {
out += "&" + hsUTF7 + "-"; out += "&" + hsUTF7 + "-";
++i;
--remaining;
continue; continue;
} }
switch (c) string::size_type n = 0;
int ch = 0;
if (c < 0x80)
ch = c, n = 0;
else if (c < 0xc2)
return "";
else if (c < 0xe0)
ch = c & 0x1f, n = 1;
else if (c < 0xf0)
ch = c & 0x0f, n = 2;
else if (c < 0xf8)
ch = c & 0x07, n = 3;
else if (c < 0xfc)
ch = c & 0x03, n = 4;
else if (c < 0xfe)
ch = c & 0x01, n = 5;
else
return "";
if (n > remaining)
return ""; // error
++i;
--remaining;
for (string::size_type j = 0 ; j < n ; j++)
{ {
// Beginning of Base64 sequence: replace '+' with '&' if ((cvt[i + j] & 0xc0) != 0x80)
case '+': return ""; // error
ch = (ch << 6) | (cvt[i + j] & 0x3f);
}
if (n > 1 && !(ch >> (n * 5 + 1)))
return ""; // error
i += n;
remaining -= n;
if (ch < 0x20 || ch >= 0x7f)
{ {
if (!inB64sequence) if (!base64)
{ {
inB64sequence = true;
out += '&'; out += '&';
base64 = true;
b = 0;
k = 10;
}
if (ch & ~0xffff)
ch = 0xfffe;
out += base64alphabet[b | ch >> k];
k -= 6;
for ( ; k >= 0 ; k -= 6)
out += base64alphabet[(ch >> k) & 0x3f];
b = (ch << (-k)) & 0x3f;
k += 16;
} }
else else
{ {
out += '+'; if (base64)
}
break;
}
// End of Base64 sequence
case '-':
{ {
inB64sequence = false; if (k > 10)
out += base64alphabet[b];
out += '-'; out += '-';
break; base64 = false;
}
// ',' is used instead of '/' in modified Base64
case '/':
{
out += inB64sequence ? ',' : '/';
break;
}
// '&' (0x26) is represented by the two-octet sequence "&-"
case '&':
{
if (!inB64sequence)
out += "&-";
else
out += '&';
break;
}
default:
{
out += c;
break;
} }
out += ch;
if (ch == '&')
out += '-';
} }
} }
if (base64)
{
if (k > 10)
out += base64alphabet[b];
out += '-';
}
return (out); return (out);
} }