Fixed bug in modified UTF-7 encoding.

This commit is contained in:
Vincent Richard 2007-07-09 22:05:54 +00:00
parent 5e3a99710b
commit e984682e26
2 changed files with 101 additions and 48 deletions

View File

@ -2,6 +2,10 @@
VERSION 0.8.2cvs
================
2007-07-09 Vincent Richard <vincent@vincent-richard.net>
* IMAPUtils.cpp: fixed bug in modified UTF-7 encoding (IMAP).
2007-05-22 Vincent Richard <vincent@vincent-richard.net>
* Implemented thread-safe reference counting for smart pointers,

View File

@ -90,8 +90,7 @@ const string IMAPUtils::quoteString(const string& text)
quoted += '"';
for (string::const_iterator it = text.begin() ;
!needQuoting && it != text.end() ; ++it)
for (string::const_iterator it = text.begin() ; it != text.end() ; ++it)
{
const unsigned char c = *it;
@ -168,74 +167,124 @@ const string IMAPUtils::toModifiedUTF7
hsUTF7[1] = base64alphabet[(hs & 0xF0) >> 4];
hsUTF7[2] = base64alphabet[(hs & 0x0F) << 2];
// Transcode path component to UTF-7 charset.
// iconv() is buggy with UTF-8 to UTF-7 conversion, so we do it "by hand".
// This code is largely inspired from "imap/utf7.c", in mutt 1.4.
// Copyright (C) 2000 Edmund Grimley Evans <edmundo@rano.org>
// WARNING: This may throw "exceptions::charset_conv_error"
const string cvt = text.getConvertedText(charset(charsets::UTF_7));
const string cvt = text.getConvertedText(charset(charsets::UTF_8));
// Transcode to modified UTF-7 (RFC-2060).
// In the worst case we convert 2 chars to 7 chars.
// For example: "\x10&\x10&..." -> "&ABA-&-&ABA-&-...".
string out;
out.reserve((cvt.length() * 3) / 2);
out.reserve((cvt.length() / 2) * 7 + 6);
bool inB64sequence = false;
int b = 0, k = 0;
bool base64 = false;
for (string::const_iterator it = cvt.begin() ; it != cvt.end() ; ++it)
string::size_type remaining = cvt.length();
for (string::size_type i = 0, len = cvt.length() ; i < len ; )
{
const unsigned char c = *it;
const unsigned char c = cvt[i];
// Replace hierarchy separator with an equivalent UTF-7 Base64 sequence
if (!inB64sequence && c == hierarchySeparator)
if (!base64 && c == hierarchySeparator)
{
out += "&" + hsUTF7 + "-";
++i;
--remaining;
continue;
}
switch (c)
string::size_type n = 0;
int ch = 0;
if (c < 0x80)
ch = c, n = 0;
else if (c < 0xc2)
return "";
else if (c < 0xe0)
ch = c & 0x1f, n = 1;
else if (c < 0xf0)
ch = c & 0x0f, n = 2;
else if (c < 0xf8)
ch = c & 0x07, n = 3;
else if (c < 0xfc)
ch = c & 0x03, n = 4;
else if (c < 0xfe)
ch = c & 0x01, n = 5;
else
return "";
if (n > remaining)
return ""; // error
++i;
--remaining;
for (string::size_type j = 0 ; j < n ; j++)
{
// Beginning of Base64 sequence: replace '+' with '&'
case '+':
if ((cvt[i + j] & 0xc0) != 0x80)
return ""; // error
ch = (ch << 6) | (cvt[i + j] & 0x3f);
}
if (n > 1 && !(ch >> (n * 5 + 1)))
return ""; // error
i += n;
remaining -= n;
if (ch < 0x20 || ch >= 0x7f)
{
if (!inB64sequence)
if (!base64)
{
inB64sequence = true;
out += '&';
}
else
{
out += '+';
base64 = true;
b = 0;
k = 10;
}
break;
}
// End of Base64 sequence
case '-':
{
inB64sequence = false;
out += '-';
break;
}
// ',' is used instead of '/' in modified Base64
case '/':
{
out += inB64sequence ? ',' : '/';
break;
}
// '&' (0x26) is represented by the two-octet sequence "&-"
case '&':
{
if (!inB64sequence)
out += "&-";
else
out += '&';
if (ch & ~0xffff)
ch = 0xfffe;
break;
}
default:
{
out += c;
break;
}
out += base64alphabet[b | ch >> k];
k -= 6;
for ( ; k >= 0 ; k -= 6)
out += base64alphabet[(ch >> k) & 0x3f];
b = (ch << (-k)) & 0x3f;
k += 16;
}
else
{
if (base64)
{
if (k > 10)
out += base64alphabet[b];
out += '-';
base64 = false;
}
out += ch;
if (ch == '&')
out += '-';
}
}
if (base64)
{
if (k > 10)
out += base64alphabet[b];
out += '-';
}
return (out);