vmime/src/charsetConverter.cpp

413 lines
9.8 KiB
C++

//
// VMime library (http://www.vmime.org)
// Copyright (C) 2002-2009 Vincent Richard <vincent@vincent-richard.net>
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 3 of
// the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Linking this library statically or dynamically with other modules is making
// a combined work based on this library. Thus, the terms and conditions of
// the GNU General Public License cover the whole combination.
//
#include "vmime/charsetConverter.hpp"
#include "vmime/exception.hpp"
extern "C"
{
#ifndef VMIME_BUILDING_DOC
#include <iconv.h>
#include <errno.h>
// HACK: prototypes may differ depending on the compiler and/or system (the
// second parameter may or may not be 'const'). This relies on the compiler
// for choosing the right type.
class ICONV_HACK
{
public:
ICONV_HACK(const char** ptr) : m_ptr(ptr) { }
operator const char**() { return m_ptr; }
operator char**() { return const_cast <char**>(m_ptr); }
private:
const char** m_ptr;
};
#endif // VMIME_BUILDING_DOC
}
// Output replacement char when an invalid sequence is encountered
template <typename OUTPUT_CLASS, typename ICONV_DESC>
void outputInvalidChar(OUTPUT_CLASS& out, ICONV_DESC cd)
{
const char* invalidCharIn = "?";
size_t invalidCharInLen = 1;
char invalidCharOutBuffer[16];
char* invalidCharOutPtr = invalidCharOutBuffer;
size_t invalidCharOutLen = 16;
if (iconv(cd, ICONV_HACK(&invalidCharIn), &invalidCharInLen,
&invalidCharOutPtr, &invalidCharOutLen) != static_cast <size_t>(-1))
{
out.write(invalidCharOutBuffer, 16 - invalidCharOutLen);
}
}
namespace vmime
{
charsetConverter::charsetConverter(const charset& source, const charset& dest)
: m_desc(NULL), m_source(source), m_dest(dest)
{
// Get an iconv descriptor
const iconv_t cd = iconv_open(dest.getName().c_str(), source.getName().c_str());
if (cd != reinterpret_cast <iconv_t>(-1))
{
iconv_t* p = new iconv_t;
*p= cd;
m_desc = p;
}
}
charsetConverter::~charsetConverter()
{
if (m_desc != NULL)
{
// Close iconv handle
iconv_close(*static_cast <iconv_t*>(m_desc));
delete static_cast <iconv_t*>(m_desc);
m_desc = NULL;
}
}
void charsetConverter::convert(utility::inputStream& in, utility::outputStream& out)
{
if (m_desc == NULL)
throw exceptions::charset_conv_error("Cannot initialize converter.");
const iconv_t cd = *static_cast <iconv_t*>(m_desc);
char inBuffer[32768];
char outBuffer[32768];
size_t inPos = 0;
bool prevIsInvalid = false;
while (true)
{
// Fullfill the buffer
size_t inLength = static_cast <size_t>(in.read(inBuffer + inPos, sizeof(inBuffer) - inPos) + inPos);
size_t outLength = sizeof(outBuffer);
const char* inPtr = inBuffer;
char* outPtr = outBuffer;
// Convert input bytes
if (iconv(cd, ICONV_HACK(&inPtr), &inLength,
&outPtr, &outLength) == static_cast <size_t>(-1))
{
// Illegal input sequence or input sequence has no equivalent
// sequence in the destination charset.
if (prevIsInvalid)
{
// Write successfully converted bytes
out.write(outBuffer, sizeof(outBuffer) - outLength);
// Output a special character to indicate we don't known how to
// convert the sequence at this position
outputInvalidChar(out, cd);
// Skip a byte and leave unconverted bytes in the input buffer
std::copy(const_cast <char*>(inPtr + 1), inBuffer + sizeof(inBuffer), inBuffer);
inPos = inLength - 1;
}
else
{
// Write successfully converted bytes
out.write(outBuffer, sizeof(outBuffer) - outLength);
// Leave unconverted bytes in the input buffer
std::copy(const_cast <char*>(inPtr), inBuffer + sizeof(inBuffer), inBuffer);
inPos = inLength;
if (errno != E2BIG)
prevIsInvalid = true;
}
}
else
{
// Write successfully converted bytes
out.write(outBuffer, sizeof(outBuffer) - outLength);
inPos = 0;
prevIsInvalid = false;
}
// Check for end of data
if (in.eof() && inPos == 0)
break;
}
}
void charsetConverter::convert(const string& in, string& out)
{
out.clear();
utility::inputStreamStringAdapter is(in);
utility::outputStreamStringAdapter os(out);
convert(is, os);
os.flush();
}
// charsetFilteredOutputStream
namespace utility {
charsetFilteredOutputStream::charsetFilteredOutputStream
(const charset& source, const charset& dest, outputStream& os)
: m_desc(NULL), m_sourceCharset(source), m_destCharset(dest),
m_stream(os), m_unconvCount(0)
{
// Get an iconv descriptor
const iconv_t cd = iconv_open(dest.getName().c_str(), source.getName().c_str());
if (cd != reinterpret_cast <iconv_t>(-1))
{
iconv_t* p = new iconv_t;
*p= cd;
m_desc = p;
}
}
charsetFilteredOutputStream::~charsetFilteredOutputStream()
{
if (m_desc != NULL)
{
// Close iconv handle
iconv_close(*static_cast <iconv_t*>(m_desc));
delete static_cast <iconv_t*>(m_desc);
m_desc = NULL;
}
}
outputStream& charsetFilteredOutputStream::getNextOutputStream()
{
return m_stream;
}
void charsetFilteredOutputStream::write
(const value_type* const data, const size_type count)
{
if (m_desc == NULL)
throw exceptions::charset_conv_error("Cannot initialize converter.");
const iconv_t cd = *static_cast <iconv_t*>(m_desc);
const value_type* curData = data;
size_type curDataLen = count;
// If there is some unconverted bytes left, add more data from this
// chunk to see if it can now be converted.
while (m_unconvCount != 0 || curDataLen != 0)
{
if (m_unconvCount != 0)
{
// Check if an incomplete input sequence is larger than the
// input buffer size: should not happen except if something
// in the input sequence is invalid. If so, output a special
// character and skip one byte in the invalid sequence.
if (m_unconvCount >= sizeof(m_unconvBuffer))
{
outputInvalidChar(m_stream, cd);
std::copy(m_unconvBuffer + 1,
m_unconvBuffer + m_unconvCount, m_unconvBuffer);
m_unconvCount--;
}
// Get more data
const size_type remaining =
std::min(curDataLen, sizeof(m_unconvBuffer) - m_unconvCount);
std::copy(curData, curData + remaining, m_unconvBuffer + m_unconvCount);
m_unconvCount += remaining;
curDataLen -= remaining;
curData += remaining;
if (remaining == 0)
return; // no more data
// Try a conversion
const char* inPtr = m_unconvBuffer;
size_t inLength = m_unconvCount;
char* outPtr = m_outputBuffer;
size_t outLength = sizeof(m_outputBuffer);
const size_t inLength0 = inLength;
if (iconv(cd, ICONV_HACK(&inPtr), &inLength, &outPtr, &outLength) == static_cast <size_t>(-1))
{
const size_t inputConverted = inLength0 - inLength;
// Write successfully converted bytes
m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength);
// Shift unconverted bytes
std::copy(m_unconvBuffer + inputConverted,
m_unconvBuffer + m_unconvCount, m_unconvBuffer);
m_unconvCount -= inputConverted;
continue;
}
// Write successfully converted bytes
m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength);
// Empty the unconverted buffer
m_unconvCount = 0;
}
if (curDataLen == 0)
return; // no more data
// Now, convert the current data buffer
const char* inPtr = curData;
size_t inLength = std::min(curDataLen, sizeof(m_outputBuffer) / MAX_CHARACTER_WIDTH);
char* outPtr = m_outputBuffer;
size_t outLength = sizeof(m_outputBuffer);
const size_t inLength0 = inLength;
if (iconv(cd, ICONV_HACK(&inPtr), &inLength, &outPtr, &outLength) == static_cast <size_t>(-1))
{
// Write successfully converted bytes
m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength);
const size_t inputConverted = inLength0 - inLength;
curData += inputConverted;
curDataLen -= inputConverted;
// Put one byte byte into the unconverted buffer so
// that the next iteration fill it
if (curDataLen != 0)
{
m_unconvCount = 1;
m_unconvBuffer[0] = *curData;
curData++;
curDataLen--;
}
}
else
{
// Write successfully converted bytes
m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength);
curData += inLength0;
curDataLen -= inLength0;
}
}
}
void charsetFilteredOutputStream::flush()
{
if (m_desc == NULL)
throw exceptions::charset_conv_error("Cannot initialize converter.");
const iconv_t cd = *static_cast <iconv_t*>(m_desc);
size_t offset = 0;
// Process unconverted bytes
while (m_unconvCount != 0)
{
// Try a conversion
const char* inPtr = m_unconvBuffer + offset;
size_t inLength = m_unconvCount;
char* outPtr = m_outputBuffer;
size_t outLength = sizeof(m_outputBuffer);
const size_t inLength0 = inLength;
if (iconv(cd, ICONV_HACK(&inPtr), &inLength, &outPtr, &outLength) == static_cast <size_t>(-1))
{
const size_t inputConverted = inLength0 - inLength;
// Skip a "blocking" character
if (inputConverted == 0)
{
outputInvalidChar(m_stream, cd);
offset++;
m_unconvCount--;
}
else
{
// Write successfully converted bytes
m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength);
offset += inputConverted;
m_unconvCount -= inputConverted;
}
}
else
{
// Write successfully converted bytes
m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength);
m_unconvCount = 0;
}
}
m_stream.flush();
}
} // utility
} // vmime