about | summary | refs | log | tree | commit | diff | stats
path: root/src/charsetConverter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/charsetConverter.cpp')
-rw-r--r-- src/charsetConverter.cpp 393
1 files changed, 10 insertions, 383 deletions
diff --git a/src/charsetConverter.cpp b/src/charsetConverter.cpp
index a33f4f84..c2041476 100644
--- a/src/charsetConverter.cpp
+++ b/src/charsetConverter.cpp
@@ -22,398 +22,25 @@
//
#include "vmime/charsetConverter.hpp"
-#include "vmime/exception.hpp"
-#include "vmime/utility/inputStreamStringAdapter.hpp"
-#include "vmime/utility/outputStreamStringAdapter.hpp"
-
-
-extern "C"
-{
-#ifndef VMIME_BUILDING_DOC
-
- #include <iconv.h>
- #include <errno.h>
-
- // HACK: prototypes may differ depending on the compiler and/or system (the
- // second parameter may or may not be 'const'). This relies on the compiler
- // for choosing the right type.
- class ICONV_HACK
- {
- public:
-
- ICONV_HACK(const char** ptr) : m_ptr(ptr) { }
-
- operator const char**() { return m_ptr; }
- operator char**() { return const_cast <char**>(m_ptr); }
-
- private:
-
- const char** m_ptr;
- };
-
-#endif // VMIME_BUILDING_DOC
-}
-
-
-
-// Output replacement char when an invalid sequence is encountered
-template <typename OUTPUT_CLASS, typename ICONV_DESC>
-void outputInvalidChar(OUTPUT_CLASS& out, ICONV_DESC cd)
-{
- const char* invalidCharIn = "?";
- size_t invalidCharInLen = 1;
-
- char invalidCharOutBuffer[16];
- char* invalidCharOutPtr = invalidCharOutBuffer;
- size_t invalidCharOutLen = 16;
-
- if (iconv(cd, ICONV_HACK(&invalidCharIn), &invalidCharInLen,
- &invalidCharOutPtr, &invalidCharOutLen) != static_cast <size_t>(-1))
- {
- out.write(invalidCharOutBuffer, 16 - invalidCharOutLen);
- }
-}
+#include "vmime/charsetConverter_iconv.hpp"
+#include "vmime/charsetConverter_idna.hpp"
namespace vmime
{
-charsetConverter::charsetConverter(const charset& source, const charset& dest)
- : m_desc(NULL), m_source(source), m_dest(dest)
-{
- // Get an iconv descriptor
- const iconv_t cd = iconv_open(dest.getName().c_str(), source.getName().c_str());
-
- if (cd != reinterpret_cast <iconv_t>(-1))
- {
- iconv_t* p = new iconv_t;
- *p= cd;
-
- m_desc = p;
- }
-}
-
-
-charsetConverter::~charsetConverter()
-{
- if (m_desc != NULL)
- {
- // Close iconv handle
- iconv_close(*static_cast <iconv_t*>(m_desc));
-
- delete static_cast <iconv_t*>(m_desc);
- m_desc = NULL;
- }
-}
-
-
-void charsetConverter::convert(utility::inputStream& in, utility::outputStream& out)
-{
- if (m_desc == NULL)
- throw exceptions::charset_conv_error("Cannot initialize converter.");
-
- const iconv_t cd = *static_cast <iconv_t*>(m_desc);
-
- char inBuffer[32768];
- char outBuffer[32768];
- size_t inPos = 0;
-
- bool prevIsInvalid = false;
- bool breakAfterNext = false;
-
- while (true)
- {
- // Fullfill the buffer
- size_t inLength = static_cast <size_t>(in.read(inBuffer + inPos, sizeof(inBuffer) - inPos) + inPos);
- size_t outLength = sizeof(outBuffer);
-
- const char* inPtr = breakAfterNext ? NULL : inBuffer;
- size_t *ptrLength = breakAfterNext ? NULL : &inLength;
- char* outPtr = outBuffer;
-
- // Convert input bytes
- if (iconv(cd, ICONV_HACK(&inPtr), ptrLength,
- &outPtr, &outLength) == static_cast <size_t>(-1))
- {
- // Illegal input sequence or input sequence has no equivalent
- // sequence in the destination charset.
- if (prevIsInvalid)
- {
- // Write successfully converted bytes
- out.write(outBuffer, sizeof(outBuffer) - outLength);
-
- // Output a special character to indicate we don't known how to
- // convert the sequence at this position
- outputInvalidChar(out, cd);
-
- // Skip a byte and leave unconverted bytes in the input buffer
- std::copy(const_cast <char*>(inPtr + 1), inBuffer + sizeof(inBuffer), inBuffer);
- inPos = inLength - 1;
- }
- else
- {
- // Write successfully converted bytes
- out.write(outBuffer, sizeof(outBuffer) - outLength);
-
- // Leave unconverted bytes in the input buffer
- std::copy(const_cast <char*>(inPtr), inBuffer + sizeof(inBuffer), inBuffer);
- inPos = inLength;
-
- if (errno != E2BIG)
- prevIsInvalid = true;
- }
- }
- else
- {
- // Write successfully converted bytes
- out.write(outBuffer, sizeof(outBuffer) - outLength);
-
- inPos = 0;
- prevIsInvalid = false;
- }
-
- if (breakAfterNext)
- break;
-
- // Check for end of data, loop again to flush stateful data from iconv
- if (in.eof() && inPos == 0)
- breakAfterNext = true;
- }
-}
-
-
-void charsetConverter::convert(const string& in, string& out)
+// static factory: yields a charsetConverter_idna when source or dest equals "idna", otherwise a charsetConverter_iconv
+ref <charsetConverter> charsetConverter::create
+ (const charset& source, const charset& dest,
+ const charsetConverterOptions& opts)
{
- out.clear();
-
- utility::inputStreamStringAdapter is(in);
- utility::outputStreamStringAdapter os(out);
-
- convert(is, os);
-
- os.flush();
+ if (source == "idna" || dest == "idna")
+ return vmime::create <charsetConverter_idna>(source, dest, opts);
+ else
+ return vmime::create <charsetConverter_iconv>(source, dest, opts);
}
-
-// charsetFilteredOutputStream
-
-namespace utility {
-
-
-charsetFilteredOutputStream::charsetFilteredOutputStream
- (const charset& source, const charset& dest, outputStream& os)
- : m_desc(NULL), m_sourceCharset(source), m_destCharset(dest),
- m_stream(os), m_unconvCount(0)
-{
- // Get an iconv descriptor
- const iconv_t cd = iconv_open(dest.getName().c_str(), source.getName().c_str());
-
- if (cd != reinterpret_cast <iconv_t>(-1))
- {
- iconv_t* p = new iconv_t;
- *p= cd;
-
- m_desc = p;
- }
-}
-
-
-charsetFilteredOutputStream::~charsetFilteredOutputStream()
-{
- if (m_desc != NULL)
- {
- // Close iconv handle
- iconv_close(*static_cast <iconv_t*>(m_desc));
-
- delete static_cast <iconv_t*>(m_desc);
- m_desc = NULL;
- }
-}
-
-
-outputStream& charsetFilteredOutputStream::getNextOutputStream()
-{
- return m_stream;
-}
-
-
-void charsetFilteredOutputStream::write
- (const value_type* const data, const size_type count)
-{
- if (m_desc == NULL)
- throw exceptions::charset_conv_error("Cannot initialize converter.");
-
- const iconv_t cd = *static_cast <iconv_t*>(m_desc);
-
- const value_type* curData = data;
- size_type curDataLen = count;
-
- // If there is some unconverted bytes left, add more data from this
- // chunk to see if it can now be converted.
- while (m_unconvCount != 0 || curDataLen != 0)
- {
- if (m_unconvCount != 0)
- {
- // Check if an incomplete input sequence is larger than the
- // input buffer size: should not happen except if something
- // in the input sequence is invalid. If so, output a special
- // character and skip one byte in the invalid sequence.
- if (m_unconvCount >= sizeof(m_unconvBuffer))
- {
- outputInvalidChar(m_stream, cd);
-
- std::copy(m_unconvBuffer + 1,
- m_unconvBuffer + m_unconvCount, m_unconvBuffer);
-
- m_unconvCount--;
- }
-
- // Get more data
- const size_type remaining =
- std::min(curDataLen, sizeof(m_unconvBuffer) - m_unconvCount);
-
- std::copy(curData, curData + remaining, m_unconvBuffer + m_unconvCount);
-
- m_unconvCount += remaining;
- curDataLen -= remaining;
- curData += remaining;
-
- if (remaining == 0)
- return; // no more data
-
- // Try a conversion
- const char* inPtr = m_unconvBuffer;
- size_t inLength = m_unconvCount;
- char* outPtr = m_outputBuffer;
- size_t outLength = sizeof(m_outputBuffer);
-
- const size_t inLength0 = inLength;
-
- if (iconv(cd, ICONV_HACK(&inPtr), &inLength, &outPtr, &outLength) == static_cast <size_t>(-1))
- {
- const size_t inputConverted = inLength0 - inLength;
-
- // Write successfully converted bytes
- m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength);
-
- // Shift unconverted bytes
- std::copy(m_unconvBuffer + inputConverted,
- m_unconvBuffer + m_unconvCount, m_unconvBuffer);
-
- m_unconvCount -= inputConverted;
-
- continue;
- }
-
- // Write successfully converted bytes
- m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength);
-
- // Empty the unconverted buffer
- m_unconvCount = 0;
- }
-
- if (curDataLen == 0)
- return; // no more data
-
- // Now, convert the current data buffer
- const char* inPtr = curData;
- size_t inLength = std::min(curDataLen, sizeof(m_outputBuffer) / MAX_CHARACTER_WIDTH);
- char* outPtr = m_outputBuffer;
- size_t outLength = sizeof(m_outputBuffer);
-
- const size_t inLength0 = inLength;
-
- if (iconv(cd, ICONV_HACK(&inPtr), &inLength, &outPtr, &outLength) == static_cast <size_t>(-1))
- {
- // Write successfully converted bytes
- m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength);
-
- const size_t inputConverted = inLength0 - inLength;
-
- curData += inputConverted;
- curDataLen -= inputConverted;
-
- // Put one byte byte into the unconverted buffer so
- // that the next iteration fill it
- if (curDataLen != 0)
- {
- m_unconvCount = 1;
- m_unconvBuffer[0] = *curData;
-
- curData++;
- curDataLen--;
- }
- }
- else
- {
- // Write successfully converted bytes
- m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength);
-
- curData += inLength0;
- curDataLen -= inLength0;
- }
- }
-}
-
-
-void charsetFilteredOutputStream::flush()
-{
- if (m_desc == NULL)
- throw exceptions::charset_conv_error("Cannot initialize converter.");
-
- const iconv_t cd = *static_cast <iconv_t*>(m_desc);
-
- size_t offset = 0;
-
- // Process unconverted bytes
- while (m_unconvCount != 0)
- {
- // Try a conversion
- const char* inPtr = m_unconvBuffer + offset;
- size_t inLength = m_unconvCount;
- char* outPtr = m_outputBuffer;
- size_t outLength = sizeof(m_outputBuffer);
-
- const size_t inLength0 = inLength;
-
- if (iconv(cd, ICONV_HACK(&inPtr), &inLength, &outPtr, &outLength) == static_cast <size_t>(-1))
- {
- const size_t inputConverted = inLength0 - inLength;
-
- // Skip a "blocking" character
- if (inputConverted == 0)
- {
- outputInvalidChar(m_stream, cd);
-
- offset++;
- m_unconvCount--;
- }
- else
- {
- // Write successfully converted bytes
- m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength);
-
- offset += inputConverted;
- m_unconvCount -= inputConverted;
- }
- }
- else
- {
- // Write successfully converted bytes
- m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength);
-
- m_unconvCount = 0;
- }
- }
-
- m_stream.flush();
-}
-
-
-} // utility
-
-
} // vmime