aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/vmime/charsetConverter_icu.cpp1144
-rw-r--r--src/vmime/charsetConverter_icu.hpp274
2 files changed, 709 insertions, 709 deletions
diff --git a/src/vmime/charsetConverter_icu.cpp b/src/vmime/charsetConverter_icu.cpp
index 5779cd90..cc74be98 100644
--- a/src/vmime/charsetConverter_icu.cpp
+++ b/src/vmime/charsetConverter_icu.cpp
@@ -1,572 +1,572 @@
-//
-// VMime library (http://www.vmime.org)
-// Copyright (C) 2002 Vincent Richard <[email protected]>
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 3 of
-// the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with this program; if not, write to the Free Software Foundation, Inc.,
-// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-//
-// Linking this library statically or dynamically with other modules is making
-// a combined work based on this library. Thus, the terms and conditions of
-// the GNU General Public License cover the whole combination.
-//
-
-#include "vmime/config.hpp"
-
-
-#if VMIME_CHARSETCONV_LIB_IS_ICU
-
-
-#include "vmime/charsetConverter_icu.hpp"
-
-#include "vmime/exception.hpp"
-#include "vmime/utility/inputStreamStringAdapter.hpp"
-#include "vmime/utility/outputStreamStringAdapter.hpp"
-
-
-#ifndef VMIME_BUILDING_DOC
-
- #include <unicode/ucnv.h>
- #include <unicode/ucnv_err.h>
-
-#endif // VMIME_BUILDING_DOC
-
-
-#include <unicode/unistr.h>
-
-
-namespace vmime {
-
-
-// static
-shared_ptr <charsetConverter> charsetConverter::createGenericConverter(
- const charset& source,
- const charset& dest,
- const charsetConverterOptions& opts
-) {
-
- return make_shared <charsetConverter_icu>(source, dest, opts);
-}
-
-
-charsetConverter_icu::charsetConverter_icu(
- const charset& source,
- const charset& dest,
- const charsetConverterOptions& opts
-)
- : m_from(NULL),
- m_to(NULL),
- m_source(source),
- m_dest(dest),
- m_options(opts) {
-
- UErrorCode err = U_ZERO_ERROR;
- m_from = ucnv_open(source.getName().c_str(), &err);
-
- if (!U_SUCCESS(err)) {
-
- throw exceptions::charset_conv_error(
- "Cannot initialize ICU converter for source charset '" + source.getName()
- + "' (error code: " + u_errorName(err) + "."
- );
- }
-
- m_to = ucnv_open(dest.getName().c_str(), &err);
-
- if (!U_SUCCESS(err)) {
-
- throw exceptions::charset_conv_error(
- "Cannot initialize ICU converter for destination charset '" + dest.getName()
- + "' (error code: " + u_errorName(err) + "."
- );
- }
-}
-
-
-charsetConverter_icu::~charsetConverter_icu() {
-
- if (m_from) ucnv_close(m_from);
- if (m_to) ucnv_close(m_to);
-}
-
-
-void charsetConverter_icu::convert(
- utility::inputStream& in,
- utility::outputStream& out,
- status* st
-) {
-
- UErrorCode err = U_ZERO_ERROR;
-
- ucnv_reset(m_from);
- ucnv_reset(m_to);
-
- if (st) {
- new (st) status();
- }
-
- // From buffers
- byte_t cpInBuffer[16]; // stream data put here
- const size_t outSize = ucnv_getMinCharSize(m_from) * sizeof(cpInBuffer) * sizeof(UChar);
- std::vector <UChar> uOutBuffer(outSize); // Unicode chars end up here
-
- // To buffers
- // converted (char) data end up here
- const size_t cpOutBufferSz = ucnv_getMaxCharSize(m_to) * outSize;
- std::vector <char> cpOutBuffer(cpOutBufferSz);
-
- // Tell ICU what to do when encountering an illegal byte sequence
- if (m_options.silentlyReplaceInvalidSequences) {
-
- // Set replacement chars for when converting from Unicode to codepage
- icu::UnicodeString substString(m_options.invalidSequence.c_str());
- ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);
-
- if (U_FAILURE(err)) {
- throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
- }
-
- } else {
-
- // Tell ICU top stop (and return an error) on illegal byte sequences
- ucnv_setToUCallBack(
- m_from, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
- );
-
- if (U_FAILURE(err)) {
- throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");
- }
-
- ucnv_setFromUCallBack(
- m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
- );
-
- if (U_FAILURE(err)) {
- throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
- }
- }
-
- // Input data available
- while (!in.eof()) {
-
- // Read input data into buffer
- size_t inLength = in.read(cpInBuffer, sizeof(cpInBuffer));
-
- // Beginning of read data
- const char* source = reinterpret_cast <const char*>(&cpInBuffer[0]);
- const char* sourceLimit = source + inLength; // end + 1
-
- UBool flush = in.eof(); // is this last run?
-
- UErrorCode toErr;
-
- // Loop until all source has been processed
- do {
-
- // Set up target pointers
- UChar* target = &uOutBuffer[0];
- UChar* targetLimit = &target[0] + outSize;
-
- toErr = U_ZERO_ERROR;
-
- ucnv_toUnicode(
- m_from, &target, targetLimit,
- &source, sourceLimit, NULL, flush, &toErr
- );
-
- if (st) {
- st->inputBytesRead += (source - reinterpret_cast <const char*>(&cpInBuffer[0]));
- }
-
- if (toErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(toErr)) {
-
- if (toErr == U_INVALID_CHAR_FOUND ||
- toErr == U_TRUNCATED_CHAR_FOUND ||
- toErr == U_ILLEGAL_CHAR_FOUND) {
-
- // Error will be thrown later (*)
-
- } else {
-
- throw exceptions::charset_conv_error(
- "[ICU] Error converting to Unicode from " + m_source.getName()
- );
- }
- }
-
- // The Unicode source is the buffer just written and the limit
- // is where the previous conversion stopped (target is moved in the conversion)
- const UChar* uSource = &uOutBuffer[0];
- UChar* uSourceLimit = &target[0];
- UErrorCode fromErr;
-
- // Loop until converted chars are fully written
- do {
-
- char* cpTarget = &cpOutBuffer[0];
- const char* cpTargetLimit = &cpOutBuffer[0] + cpOutBufferSz;
-
- fromErr = U_ZERO_ERROR;
-
- // Write converted bytes (Unicode) to destination codepage
- ucnv_fromUnicode(
- m_to, &cpTarget, cpTargetLimit,
- &uSource, uSourceLimit, NULL, flush, &fromErr
- );
-
- if (st) {
-
- // Decrement input bytes count by the number of input bytes in error
- char errBytes[16];
- int8_t errBytesLen = sizeof(errBytes);
- UErrorCode errBytesErr = U_ZERO_ERROR;
-
- ucnv_getInvalidChars(m_from, errBytes, &errBytesLen, &errBytesErr);
-
- st->inputBytesRead -= errBytesLen;
- st->outputBytesWritten += cpTarget - &cpOutBuffer[0];
- }
-
- // (*) If an error occurred while converting from input charset, throw it now
- if (toErr == U_INVALID_CHAR_FOUND ||
- toErr == U_TRUNCATED_CHAR_FOUND ||
- toErr == U_ILLEGAL_CHAR_FOUND) {
-
- throw exceptions::illegal_byte_sequence_for_charset();
- }
-
- if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
-
- if (fromErr == U_INVALID_CHAR_FOUND ||
- fromErr == U_TRUNCATED_CHAR_FOUND ||
- fromErr == U_ILLEGAL_CHAR_FOUND) {
-
- throw exceptions::illegal_byte_sequence_for_charset();
-
- } else {
-
- throw exceptions::charset_conv_error(
- "[ICU] Error converting from Unicode to " + m_dest.getName()
- );
- }
- }
-
- // Write to destination stream
- out.write(&cpOutBuffer[0], (cpTarget - &cpOutBuffer[0]));
-
- } while (fromErr == U_BUFFER_OVERFLOW_ERROR);
-
- } while (toErr == U_BUFFER_OVERFLOW_ERROR);
- }
-}
-
-
-void charsetConverter_icu::convert(const string& in, string& out, status* st) {
-
- if (st) {
- new (st) status();
- }
-
- out.clear();
-
- utility::inputStreamStringAdapter is(in);
- utility::outputStreamStringAdapter os(out);
-
- convert(is, os, st);
-
- os.flush();
-}
-
-
-shared_ptr <utility::charsetFilteredOutputStream>
- charsetConverter_icu::getFilteredOutputStream(
- utility::outputStream& os,
- const charsetConverterOptions& opts
- ) {
-
- return make_shared <utility::charsetFilteredOutputStream_icu>(m_source, m_dest, &os, opts);
-}
-
-
-
-// charsetFilteredOutputStream_icu
-
-namespace utility {
-
-
-charsetFilteredOutputStream_icu::charsetFilteredOutputStream_icu(
- const charset& source,
- const charset& dest,
- outputStream* os,
- const charsetConverterOptions& opts
-)
- : m_from(NULL),
- m_to(NULL),
- m_sourceCharset(source),
- m_destCharset(dest),
- m_stream(*os),
- m_options(opts) {
-
- UErrorCode err = U_ZERO_ERROR;
- m_from = ucnv_open(source.getName().c_str(), &err);
-
- if (!U_SUCCESS(err)) {
-
- throw exceptions::charset_conv_error(
- "Cannot initialize ICU converter for source charset '" + source.getName()
- + "' (error code: " + u_errorName(err) + "."
- );
- }
-
- m_to = ucnv_open(dest.getName().c_str(), &err);
-
- if (!U_SUCCESS(err)) {
-
- throw exceptions::charset_conv_error(
- "Cannot initialize ICU converter for destination charset '" + dest.getName()
- + "' (error code: " + u_errorName(err) + "."
- );
- }
-
- // Tell ICU what to do when encountering an illegal byte sequence
- if (m_options.silentlyReplaceInvalidSequences) {
-
- // Set replacement chars for when converting from Unicode to codepage
- icu::UnicodeString substString(m_options.invalidSequence.c_str());
- ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);
-
- if (U_FAILURE(err)) {
- throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
- }
-
- } else {
-
- // Tell ICU top stop (and return an error) on illegal byte sequences
- ucnv_setToUCallBack(
- m_to, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
- );
-
- if (U_FAILURE(err)) {
- throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");
- }
-
- ucnv_setFromUCallBack(
- m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
- );
-
- if (U_FAILURE(err)) {
- throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
- }
- }
-}
-
-
-charsetFilteredOutputStream_icu::~charsetFilteredOutputStream_icu() {
-
- if (m_from) ucnv_close(m_from);
- if (m_to) ucnv_close(m_to);
-}
-
-
-outputStream& charsetFilteredOutputStream_icu::getNextOutputStream() {
-
- return m_stream;
-}
-
-
-void charsetFilteredOutputStream_icu::writeImpl(
- const byte_t* const data,
- const size_t count
-) {
-
- if (!m_from || !m_to) {
- throw exceptions::charset_conv_error("Cannot initialize converters.");
- }
-
- // Allocate buffer for Unicode chars
- const size_t uniSize = ucnv_getMinCharSize(m_from) * count * sizeof(UChar);
- std::vector <UChar> uniBuffer(uniSize);
-
- // Conversion loop
- UErrorCode toErr = U_ZERO_ERROR;
-
- const char* uniSource = reinterpret_cast <const char*>(data);
- const char* uniSourceLimit = uniSource + count;
-
- do {
-
- // Convert from source charset to Unicode
- UChar* uniTarget = &uniBuffer[0];
- UChar* uniTargetLimit = &uniBuffer[0] + uniSize;
-
- toErr = U_ZERO_ERROR;
-
- ucnv_toUnicode(
- m_from, &uniTarget, uniTargetLimit,
- &uniSource, uniSourceLimit, NULL, /* flush */ UBool(0), &toErr
- );
-
- if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR) {
-
- if (toErr == U_INVALID_CHAR_FOUND ||
- toErr == U_TRUNCATED_CHAR_FOUND ||
- toErr == U_ILLEGAL_CHAR_FOUND) {
-
- throw exceptions::illegal_byte_sequence_for_charset();
-
- } else {
-
- throw exceptions::charset_conv_error(
- "[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'."
- );
- }
- }
-
- const size_t uniLength = uniTarget - &uniBuffer[0];
-
- // Allocate buffer for destination charset
- const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength;
- std::vector <char> cpBuffer(cpSize);
-
- // Convert from Unicode to destination charset
- UErrorCode fromErr = U_ZERO_ERROR;
-
- const UChar* cpSource = &uniBuffer[0];
- const UChar* cpSourceLimit = &uniBuffer[0] + uniLength;
-
- do {
-
- char* cpTarget = &cpBuffer[0];
- char* cpTargetLimit = &cpBuffer[0] + cpSize;
-
- fromErr = U_ZERO_ERROR;
-
- ucnv_fromUnicode(
- m_to, &cpTarget, cpTargetLimit,
- &cpSource, cpSourceLimit, NULL, /* flush */ FALSE, &fromErr
- );
-
- if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
-
- if (fromErr == U_INVALID_CHAR_FOUND ||
- fromErr == U_TRUNCATED_CHAR_FOUND ||
- fromErr == U_ILLEGAL_CHAR_FOUND) {
-
- throw exceptions::illegal_byte_sequence_for_charset();
-
- } else {
-
- throw exceptions::charset_conv_error(
- "[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'."
- );
- }
- }
-
- const size_t cpLength = cpTarget - &cpBuffer[0];
-
- // Write successfully converted bytes
- m_stream.write(&cpBuffer[0], cpLength);
-
- } while (fromErr == U_BUFFER_OVERFLOW_ERROR);
-
- } while (toErr == U_BUFFER_OVERFLOW_ERROR);
-}
-
-
-void charsetFilteredOutputStream_icu::flush() {
-
- if (!m_from || !m_to) {
- throw exceptions::charset_conv_error("Cannot initialize converters.");
- }
-
- // Allocate buffer for Unicode chars
- const size_t uniSize = ucnv_getMinCharSize(m_from) * 1024 * sizeof(UChar);
- std::vector <UChar> uniBuffer(uniSize);
-
- // Conversion loop (with flushing)
- UErrorCode toErr = U_ZERO_ERROR;
-
- const char* uniSource = 0;
- const char* uniSourceLimit = 0;
-
- do {
-
- // Convert from source charset to Unicode
- UChar* uniTarget = &uniBuffer[0];
- UChar* uniTargetLimit = &uniBuffer[0] + uniSize;
-
- toErr = U_ZERO_ERROR;
-
- ucnv_toUnicode(
- m_from, &uniTarget, uniTargetLimit,
- &uniSource, uniSourceLimit, NULL, /* flush */ UBool(1), &toErr
- );
-
- if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR) {
-
- throw exceptions::charset_conv_error(
- "[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'."
- );
- }
-
- const size_t uniLength = uniTarget - &uniBuffer[0];
-
- // Allocate buffer for destination charset
- const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength;
- std::vector <char> cpBuffer(cpSize);
-
- // Convert from Unicode to destination charset
- UErrorCode fromErr = U_ZERO_ERROR;
-
- const UChar* cpSource = &uniBuffer[0];
- const UChar* cpSourceLimit = &uniBuffer[0] + uniLength;
-
- do {
-
- char* cpTarget = &cpBuffer[0];
- char* cpTargetLimit = &cpBuffer[0] + cpSize;
-
- fromErr = U_ZERO_ERROR;
-
- ucnv_fromUnicode(
- m_to, &cpTarget, cpTargetLimit,
- &cpSource, cpSourceLimit, NULL, /* flush */ UBool(1), &fromErr
- );
-
- if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
-
- throw exceptions::charset_conv_error(
- "[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'."
- );
- }
-
- const size_t cpLength = cpTarget - &cpBuffer[0];
-
- // Write successfully converted bytes
- m_stream.write(&cpBuffer[0], cpLength);
-
- } while (fromErr == U_BUFFER_OVERFLOW_ERROR);
-
- } while (toErr == U_BUFFER_OVERFLOW_ERROR);
-
- m_stream.flush();
-}
-
-
-} // utility
-
-
-} // vmime
-
-
-#endif // VMIME_CHARSETCONV_LIB_IS_ICU
+//
+// VMime library (http://www.vmime.org)
+// Copyright (C) 2002 Vincent Richard <[email protected]>
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 3 of
+// the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Linking this library statically or dynamically with other modules is making
+// a combined work based on this library. Thus, the terms and conditions of
+// the GNU General Public License cover the whole combination.
+//
+
+#include "vmime/config.hpp"
+
+
+#if VMIME_CHARSETCONV_LIB_IS_ICU
+
+
+#include "vmime/charsetConverter_icu.hpp"
+
+#include "vmime/exception.hpp"
+#include "vmime/utility/inputStreamStringAdapter.hpp"
+#include "vmime/utility/outputStreamStringAdapter.hpp"
+
+
+#ifndef VMIME_BUILDING_DOC
+
+ #include <unicode/ucnv.h>
+ #include <unicode/ucnv_err.h>
+
+#endif // VMIME_BUILDING_DOC
+
+
+#include <unicode/unistr.h>
+
+
+namespace vmime {
+
+
+// static
+shared_ptr <charsetConverter> charsetConverter::createGenericConverter(
+ const charset& source,
+ const charset& dest,
+ const charsetConverterOptions& opts
+) {
+
+ return make_shared <charsetConverter_icu>(source, dest, opts);
+}
+
+
+charsetConverter_icu::charsetConverter_icu(
+ const charset& source,
+ const charset& dest,
+ const charsetConverterOptions& opts
+)
+ : m_from(NULL),
+ m_to(NULL),
+ m_source(source),
+ m_dest(dest),
+ m_options(opts) {
+
+ UErrorCode err = U_ZERO_ERROR;
+ m_from = ucnv_open(source.getName().c_str(), &err);
+
+ if (!U_SUCCESS(err)) {
+
+ throw exceptions::charset_conv_error(
+ "Cannot initialize ICU converter for source charset '" + source.getName()
+ + "' (error code: " + u_errorName(err) + "."
+ );
+ }
+
+ m_to = ucnv_open(dest.getName().c_str(), &err);
+
+ if (!U_SUCCESS(err)) {
+
+ throw exceptions::charset_conv_error(
+ "Cannot initialize ICU converter for destination charset '" + dest.getName()
+ + "' (error code: " + u_errorName(err) + "."
+ );
+ }
+}
+
+
+charsetConverter_icu::~charsetConverter_icu() {
+
+ if (m_from) ucnv_close(m_from);
+ if (m_to) ucnv_close(m_to);
+}
+
+
+void charsetConverter_icu::convert(
+ utility::inputStream& in,
+ utility::outputStream& out,
+ status* st
+) {
+
+ UErrorCode err = U_ZERO_ERROR;
+
+ ucnv_reset(m_from);
+ ucnv_reset(m_to);
+
+ if (st) {
+ new (st) status();
+ }
+
+ // From buffers
+ byte_t cpInBuffer[16]; // stream data put here
+ const size_t outSize = ucnv_getMinCharSize(m_from) * sizeof(cpInBuffer) * sizeof(UChar);
+ std::vector <UChar> uOutBuffer(outSize); // Unicode chars end up here
+
+ // To buffers
+ // converted (char) data end up here
+ const size_t cpOutBufferSz = ucnv_getMaxCharSize(m_to) * outSize;
+ std::vector <char> cpOutBuffer(cpOutBufferSz);
+
+ // Tell ICU what to do when encountering an illegal byte sequence
+ if (m_options.silentlyReplaceInvalidSequences) {
+
+ // Set replacement chars for when converting from Unicode to codepage
+ icu::UnicodeString substString(m_options.invalidSequence.c_str());
+ ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);
+
+ if (U_FAILURE(err)) {
+ throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
+ }
+
+ } else {
+
+ // Tell ICU top stop (and return an error) on illegal byte sequences
+ ucnv_setToUCallBack(
+ m_from, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
+ );
+
+ if (U_FAILURE(err)) {
+ throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");
+ }
+
+ ucnv_setFromUCallBack(
+ m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
+ );
+
+ if (U_FAILURE(err)) {
+ throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
+ }
+ }
+
+ // Input data available
+ while (!in.eof()) {
+
+ // Read input data into buffer
+ size_t inLength = in.read(cpInBuffer, sizeof(cpInBuffer));
+
+ // Beginning of read data
+ const char* source = reinterpret_cast <const char*>(&cpInBuffer[0]);
+ const char* sourceLimit = source + inLength; // end + 1
+
+ UBool flush = in.eof(); // is this last run?
+
+ UErrorCode toErr;
+
+ // Loop until all source has been processed
+ do {
+
+ // Set up target pointers
+ UChar* target = &uOutBuffer[0];
+ UChar* targetLimit = &target[0] + outSize;
+
+ toErr = U_ZERO_ERROR;
+
+ ucnv_toUnicode(
+ m_from, &target, targetLimit,
+ &source, sourceLimit, NULL, flush, &toErr
+ );
+
+ if (st) {
+ st->inputBytesRead += (source - reinterpret_cast <const char*>(&cpInBuffer[0]));
+ }
+
+ if (toErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(toErr)) {
+
+ if (toErr == U_INVALID_CHAR_FOUND ||
+ toErr == U_TRUNCATED_CHAR_FOUND ||
+ toErr == U_ILLEGAL_CHAR_FOUND) {
+
+ // Error will be thrown later (*)
+
+ } else {
+
+ throw exceptions::charset_conv_error(
+ "[ICU] Error converting to Unicode from " + m_source.getName()
+ );
+ }
+ }
+
+ // The Unicode source is the buffer just written and the limit
+ // is where the previous conversion stopped (target is moved in the conversion)
+ const UChar* uSource = &uOutBuffer[0];
+ UChar* uSourceLimit = &target[0];
+ UErrorCode fromErr;
+
+ // Loop until converted chars are fully written
+ do {
+
+ char* cpTarget = &cpOutBuffer[0];
+ const char* cpTargetLimit = &cpOutBuffer[0] + cpOutBufferSz;
+
+ fromErr = U_ZERO_ERROR;
+
+ // Write converted bytes (Unicode) to destination codepage
+ ucnv_fromUnicode(
+ m_to, &cpTarget, cpTargetLimit,
+ &uSource, uSourceLimit, NULL, flush, &fromErr
+ );
+
+ if (st) {
+
+ // Decrement input bytes count by the number of input bytes in error
+ char errBytes[16];
+ int8_t errBytesLen = sizeof(errBytes);
+ UErrorCode errBytesErr = U_ZERO_ERROR;
+
+ ucnv_getInvalidChars(m_from, errBytes, &errBytesLen, &errBytesErr);
+
+ st->inputBytesRead -= errBytesLen;
+ st->outputBytesWritten += cpTarget - &cpOutBuffer[0];
+ }
+
+ // (*) If an error occurred while converting from input charset, throw it now
+ if (toErr == U_INVALID_CHAR_FOUND ||
+ toErr == U_TRUNCATED_CHAR_FOUND ||
+ toErr == U_ILLEGAL_CHAR_FOUND) {
+
+ throw exceptions::illegal_byte_sequence_for_charset();
+ }
+
+ if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
+
+ if (fromErr == U_INVALID_CHAR_FOUND ||
+ fromErr == U_TRUNCATED_CHAR_FOUND ||
+ fromErr == U_ILLEGAL_CHAR_FOUND) {
+
+ throw exceptions::illegal_byte_sequence_for_charset();
+
+ } else {
+
+ throw exceptions::charset_conv_error(
+ "[ICU] Error converting from Unicode to " + m_dest.getName()
+ );
+ }
+ }
+
+ // Write to destination stream
+ out.write(&cpOutBuffer[0], (cpTarget - &cpOutBuffer[0]));
+
+ } while (fromErr == U_BUFFER_OVERFLOW_ERROR);
+
+ } while (toErr == U_BUFFER_OVERFLOW_ERROR);
+ }
+}
+
+
+void charsetConverter_icu::convert(const string& in, string& out, status* st) {
+
+ if (st) {
+ new (st) status();
+ }
+
+ out.clear();
+
+ utility::inputStreamStringAdapter is(in);
+ utility::outputStreamStringAdapter os(out);
+
+ convert(is, os, st);
+
+ os.flush();
+}
+
+
+shared_ptr <utility::charsetFilteredOutputStream>
+ charsetConverter_icu::getFilteredOutputStream(
+ utility::outputStream& os,
+ const charsetConverterOptions& opts
+ ) {
+
+ return make_shared <utility::charsetFilteredOutputStream_icu>(m_source, m_dest, &os, opts);
+}
+
+
+
+// charsetFilteredOutputStream_icu
+
+namespace utility {
+
+
+charsetFilteredOutputStream_icu::charsetFilteredOutputStream_icu(
+ const charset& source,
+ const charset& dest,
+ outputStream* os,
+ const charsetConverterOptions& opts
+)
+ : m_from(NULL),
+ m_to(NULL),
+ m_sourceCharset(source),
+ m_destCharset(dest),
+ m_stream(*os),
+ m_options(opts) {
+
+ UErrorCode err = U_ZERO_ERROR;
+ m_from = ucnv_open(source.getName().c_str(), &err);
+
+ if (!U_SUCCESS(err)) {
+
+ throw exceptions::charset_conv_error(
+ "Cannot initialize ICU converter for source charset '" + source.getName()
+ + "' (error code: " + u_errorName(err) + "."
+ );
+ }
+
+ m_to = ucnv_open(dest.getName().c_str(), &err);
+
+ if (!U_SUCCESS(err)) {
+
+ throw exceptions::charset_conv_error(
+ "Cannot initialize ICU converter for destination charset '" + dest.getName()
+ + "' (error code: " + u_errorName(err) + "."
+ );
+ }
+
+ // Tell ICU what to do when encountering an illegal byte sequence
+ if (m_options.silentlyReplaceInvalidSequences) {
+
+ // Set replacement chars for when converting from Unicode to codepage
+ icu::UnicodeString substString(m_options.invalidSequence.c_str());
+ ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);
+
+ if (U_FAILURE(err)) {
+ throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
+ }
+
+ } else {
+
+ // Tell ICU top stop (and return an error) on illegal byte sequences
+ ucnv_setToUCallBack(
+ m_to, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
+ );
+
+ if (U_FAILURE(err)) {
+ throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");
+ }
+
+ ucnv_setFromUCallBack(
+ m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
+ );
+
+ if (U_FAILURE(err)) {
+ throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
+ }
+ }
+}
+
+
+charsetFilteredOutputStream_icu::~charsetFilteredOutputStream_icu() {
+
+ if (m_from) ucnv_close(m_from);
+ if (m_to) ucnv_close(m_to);
+}
+
+
+outputStream& charsetFilteredOutputStream_icu::getNextOutputStream() {
+
+ return m_stream;
+}
+
+
+void charsetFilteredOutputStream_icu::writeImpl(
+ const byte_t* const data,
+ const size_t count
+) {
+
+ if (!m_from || !m_to) {
+ throw exceptions::charset_conv_error("Cannot initialize converters.");
+ }
+
+ // Allocate buffer for Unicode chars
+ const size_t uniSize = ucnv_getMinCharSize(m_from) * count * sizeof(UChar);
+ std::vector <UChar> uniBuffer(uniSize);
+
+ // Conversion loop
+ UErrorCode toErr = U_ZERO_ERROR;
+
+ const char* uniSource = reinterpret_cast <const char*>(data);
+ const char* uniSourceLimit = uniSource + count;
+
+ do {
+
+ // Convert from source charset to Unicode
+ UChar* uniTarget = &uniBuffer[0];
+ UChar* uniTargetLimit = &uniBuffer[0] + uniSize;
+
+ toErr = U_ZERO_ERROR;
+
+ ucnv_toUnicode(
+ m_from, &uniTarget, uniTargetLimit,
+ &uniSource, uniSourceLimit, NULL, /* flush */ UBool(0), &toErr
+ );
+
+ if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR) {
+
+ if (toErr == U_INVALID_CHAR_FOUND ||
+ toErr == U_TRUNCATED_CHAR_FOUND ||
+ toErr == U_ILLEGAL_CHAR_FOUND) {
+
+ throw exceptions::illegal_byte_sequence_for_charset();
+
+ } else {
+
+ throw exceptions::charset_conv_error(
+ "[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'."
+ );
+ }
+ }
+
+ const size_t uniLength = uniTarget - &uniBuffer[0];
+
+ // Allocate buffer for destination charset
+ const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength;
+ std::vector <char> cpBuffer(cpSize);
+
+ // Convert from Unicode to destination charset
+ UErrorCode fromErr = U_ZERO_ERROR;
+
+ const UChar* cpSource = &uniBuffer[0];
+ const UChar* cpSourceLimit = &uniBuffer[0] + uniLength;
+
+ do {
+
+ char* cpTarget = &cpBuffer[0];
+ char* cpTargetLimit = &cpBuffer[0] + cpSize;
+
+ fromErr = U_ZERO_ERROR;
+
+ ucnv_fromUnicode(
+ m_to, &cpTarget, cpTargetLimit,
+ &cpSource, cpSourceLimit, NULL, /* flush */ FALSE, &fromErr
+ );
+
+ if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
+
+ if (fromErr == U_INVALID_CHAR_FOUND ||
+ fromErr == U_TRUNCATED_CHAR_FOUND ||
+ fromErr == U_ILLEGAL_CHAR_FOUND) {
+
+ throw exceptions::illegal_byte_sequence_for_charset();
+
+ } else {
+
+ throw exceptions::charset_conv_error(
+ "[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'."
+ );
+ }
+ }
+
+ const size_t cpLength = cpTarget - &cpBuffer[0];
+
+ // Write successfully converted bytes
+ m_stream.write(&cpBuffer[0], cpLength);
+
+ } while (fromErr == U_BUFFER_OVERFLOW_ERROR);
+
+ } while (toErr == U_BUFFER_OVERFLOW_ERROR);
+}
+
+
+void charsetFilteredOutputStream_icu::flush() {
+
+ if (!m_from || !m_to) {
+ throw exceptions::charset_conv_error("Cannot initialize converters.");
+ }
+
+ // Allocate buffer for Unicode chars
+ const size_t uniSize = ucnv_getMinCharSize(m_from) * 1024 * sizeof(UChar);
+ std::vector <UChar> uniBuffer(uniSize);
+
+ // Conversion loop (with flushing)
+ UErrorCode toErr = U_ZERO_ERROR;
+
+ const char* uniSource = 0;
+ const char* uniSourceLimit = 0;
+
+ do {
+
+ // Convert from source charset to Unicode
+ UChar* uniTarget = &uniBuffer[0];
+ UChar* uniTargetLimit = &uniBuffer[0] + uniSize;
+
+ toErr = U_ZERO_ERROR;
+
+ ucnv_toUnicode(
+ m_from, &uniTarget, uniTargetLimit,
+ &uniSource, uniSourceLimit, NULL, /* flush */ UBool(1), &toErr
+ );
+
+ if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR) {
+
+ throw exceptions::charset_conv_error(
+ "[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'."
+ );
+ }
+
+ const size_t uniLength = uniTarget - &uniBuffer[0];
+
+ // Allocate buffer for destination charset
+ const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength;
+ std::vector <char> cpBuffer(cpSize);
+
+ // Convert from Unicode to destination charset
+ UErrorCode fromErr = U_ZERO_ERROR;
+
+ const UChar* cpSource = &uniBuffer[0];
+ const UChar* cpSourceLimit = &uniBuffer[0] + uniLength;
+
+ do {
+
+ char* cpTarget = &cpBuffer[0];
+ char* cpTargetLimit = &cpBuffer[0] + cpSize;
+
+ fromErr = U_ZERO_ERROR;
+
+ ucnv_fromUnicode(
+ m_to, &cpTarget, cpTargetLimit,
+ &cpSource, cpSourceLimit, NULL, /* flush */ UBool(1), &fromErr
+ );
+
+ if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
+
+ throw exceptions::charset_conv_error(
+ "[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'."
+ );
+ }
+
+ const size_t cpLength = cpTarget - &cpBuffer[0];
+
+ // Write successfully converted bytes
+ m_stream.write(&cpBuffer[0], cpLength);
+
+ } while (fromErr == U_BUFFER_OVERFLOW_ERROR);
+
+ } while (toErr == U_BUFFER_OVERFLOW_ERROR);
+
+ m_stream.flush();
+}
+
+
+} // utility
+
+
+} // vmime
+
+
+#endif // VMIME_CHARSETCONV_LIB_IS_ICU
diff --git a/src/vmime/charsetConverter_icu.hpp b/src/vmime/charsetConverter_icu.hpp
index cf5eb6bc..742999f0 100644
--- a/src/vmime/charsetConverter_icu.hpp
+++ b/src/vmime/charsetConverter_icu.hpp
@@ -1,137 +1,137 @@
-//
-// VMime library (http://www.vmime.org)
-// Copyright (C) 2002 Vincent Richard <[email protected]>
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 3 of
-// the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with this program; if not, write to the Free Software Foundation, Inc.,
-// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-//
-// Linking this library statically or dynamically with other modules is making
-// a combined work based on this library. Thus, the terms and conditions of
-// the GNU General Public License cover the whole combination.
-//
-
-#ifndef VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
-#define VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
-
-
-#include "vmime/config.hpp"
-
-
-#if VMIME_CHARSETCONV_LIB_IS_ICU
-
-
-#include "vmime/charsetConverter.hpp"
-
-
-struct UConverter;
-
-
-namespace vmime {
-
-
-/** A generic charset converter which uses ICU library.
- */
-class charsetConverter_icu : public charsetConverter {
-
-public:
-
- /** Construct and initialize an ICU charset converter.
- *
- * @param source input charset
- * @param dest output charset
- * @param opts conversion options
- */
- charsetConverter_icu(
- const charset& source,
- const charset& dest,
- const charsetConverterOptions& opts = charsetConverterOptions()
- );
-
- ~charsetConverter_icu();
-
- void convert(const string& in, string& out, status* st = NULL);
- void convert(utility::inputStream& in, utility::outputStream& out, status* st = NULL);
-
- shared_ptr <utility::charsetFilteredOutputStream> getFilteredOutputStream(
- utility::outputStream& os,
- const charsetConverterOptions& opts = charsetConverterOptions()
- );
-
-private:
-
- UConverter* m_from;
- UConverter* m_to;
-
- charset m_source;
- charset m_dest;
-
- charsetConverterOptions m_options;
-};
-
-
-namespace utility {
-
-
-class charsetFilteredOutputStream_icu : public charsetFilteredOutputStream {
-
-public:
-
- /** Construct a new filter for the specified output stream.
- *
- * @param source input charset
- * @param dest output charset
- * @param os stream into which write filtered data
- * @param opts conversion options
- */
- charsetFilteredOutputStream_icu(
- const charset& source,
- const charset& dest,
- outputStream* os,
- const charsetConverterOptions& opts = charsetConverterOptions()
- );
-
- ~charsetFilteredOutputStream_icu();
-
-
- outputStream& getNextOutputStream();
-
- void flush();
-
-protected:
-
- void writeImpl(const byte_t* const data, const size_t count);
-
-private:
-
- UConverter* m_from;
- UConverter* m_to;
-
- const charset m_sourceCharset;
- const charset m_destCharset;
-
- outputStream& m_stream;
-
- charsetConverterOptions m_options;
-};
-
-
-} // utility
-
-
-} // vmime
-
-
-#endif // VMIME_CHARSETCONV_LIB_IS_ICU
-
-#endif // VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
+//
+// VMime library (http://www.vmime.org)
+// Copyright (C) 2002 Vincent Richard <[email protected]>
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 3 of
+// the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Linking this library statically or dynamically with other modules is making
+// a combined work based on this library. Thus, the terms and conditions of
+// the GNU General Public License cover the whole combination.
+//
+
+#ifndef VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
+#define VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
+
+
+#include "vmime/config.hpp"
+
+
+#if VMIME_CHARSETCONV_LIB_IS_ICU
+
+
+#include "vmime/charsetConverter.hpp"
+
+
+struct UConverter;
+
+
+namespace vmime {
+
+
+/** A generic charset converter which uses ICU library.
+ */
+class charsetConverter_icu : public charsetConverter {
+
+public:
+
+ /** Construct and initialize an ICU charset converter.
+ *
+ * @param source input charset
+ * @param dest output charset
+ * @param opts conversion options
+ */
+ charsetConverter_icu(
+ const charset& source,
+ const charset& dest,
+ const charsetConverterOptions& opts = charsetConverterOptions()
+ );
+
+ ~charsetConverter_icu();
+
+ void convert(const string& in, string& out, status* st = NULL);
+ void convert(utility::inputStream& in, utility::outputStream& out, status* st = NULL);
+
+ shared_ptr <utility::charsetFilteredOutputStream> getFilteredOutputStream(
+ utility::outputStream& os,
+ const charsetConverterOptions& opts = charsetConverterOptions()
+ );
+
+private:
+
+ UConverter* m_from;
+ UConverter* m_to;
+
+ charset m_source;
+ charset m_dest;
+
+ charsetConverterOptions m_options;
+};
+
+
+namespace utility {
+
+
+class charsetFilteredOutputStream_icu : public charsetFilteredOutputStream {
+
+public:
+
+ /** Construct a new filter for the specified output stream.
+ *
+ * @param source input charset
+ * @param dest output charset
+ * @param os stream into which write filtered data
+ * @param opts conversion options
+ */
+ charsetFilteredOutputStream_icu(
+ const charset& source,
+ const charset& dest,
+ outputStream* os,
+ const charsetConverterOptions& opts = charsetConverterOptions()
+ );
+
+ ~charsetFilteredOutputStream_icu();
+
+
+ outputStream& getNextOutputStream();
+
+ void flush();
+
+protected:
+
+ void writeImpl(const byte_t* const data, const size_t count);
+
+private:
+
+ UConverter* m_from;
+ UConverter* m_to;
+
+ const charset m_sourceCharset;
+ const charset m_destCharset;
+
+ outputStream& m_stream;
+
+ charsetConverterOptions m_options;
+};
+
+
+} // utility
+
+
+} // vmime
+
+
+#endif // VMIME_CHARSETCONV_LIB_IS_ICU
+
+#endif // VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED