diff options
author | Vincent Richard <[email protected]> | 2021-01-11 21:06:42 +0000 |
---|---|---|
committer | Vincent Richard <[email protected]> | 2021-01-11 21:06:42 +0000 |
commit | 817358854d6a1ffea3dc213263f45aae860a19d7 (patch) | |
tree | 3fe5e631fd18f66df5c49336d0cb7ac9981dba31 | |
parent | #247 Fixed build with ICU 68.2-1 (diff) | |
download | vmime-817358854d6a1ffea3dc213263f45aae860a19d7.tar.gz vmime-817358854d6a1ffea3dc213263f45aae860a19d7.zip |
Fixed line endings.
-rw-r--r-- | src/vmime/charsetConverter_icu.cpp | 1144 | ||||
-rw-r--r-- | src/vmime/charsetConverter_icu.hpp | 274 |
2 files changed, 709 insertions, 709 deletions
diff --git a/src/vmime/charsetConverter_icu.cpp b/src/vmime/charsetConverter_icu.cpp index 5779cd90..cc74be98 100644 --- a/src/vmime/charsetConverter_icu.cpp +++ b/src/vmime/charsetConverter_icu.cpp @@ -1,572 +1,572 @@ -//
-// VMime library (http://www.vmime.org)
-// Copyright (C) 2002 Vincent Richard <[email protected]>
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 3 of
-// the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with this program; if not, write to the Free Software Foundation, Inc.,
-// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-//
-// Linking this library statically or dynamically with other modules is making
-// a combined work based on this library. Thus, the terms and conditions of
-// the GNU General Public License cover the whole combination.
-//
-
-#include "vmime/config.hpp"
-
-
-#if VMIME_CHARSETCONV_LIB_IS_ICU
-
-
-#include "vmime/charsetConverter_icu.hpp"
-
-#include "vmime/exception.hpp"
-#include "vmime/utility/inputStreamStringAdapter.hpp"
-#include "vmime/utility/outputStreamStringAdapter.hpp"
-
-
-#ifndef VMIME_BUILDING_DOC
-
- #include <unicode/ucnv.h>
- #include <unicode/ucnv_err.h>
-
-#endif // VMIME_BUILDING_DOC
-
-
-#include <unicode/unistr.h>
-
-
-namespace vmime {
-
-
-// static
-shared_ptr <charsetConverter> charsetConverter::createGenericConverter(
- const charset& source,
- const charset& dest,
- const charsetConverterOptions& opts
-) {
-
- return make_shared <charsetConverter_icu>(source, dest, opts);
-}
-
-
-charsetConverter_icu::charsetConverter_icu(
- const charset& source,
- const charset& dest,
- const charsetConverterOptions& opts
-)
- : m_from(NULL),
- m_to(NULL),
- m_source(source),
- m_dest(dest),
- m_options(opts) {
-
- UErrorCode err = U_ZERO_ERROR;
- m_from = ucnv_open(source.getName().c_str(), &err);
-
- if (!U_SUCCESS(err)) {
-
- throw exceptions::charset_conv_error(
- "Cannot initialize ICU converter for source charset '" + source.getName()
- + "' (error code: " + u_errorName(err) + "."
- );
- }
-
- m_to = ucnv_open(dest.getName().c_str(), &err);
-
- if (!U_SUCCESS(err)) {
-
- throw exceptions::charset_conv_error(
- "Cannot initialize ICU converter for destination charset '" + dest.getName()
- + "' (error code: " + u_errorName(err) + "."
- );
- }
-}
-
-
-charsetConverter_icu::~charsetConverter_icu() {
-
- if (m_from) ucnv_close(m_from);
- if (m_to) ucnv_close(m_to);
-}
-
-
-void charsetConverter_icu::convert(
- utility::inputStream& in,
- utility::outputStream& out,
- status* st
-) {
-
- UErrorCode err = U_ZERO_ERROR;
-
- ucnv_reset(m_from);
- ucnv_reset(m_to);
-
- if (st) {
- new (st) status();
- }
-
- // From buffers
- byte_t cpInBuffer[16]; // stream data put here
- const size_t outSize = ucnv_getMinCharSize(m_from) * sizeof(cpInBuffer) * sizeof(UChar);
- std::vector <UChar> uOutBuffer(outSize); // Unicode chars end up here
-
- // To buffers
- // converted (char) data end up here
- const size_t cpOutBufferSz = ucnv_getMaxCharSize(m_to) * outSize;
- std::vector <char> cpOutBuffer(cpOutBufferSz);
-
- // Tell ICU what to do when encountering an illegal byte sequence
- if (m_options.silentlyReplaceInvalidSequences) {
-
- // Set replacement chars for when converting from Unicode to codepage
- icu::UnicodeString substString(m_options.invalidSequence.c_str());
- ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);
-
- if (U_FAILURE(err)) {
- throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
- }
-
- } else {
-
- // Tell ICU top stop (and return an error) on illegal byte sequences
- ucnv_setToUCallBack(
- m_from, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
- );
-
- if (U_FAILURE(err)) {
- throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");
- }
-
- ucnv_setFromUCallBack(
- m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
- );
-
- if (U_FAILURE(err)) {
- throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
- }
- }
-
- // Input data available
- while (!in.eof()) {
-
- // Read input data into buffer
- size_t inLength = in.read(cpInBuffer, sizeof(cpInBuffer));
-
- // Beginning of read data
- const char* source = reinterpret_cast <const char*>(&cpInBuffer[0]);
- const char* sourceLimit = source + inLength; // end + 1
-
- UBool flush = in.eof(); // is this last run?
-
- UErrorCode toErr;
-
- // Loop until all source has been processed
- do {
-
- // Set up target pointers
- UChar* target = &uOutBuffer[0];
- UChar* targetLimit = &target[0] + outSize;
-
- toErr = U_ZERO_ERROR;
-
- ucnv_toUnicode(
- m_from, &target, targetLimit,
- &source, sourceLimit, NULL, flush, &toErr
- );
-
- if (st) {
- st->inputBytesRead += (source - reinterpret_cast <const char*>(&cpInBuffer[0]));
- }
-
- if (toErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(toErr)) {
-
- if (toErr == U_INVALID_CHAR_FOUND ||
- toErr == U_TRUNCATED_CHAR_FOUND ||
- toErr == U_ILLEGAL_CHAR_FOUND) {
-
- // Error will be thrown later (*)
-
- } else {
-
- throw exceptions::charset_conv_error(
- "[ICU] Error converting to Unicode from " + m_source.getName()
- );
- }
- }
-
- // The Unicode source is the buffer just written and the limit
- // is where the previous conversion stopped (target is moved in the conversion)
- const UChar* uSource = &uOutBuffer[0];
- UChar* uSourceLimit = &target[0];
- UErrorCode fromErr;
-
- // Loop until converted chars are fully written
- do {
-
- char* cpTarget = &cpOutBuffer[0];
- const char* cpTargetLimit = &cpOutBuffer[0] + cpOutBufferSz;
-
- fromErr = U_ZERO_ERROR;
-
- // Write converted bytes (Unicode) to destination codepage
- ucnv_fromUnicode(
- m_to, &cpTarget, cpTargetLimit,
- &uSource, uSourceLimit, NULL, flush, &fromErr
- );
-
- if (st) {
-
- // Decrement input bytes count by the number of input bytes in error
- char errBytes[16];
- int8_t errBytesLen = sizeof(errBytes);
- UErrorCode errBytesErr = U_ZERO_ERROR;
-
- ucnv_getInvalidChars(m_from, errBytes, &errBytesLen, &errBytesErr);
-
- st->inputBytesRead -= errBytesLen;
- st->outputBytesWritten += cpTarget - &cpOutBuffer[0];
- }
-
- // (*) If an error occurred while converting from input charset, throw it now
- if (toErr == U_INVALID_CHAR_FOUND ||
- toErr == U_TRUNCATED_CHAR_FOUND ||
- toErr == U_ILLEGAL_CHAR_FOUND) {
-
- throw exceptions::illegal_byte_sequence_for_charset();
- }
-
- if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
-
- if (fromErr == U_INVALID_CHAR_FOUND ||
- fromErr == U_TRUNCATED_CHAR_FOUND ||
- fromErr == U_ILLEGAL_CHAR_FOUND) {
-
- throw exceptions::illegal_byte_sequence_for_charset();
-
- } else {
-
- throw exceptions::charset_conv_error(
- "[ICU] Error converting from Unicode to " + m_dest.getName()
- );
- }
- }
-
- // Write to destination stream
- out.write(&cpOutBuffer[0], (cpTarget - &cpOutBuffer[0]));
-
- } while (fromErr == U_BUFFER_OVERFLOW_ERROR);
-
- } while (toErr == U_BUFFER_OVERFLOW_ERROR);
- }
-}
-
-
-void charsetConverter_icu::convert(const string& in, string& out, status* st) {
-
- if (st) {
- new (st) status();
- }
-
- out.clear();
-
- utility::inputStreamStringAdapter is(in);
- utility::outputStreamStringAdapter os(out);
-
- convert(is, os, st);
-
- os.flush();
-}
-
-
-shared_ptr <utility::charsetFilteredOutputStream>
- charsetConverter_icu::getFilteredOutputStream(
- utility::outputStream& os,
- const charsetConverterOptions& opts
- ) {
-
- return make_shared <utility::charsetFilteredOutputStream_icu>(m_source, m_dest, &os, opts);
-}
-
-
-
-// charsetFilteredOutputStream_icu
-
-namespace utility {
-
-
-charsetFilteredOutputStream_icu::charsetFilteredOutputStream_icu(
- const charset& source,
- const charset& dest,
- outputStream* os,
- const charsetConverterOptions& opts
-)
- : m_from(NULL),
- m_to(NULL),
- m_sourceCharset(source),
- m_destCharset(dest),
- m_stream(*os),
- m_options(opts) {
-
- UErrorCode err = U_ZERO_ERROR;
- m_from = ucnv_open(source.getName().c_str(), &err);
-
- if (!U_SUCCESS(err)) {
-
- throw exceptions::charset_conv_error(
- "Cannot initialize ICU converter for source charset '" + source.getName()
- + "' (error code: " + u_errorName(err) + "."
- );
- }
-
- m_to = ucnv_open(dest.getName().c_str(), &err);
-
- if (!U_SUCCESS(err)) {
-
- throw exceptions::charset_conv_error(
- "Cannot initialize ICU converter for destination charset '" + dest.getName()
- + "' (error code: " + u_errorName(err) + "."
- );
- }
-
- // Tell ICU what to do when encountering an illegal byte sequence
- if (m_options.silentlyReplaceInvalidSequences) {
-
- // Set replacement chars for when converting from Unicode to codepage
- icu::UnicodeString substString(m_options.invalidSequence.c_str());
- ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);
-
- if (U_FAILURE(err)) {
- throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
- }
-
- } else {
-
- // Tell ICU top stop (and return an error) on illegal byte sequences
- ucnv_setToUCallBack(
- m_to, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
- );
-
- if (U_FAILURE(err)) {
- throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");
- }
-
- ucnv_setFromUCallBack(
- m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
- );
-
- if (U_FAILURE(err)) {
- throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
- }
- }
-}
-
-
-charsetFilteredOutputStream_icu::~charsetFilteredOutputStream_icu() {
-
- if (m_from) ucnv_close(m_from);
- if (m_to) ucnv_close(m_to);
-}
-
-
-outputStream& charsetFilteredOutputStream_icu::getNextOutputStream() {
-
- return m_stream;
-}
-
-
-void charsetFilteredOutputStream_icu::writeImpl(
- const byte_t* const data,
- const size_t count
-) {
-
- if (!m_from || !m_to) {
- throw exceptions::charset_conv_error("Cannot initialize converters.");
- }
-
- // Allocate buffer for Unicode chars
- const size_t uniSize = ucnv_getMinCharSize(m_from) * count * sizeof(UChar);
- std::vector <UChar> uniBuffer(uniSize);
-
- // Conversion loop
- UErrorCode toErr = U_ZERO_ERROR;
-
- const char* uniSource = reinterpret_cast <const char*>(data);
- const char* uniSourceLimit = uniSource + count;
-
- do {
-
- // Convert from source charset to Unicode
- UChar* uniTarget = &uniBuffer[0];
- UChar* uniTargetLimit = &uniBuffer[0] + uniSize;
-
- toErr = U_ZERO_ERROR;
-
- ucnv_toUnicode(
- m_from, &uniTarget, uniTargetLimit,
- &uniSource, uniSourceLimit, NULL, /* flush */ UBool(0), &toErr
- );
-
- if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR) {
-
- if (toErr == U_INVALID_CHAR_FOUND ||
- toErr == U_TRUNCATED_CHAR_FOUND ||
- toErr == U_ILLEGAL_CHAR_FOUND) {
-
- throw exceptions::illegal_byte_sequence_for_charset();
-
- } else {
-
- throw exceptions::charset_conv_error(
- "[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'."
- );
- }
- }
-
- const size_t uniLength = uniTarget - &uniBuffer[0];
-
- // Allocate buffer for destination charset
- const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength;
- std::vector <char> cpBuffer(cpSize);
-
- // Convert from Unicode to destination charset
- UErrorCode fromErr = U_ZERO_ERROR;
-
- const UChar* cpSource = &uniBuffer[0];
- const UChar* cpSourceLimit = &uniBuffer[0] + uniLength;
-
- do {
-
- char* cpTarget = &cpBuffer[0];
- char* cpTargetLimit = &cpBuffer[0] + cpSize;
-
- fromErr = U_ZERO_ERROR;
-
- ucnv_fromUnicode(
- m_to, &cpTarget, cpTargetLimit,
- &cpSource, cpSourceLimit, NULL, /* flush */ FALSE, &fromErr
- );
-
- if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
-
- if (fromErr == U_INVALID_CHAR_FOUND ||
- fromErr == U_TRUNCATED_CHAR_FOUND ||
- fromErr == U_ILLEGAL_CHAR_FOUND) {
-
- throw exceptions::illegal_byte_sequence_for_charset();
-
- } else {
-
- throw exceptions::charset_conv_error(
- "[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'."
- );
- }
- }
-
- const size_t cpLength = cpTarget - &cpBuffer[0];
-
- // Write successfully converted bytes
- m_stream.write(&cpBuffer[0], cpLength);
-
- } while (fromErr == U_BUFFER_OVERFLOW_ERROR);
-
- } while (toErr == U_BUFFER_OVERFLOW_ERROR);
-}
-
-
-void charsetFilteredOutputStream_icu::flush() {
-
- if (!m_from || !m_to) {
- throw exceptions::charset_conv_error("Cannot initialize converters.");
- }
-
- // Allocate buffer for Unicode chars
- const size_t uniSize = ucnv_getMinCharSize(m_from) * 1024 * sizeof(UChar);
- std::vector <UChar> uniBuffer(uniSize);
-
- // Conversion loop (with flushing)
- UErrorCode toErr = U_ZERO_ERROR;
-
- const char* uniSource = 0;
- const char* uniSourceLimit = 0;
-
- do {
-
- // Convert from source charset to Unicode
- UChar* uniTarget = &uniBuffer[0];
- UChar* uniTargetLimit = &uniBuffer[0] + uniSize;
-
- toErr = U_ZERO_ERROR;
-
- ucnv_toUnicode(
- m_from, &uniTarget, uniTargetLimit,
- &uniSource, uniSourceLimit, NULL, /* flush */ UBool(1), &toErr
- );
-
- if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR) {
-
- throw exceptions::charset_conv_error(
- "[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'."
- );
- }
-
- const size_t uniLength = uniTarget - &uniBuffer[0];
-
- // Allocate buffer for destination charset
- const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength;
- std::vector <char> cpBuffer(cpSize);
-
- // Convert from Unicode to destination charset
- UErrorCode fromErr = U_ZERO_ERROR;
-
- const UChar* cpSource = &uniBuffer[0];
- const UChar* cpSourceLimit = &uniBuffer[0] + uniLength;
-
- do {
-
- char* cpTarget = &cpBuffer[0];
- char* cpTargetLimit = &cpBuffer[0] + cpSize;
-
- fromErr = U_ZERO_ERROR;
-
- ucnv_fromUnicode(
- m_to, &cpTarget, cpTargetLimit,
- &cpSource, cpSourceLimit, NULL, /* flush */ UBool(1), &fromErr
- );
-
- if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
-
- throw exceptions::charset_conv_error(
- "[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'."
- );
- }
-
- const size_t cpLength = cpTarget - &cpBuffer[0];
-
- // Write successfully converted bytes
- m_stream.write(&cpBuffer[0], cpLength);
-
- } while (fromErr == U_BUFFER_OVERFLOW_ERROR);
-
- } while (toErr == U_BUFFER_OVERFLOW_ERROR);
-
- m_stream.flush();
-}
-
-
-} // utility
-
-
-} // vmime
-
-
-#endif // VMIME_CHARSETCONV_LIB_IS_ICU
+// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002 Vincent Richard <[email protected]> +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#include "vmime/config.hpp" + + +#if VMIME_CHARSETCONV_LIB_IS_ICU + + +#include "vmime/charsetConverter_icu.hpp" + +#include "vmime/exception.hpp" +#include "vmime/utility/inputStreamStringAdapter.hpp" +#include "vmime/utility/outputStreamStringAdapter.hpp" + + +#ifndef VMIME_BUILDING_DOC + + #include <unicode/ucnv.h> + #include <unicode/ucnv_err.h> + +#endif // VMIME_BUILDING_DOC + + +#include <unicode/unistr.h> + + +namespace vmime { + + +// static +shared_ptr <charsetConverter> charsetConverter::createGenericConverter( + const charset& source, + const charset& dest, + const charsetConverterOptions& opts +) { + + return make_shared <charsetConverter_icu>(source, dest, opts); +} + + +charsetConverter_icu::charsetConverter_icu( + const charset& source, + const charset& dest, + const charsetConverterOptions& opts +) + : m_from(NULL), + m_to(NULL), + m_source(source), + m_dest(dest), + m_options(opts) { + + UErrorCode err = U_ZERO_ERROR; + m_from = ucnv_open(source.getName().c_str(), &err); + + if (!U_SUCCESS(err)) { + + throw exceptions::charset_conv_error( + "Cannot initialize ICU converter for source charset '" + source.getName() + + "' (error code: " + u_errorName(err) + "." + ); + } + + m_to = ucnv_open(dest.getName().c_str(), &err); + + if (!U_SUCCESS(err)) { + + throw exceptions::charset_conv_error( + "Cannot initialize ICU converter for destination charset '" + dest.getName() + + "' (error code: " + u_errorName(err) + "." + ); + } +} + + +charsetConverter_icu::~charsetConverter_icu() { + + if (m_from) ucnv_close(m_from); + if (m_to) ucnv_close(m_to); +} + + +void charsetConverter_icu::convert( + utility::inputStream& in, + utility::outputStream& out, + status* st +) { + + UErrorCode err = U_ZERO_ERROR; + + ucnv_reset(m_from); + ucnv_reset(m_to); + + if (st) { + new (st) status(); + } + + // From buffers + byte_t cpInBuffer[16]; // stream data put here + const size_t outSize = ucnv_getMinCharSize(m_from) * sizeof(cpInBuffer) * sizeof(UChar); + std::vector <UChar> uOutBuffer(outSize); // Unicode chars end up here + + // To buffers + // converted (char) data end up here + const size_t cpOutBufferSz = ucnv_getMaxCharSize(m_to) * outSize; + std::vector <char> cpOutBuffer(cpOutBufferSz); + + // Tell ICU what to do when encountering an illegal byte sequence + if (m_options.silentlyReplaceInvalidSequences) { + + // Set replacement chars for when converting from Unicode to codepage + icu::UnicodeString substString(m_options.invalidSequence.c_str()); + ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err); + + if (U_FAILURE(err)) { + throw exceptions::charset_conv_error("[ICU] Error when setting substitution string."); + } + + } else { + + // Tell ICU top stop (and return an error) on illegal byte sequences + ucnv_setToUCallBack( + m_from, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err + ); + + if (U_FAILURE(err)) { + throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback."); + } + + ucnv_setFromUCallBack( + m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err + ); + + if (U_FAILURE(err)) { + throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback."); + } + } + + // Input data available + while (!in.eof()) { + + // Read input data into buffer + size_t inLength = in.read(cpInBuffer, sizeof(cpInBuffer)); + + // Beginning of read data + const char* source = reinterpret_cast <const char*>(&cpInBuffer[0]); + const char* sourceLimit = source + inLength; // end + 1 + + UBool flush = in.eof(); // is this last run? + + UErrorCode toErr; + + // Loop until all source has been processed + do { + + // Set up target pointers + UChar* target = &uOutBuffer[0]; + UChar* targetLimit = &target[0] + outSize; + + toErr = U_ZERO_ERROR; + + ucnv_toUnicode( + m_from, &target, targetLimit, + &source, sourceLimit, NULL, flush, &toErr + ); + + if (st) { + st->inputBytesRead += (source - reinterpret_cast <const char*>(&cpInBuffer[0])); + } + + if (toErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(toErr)) { + + if (toErr == U_INVALID_CHAR_FOUND || + toErr == U_TRUNCATED_CHAR_FOUND || + toErr == U_ILLEGAL_CHAR_FOUND) { + + // Error will be thrown later (*) + + } else { + + throw exceptions::charset_conv_error( + "[ICU] Error converting to Unicode from " + m_source.getName() + ); + } + } + + // The Unicode source is the buffer just written and the limit + // is where the previous conversion stopped (target is moved in the conversion) + const UChar* uSource = &uOutBuffer[0]; + UChar* uSourceLimit = &target[0]; + UErrorCode fromErr; + + // Loop until converted chars are fully written + do { + + char* cpTarget = &cpOutBuffer[0]; + const char* cpTargetLimit = &cpOutBuffer[0] + cpOutBufferSz; + + fromErr = U_ZERO_ERROR; + + // Write converted bytes (Unicode) to destination codepage + ucnv_fromUnicode( + m_to, &cpTarget, cpTargetLimit, + &uSource, uSourceLimit, NULL, flush, &fromErr + ); + + if (st) { + + // Decrement input bytes count by the number of input bytes in error + char errBytes[16]; + int8_t errBytesLen = sizeof(errBytes); + UErrorCode errBytesErr = U_ZERO_ERROR; + + ucnv_getInvalidChars(m_from, errBytes, &errBytesLen, &errBytesErr); + + st->inputBytesRead -= errBytesLen; + st->outputBytesWritten += cpTarget - &cpOutBuffer[0]; + } + + // (*) If an error occurred while converting from input charset, throw it now + if (toErr == U_INVALID_CHAR_FOUND || + toErr == U_TRUNCATED_CHAR_FOUND || + toErr == U_ILLEGAL_CHAR_FOUND) { + + throw exceptions::illegal_byte_sequence_for_charset(); + } + + if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) { + + if (fromErr == U_INVALID_CHAR_FOUND || + fromErr == U_TRUNCATED_CHAR_FOUND || + fromErr == U_ILLEGAL_CHAR_FOUND) { + + throw exceptions::illegal_byte_sequence_for_charset(); + + } else { + + throw exceptions::charset_conv_error( + "[ICU] Error converting from Unicode to " + m_dest.getName() + ); + } + } + + // Write to destination stream + out.write(&cpOutBuffer[0], (cpTarget - &cpOutBuffer[0])); + + } while (fromErr == U_BUFFER_OVERFLOW_ERROR); + + } while (toErr == U_BUFFER_OVERFLOW_ERROR); + } +} + + +void charsetConverter_icu::convert(const string& in, string& out, status* st) { + + if (st) { + new (st) status(); + } + + out.clear(); + + utility::inputStreamStringAdapter is(in); + utility::outputStreamStringAdapter os(out); + + convert(is, os, st); + + os.flush(); +} + + +shared_ptr <utility::charsetFilteredOutputStream> + charsetConverter_icu::getFilteredOutputStream( + utility::outputStream& os, + const charsetConverterOptions& opts + ) { + + return make_shared <utility::charsetFilteredOutputStream_icu>(m_source, m_dest, &os, opts); +} + + + +// charsetFilteredOutputStream_icu + +namespace utility { + + +charsetFilteredOutputStream_icu::charsetFilteredOutputStream_icu( + const charset& source, + const charset& dest, + outputStream* os, + const charsetConverterOptions& opts +) + : m_from(NULL), + m_to(NULL), + m_sourceCharset(source), + m_destCharset(dest), + m_stream(*os), + m_options(opts) { + + UErrorCode err = U_ZERO_ERROR; + m_from = ucnv_open(source.getName().c_str(), &err); + + if (!U_SUCCESS(err)) { + + throw exceptions::charset_conv_error( + "Cannot initialize ICU converter for source charset '" + source.getName() + + "' (error code: " + u_errorName(err) + "." + ); + } + + m_to = ucnv_open(dest.getName().c_str(), &err); + + if (!U_SUCCESS(err)) { + + throw exceptions::charset_conv_error( + "Cannot initialize ICU converter for destination charset '" + dest.getName() + + "' (error code: " + u_errorName(err) + "." + ); + } + + // Tell ICU what to do when encountering an illegal byte sequence + if (m_options.silentlyReplaceInvalidSequences) { + + // Set replacement chars for when converting from Unicode to codepage + icu::UnicodeString substString(m_options.invalidSequence.c_str()); + ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err); + + if (U_FAILURE(err)) { + throw exceptions::charset_conv_error("[ICU] Error when setting substitution string."); + } + + } else { + + // Tell ICU top stop (and return an error) on illegal byte sequences + ucnv_setToUCallBack( + m_to, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err + ); + + if (U_FAILURE(err)) { + throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback."); + } + + ucnv_setFromUCallBack( + m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err + ); + + if (U_FAILURE(err)) { + throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback."); + } + } +} + + +charsetFilteredOutputStream_icu::~charsetFilteredOutputStream_icu() { + + if (m_from) ucnv_close(m_from); + if (m_to) ucnv_close(m_to); +} + + +outputStream& charsetFilteredOutputStream_icu::getNextOutputStream() { + + return m_stream; +} + + +void charsetFilteredOutputStream_icu::writeImpl( + const byte_t* const data, + const size_t count +) { + + if (!m_from || !m_to) { + throw exceptions::charset_conv_error("Cannot initialize converters."); + } + + // Allocate buffer for Unicode chars + const size_t uniSize = ucnv_getMinCharSize(m_from) * count * sizeof(UChar); + std::vector <UChar> uniBuffer(uniSize); + + // Conversion loop + UErrorCode toErr = U_ZERO_ERROR; + + const char* uniSource = reinterpret_cast <const char*>(data); + const char* uniSourceLimit = uniSource + count; + + do { + + // Convert from source charset to Unicode + UChar* uniTarget = &uniBuffer[0]; + UChar* uniTargetLimit = &uniBuffer[0] + uniSize; + + toErr = U_ZERO_ERROR; + + ucnv_toUnicode( + m_from, &uniTarget, uniTargetLimit, + &uniSource, uniSourceLimit, NULL, /* flush */ UBool(0), &toErr + ); + + if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR) { + + if (toErr == U_INVALID_CHAR_FOUND || + toErr == U_TRUNCATED_CHAR_FOUND || + toErr == U_ILLEGAL_CHAR_FOUND) { + + throw exceptions::illegal_byte_sequence_for_charset(); + + } else { + + throw exceptions::charset_conv_error( + "[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'." + ); + } + } + + const size_t uniLength = uniTarget - &uniBuffer[0]; + + // Allocate buffer for destination charset + const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength; + std::vector <char> cpBuffer(cpSize); + + // Convert from Unicode to destination charset + UErrorCode fromErr = U_ZERO_ERROR; + + const UChar* cpSource = &uniBuffer[0]; + const UChar* cpSourceLimit = &uniBuffer[0] + uniLength; + + do { + + char* cpTarget = &cpBuffer[0]; + char* cpTargetLimit = &cpBuffer[0] + cpSize; + + fromErr = U_ZERO_ERROR; + + ucnv_fromUnicode( + m_to, &cpTarget, cpTargetLimit, + &cpSource, cpSourceLimit, NULL, /* flush */ FALSE, &fromErr + ); + + if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) { + + if (fromErr == U_INVALID_CHAR_FOUND || + fromErr == U_TRUNCATED_CHAR_FOUND || + fromErr == U_ILLEGAL_CHAR_FOUND) { + + throw exceptions::illegal_byte_sequence_for_charset(); + + } else { + + throw exceptions::charset_conv_error( + "[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'." + ); + } + } + + const size_t cpLength = cpTarget - &cpBuffer[0]; + + // Write successfully converted bytes + m_stream.write(&cpBuffer[0], cpLength); + + } while (fromErr == U_BUFFER_OVERFLOW_ERROR); + + } while (toErr == U_BUFFER_OVERFLOW_ERROR); +} + + +void charsetFilteredOutputStream_icu::flush() { + + if (!m_from || !m_to) { + throw exceptions::charset_conv_error("Cannot initialize converters."); + } + + // Allocate buffer for Unicode chars + const size_t uniSize = ucnv_getMinCharSize(m_from) * 1024 * sizeof(UChar); + std::vector <UChar> uniBuffer(uniSize); + + // Conversion loop (with flushing) + UErrorCode toErr = U_ZERO_ERROR; + + const char* uniSource = 0; + const char* uniSourceLimit = 0; + + do { + + // Convert from source charset to Unicode + UChar* uniTarget = &uniBuffer[0]; + UChar* uniTargetLimit = &uniBuffer[0] + uniSize; + + toErr = U_ZERO_ERROR; + + ucnv_toUnicode( + m_from, &uniTarget, uniTargetLimit, + &uniSource, uniSourceLimit, NULL, /* flush */ UBool(1), &toErr + ); + + if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR) { + + throw exceptions::charset_conv_error( + "[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'." + ); + } + + const size_t uniLength = uniTarget - &uniBuffer[0]; + + // Allocate buffer for destination charset + const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength; + std::vector <char> cpBuffer(cpSize); + + // Convert from Unicode to destination charset + UErrorCode fromErr = U_ZERO_ERROR; + + const UChar* cpSource = &uniBuffer[0]; + const UChar* cpSourceLimit = &uniBuffer[0] + uniLength; + + do { + + char* cpTarget = &cpBuffer[0]; + char* cpTargetLimit = &cpBuffer[0] + cpSize; + + fromErr = U_ZERO_ERROR; + + ucnv_fromUnicode( + m_to, &cpTarget, cpTargetLimit, + &cpSource, cpSourceLimit, NULL, /* flush */ UBool(1), &fromErr + ); + + if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) { + + throw exceptions::charset_conv_error( + "[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'." + ); + } + + const size_t cpLength = cpTarget - &cpBuffer[0]; + + // Write successfully converted bytes + m_stream.write(&cpBuffer[0], cpLength); + + } while (fromErr == U_BUFFER_OVERFLOW_ERROR); + + } while (toErr == U_BUFFER_OVERFLOW_ERROR); + + m_stream.flush(); +} + + +} // utility + + +} // vmime + + +#endif // VMIME_CHARSETCONV_LIB_IS_ICU diff --git a/src/vmime/charsetConverter_icu.hpp b/src/vmime/charsetConverter_icu.hpp index cf5eb6bc..742999f0 100644 --- a/src/vmime/charsetConverter_icu.hpp +++ b/src/vmime/charsetConverter_icu.hpp @@ -1,137 +1,137 @@ -//
-// VMime library (http://www.vmime.org)
-// Copyright (C) 2002 Vincent Richard <[email protected]>
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 3 of
-// the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with this program; if not, write to the Free Software Foundation, Inc.,
-// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-//
-// Linking this library statically or dynamically with other modules is making
-// a combined work based on this library. Thus, the terms and conditions of
-// the GNU General Public License cover the whole combination.
-//
-
-#ifndef VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
-#define VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
-
-
-#include "vmime/config.hpp"
-
-
-#if VMIME_CHARSETCONV_LIB_IS_ICU
-
-
-#include "vmime/charsetConverter.hpp"
-
-
-struct UConverter;
-
-
-namespace vmime {
-
-
-/** A generic charset converter which uses ICU library.
- */
-class charsetConverter_icu : public charsetConverter {
-
-public:
-
- /** Construct and initialize an ICU charset converter.
- *
- * @param source input charset
- * @param dest output charset
- * @param opts conversion options
- */
- charsetConverter_icu(
- const charset& source,
- const charset& dest,
- const charsetConverterOptions& opts = charsetConverterOptions()
- );
-
- ~charsetConverter_icu();
-
- void convert(const string& in, string& out, status* st = NULL);
- void convert(utility::inputStream& in, utility::outputStream& out, status* st = NULL);
-
- shared_ptr <utility::charsetFilteredOutputStream> getFilteredOutputStream(
- utility::outputStream& os,
- const charsetConverterOptions& opts = charsetConverterOptions()
- );
-
-private:
-
- UConverter* m_from;
- UConverter* m_to;
-
- charset m_source;
- charset m_dest;
-
- charsetConverterOptions m_options;
-};
-
-
-namespace utility {
-
-
-class charsetFilteredOutputStream_icu : public charsetFilteredOutputStream {
-
-public:
-
- /** Construct a new filter for the specified output stream.
- *
- * @param source input charset
- * @param dest output charset
- * @param os stream into which write filtered data
- * @param opts conversion options
- */
- charsetFilteredOutputStream_icu(
- const charset& source,
- const charset& dest,
- outputStream* os,
- const charsetConverterOptions& opts = charsetConverterOptions()
- );
-
- ~charsetFilteredOutputStream_icu();
-
-
- outputStream& getNextOutputStream();
-
- void flush();
-
-protected:
-
- void writeImpl(const byte_t* const data, const size_t count);
-
-private:
-
- UConverter* m_from;
- UConverter* m_to;
-
- const charset m_sourceCharset;
- const charset m_destCharset;
-
- outputStream& m_stream;
-
- charsetConverterOptions m_options;
-};
-
-
-} // utility
-
-
-} // vmime
-
-
-#endif // VMIME_CHARSETCONV_LIB_IS_ICU
-
-#endif // VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
+// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002 Vincent Richard <[email protected]> +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#ifndef VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED +#define VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED + + +#include "vmime/config.hpp" + + +#if VMIME_CHARSETCONV_LIB_IS_ICU + + +#include "vmime/charsetConverter.hpp" + + +struct UConverter; + + +namespace vmime { + + +/** A generic charset converter which uses ICU library. + */ +class charsetConverter_icu : public charsetConverter { + +public: + + /** Construct and initialize an ICU charset converter. + * + * @param source input charset + * @param dest output charset + * @param opts conversion options + */ + charsetConverter_icu( + const charset& source, + const charset& dest, + const charsetConverterOptions& opts = charsetConverterOptions() + ); + + ~charsetConverter_icu(); + + void convert(const string& in, string& out, status* st = NULL); + void convert(utility::inputStream& in, utility::outputStream& out, status* st = NULL); + + shared_ptr <utility::charsetFilteredOutputStream> getFilteredOutputStream( + utility::outputStream& os, + const charsetConverterOptions& opts = charsetConverterOptions() + ); + +private: + + UConverter* m_from; + UConverter* m_to; + + charset m_source; + charset m_dest; + + charsetConverterOptions m_options; +}; + + +namespace utility { + + +class charsetFilteredOutputStream_icu : public charsetFilteredOutputStream { + +public: + + /** Construct a new filter for the specified output stream. + * + * @param source input charset + * @param dest output charset + * @param os stream into which write filtered data + * @param opts conversion options + */ + charsetFilteredOutputStream_icu( + const charset& source, + const charset& dest, + outputStream* os, + const charsetConverterOptions& opts = charsetConverterOptions() + ); + + ~charsetFilteredOutputStream_icu(); + + + outputStream& getNextOutputStream(); + + void flush(); + +protected: + + void writeImpl(const byte_t* const data, const size_t count); + +private: + + UConverter* m_from; + UConverter* m_to; + + const charset m_sourceCharset; + const charset m_destCharset; + + outputStream& m_stream; + + charsetConverterOptions m_options; +}; + + +} // utility + + +} // vmime + + +#endif // VMIME_CHARSETCONV_LIB_IS_ICU + +#endif // VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED |