Fixed line endings.

author: Vincent Richard <[email protected]> 2021-01-11 21:06:42 +0000
committer: Vincent Richard <[email protected]> 2021-01-11 21:06:42 +0000
commit: 817358854d6a1ffea3dc213263f45aae860a19d7 (patch)
tree: 3fe5e631fd18f66df5c49336d0cb7ac9981dba31
parent: #247 Fixed build with ICU 68.2-1 (diff)
download: vmime-817358854d6a1ffea3dc213263f45aae860a19d7.tar.gz
vmime-817358854d6a1ffea3dc213263f45aae860a19d7.zip
2 files changed, 709 insertions, 709 deletions
diff --git a/src/vmime/charsetConverter_icu.cpp b/src/vmime/charsetConverter_icu.cpp
index 5779cd90..cc74be98 100644
--- a/src/vmime/charsetConverter_icu.cpp
+++ b/src/vmime/charsetConverter_icu.cpp
@@ -1,572 +1,572 @@
-//
-// VMime library (http://www.vmime.org)
-// Copyright (C) 2002 Vincent Richard <[email protected]>
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 3 of
-// the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with this program; if not, write to the Free Software Foundation, Inc.,
-// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-//
-// Linking this library statically or dynamically with other modules is making
-// a combined work based on this library.  Thus, the terms and conditions of
-// the GNU General Public License cover the whole combination.
-//
-
-#include "vmime/config.hpp"
-
-
-#if VMIME_CHARSETCONV_LIB_IS_ICU
-
-
-#include "vmime/charsetConverter_icu.hpp"
-
-#include "vmime/exception.hpp"
-#include "vmime/utility/inputStreamStringAdapter.hpp"
-#include "vmime/utility/outputStreamStringAdapter.hpp"
-
-
-#ifndef VMIME_BUILDING_DOC
-
-	#include <unicode/ucnv.h>
-	#include <unicode/ucnv_err.h>
-
-#endif // VMIME_BUILDING_DOC
-
-
-#include <unicode/unistr.h>
-
-
-namespace vmime {
-
-
-// static
-shared_ptr <charsetConverter> charsetConverter::createGenericConverter(
-	const charset& source,
-	const charset& dest,
-	const charsetConverterOptions& opts
-) {
-
-	return make_shared <charsetConverter_icu>(source, dest, opts);
-}
-
-
-charsetConverter_icu::charsetConverter_icu(
-	const charset& source,
-	const charset& dest,
-	const charsetConverterOptions& opts
-)
-	: m_from(NULL),
-	  m_to(NULL),
-	  m_source(source),
-	  m_dest(dest),
-	  m_options(opts) {
-
-	UErrorCode err = U_ZERO_ERROR;
-	m_from = ucnv_open(source.getName().c_str(), &err);
-
-	if (!U_SUCCESS(err)) {
-
-		throw exceptions::charset_conv_error(
-			"Cannot initialize ICU converter for source charset '" + source.getName()
-			+ "' (error code: " + u_errorName(err) + "."
-		);
-	}
-
-	m_to = ucnv_open(dest.getName().c_str(), &err);
-
-	if (!U_SUCCESS(err)) {
-
-		throw exceptions::charset_conv_error(
-			"Cannot initialize ICU converter for destination charset '" + dest.getName()
-			+ "' (error code: " + u_errorName(err) + "."
-		);
-	}
-}
-
-
-charsetConverter_icu::~charsetConverter_icu() {
-
-	if (m_from) ucnv_close(m_from);
-	if (m_to) ucnv_close(m_to);
-}
-
-
-void charsetConverter_icu::convert(
-	utility::inputStream& in,
-	utility::outputStream& out,
-	status* st
-) {
-
-	UErrorCode err = U_ZERO_ERROR;
-
-	ucnv_reset(m_from);
-	ucnv_reset(m_to);
-
-	if (st) {
-		new (st) status();
-	}
-
-	// From buffers
-	byte_t cpInBuffer[16]; // stream data put here
-	const size_t outSize = ucnv_getMinCharSize(m_from) * sizeof(cpInBuffer) * sizeof(UChar);
-	std::vector <UChar> uOutBuffer(outSize); // Unicode chars end up here
-
-	// To buffers
-	// converted (char) data end up here
-	const size_t cpOutBufferSz = ucnv_getMaxCharSize(m_to) * outSize;
-	std::vector <char> cpOutBuffer(cpOutBufferSz);
-
-	// Tell ICU what to do when encountering an illegal byte sequence
-	if (m_options.silentlyReplaceInvalidSequences) {
-
-		// Set replacement chars for when converting from Unicode to codepage
-		icu::UnicodeString substString(m_options.invalidSequence.c_str());
-		ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);
-
-		if (U_FAILURE(err)) {
-			throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
-		}
-
-	} else {
-
-		// Tell ICU top stop (and return an error) on illegal byte sequences
-		ucnv_setToUCallBack(
-			m_from, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
-		);
-
-		if (U_FAILURE(err)) {
-			throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");
-		}
-
-		ucnv_setFromUCallBack(
-			m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
-		);
-
-		if (U_FAILURE(err)) {
-			throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
-		}
-	}
-
-	// Input data available
-	while (!in.eof()) {
-
-		// Read input data into buffer
-		size_t inLength = in.read(cpInBuffer, sizeof(cpInBuffer));
-
-		// Beginning of read data
-		const char* source = reinterpret_cast <const char*>(&cpInBuffer[0]);
-		const char* sourceLimit = source + inLength; // end + 1
-
-		UBool flush = in.eof();  // is this last run?
-
-		UErrorCode toErr;
-
-		// Loop until all source has been processed
-		do {
-
-			// Set up target pointers
-			UChar* target = &uOutBuffer[0];
-			UChar* targetLimit = &target[0] + outSize;
-
-			toErr = U_ZERO_ERROR;
-
-			ucnv_toUnicode(
-				m_from, &target, targetLimit,
-				&source, sourceLimit, NULL, flush, &toErr
-			);
-
-			if (st) {
-				st->inputBytesRead += (source - reinterpret_cast <const char*>(&cpInBuffer[0]));
-			}
-
-			if (toErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(toErr)) {
-
-				if (toErr == U_INVALID_CHAR_FOUND ||
-				    toErr == U_TRUNCATED_CHAR_FOUND ||
-				    toErr == U_ILLEGAL_CHAR_FOUND) {
-
-					// Error will be thrown later (*)
-
-				} else {
-
-					throw exceptions::charset_conv_error(
-						"[ICU] Error converting to Unicode from " + m_source.getName()
-					);
-				}
-			}
-
-			// The Unicode source is the buffer just written and the limit
-			// is where the previous conversion stopped (target is moved in the conversion)
-			const UChar* uSource = &uOutBuffer[0];
-			UChar* uSourceLimit = &target[0];
-			UErrorCode fromErr;
-
-			// Loop until converted chars are fully written
-			do {
-
-				char* cpTarget = &cpOutBuffer[0];
-				const char* cpTargetLimit = &cpOutBuffer[0] + cpOutBufferSz;
-
-				fromErr = U_ZERO_ERROR;
-
-				// Write converted bytes (Unicode) to destination codepage
-				ucnv_fromUnicode(
-					m_to, &cpTarget, cpTargetLimit,
-					&uSource, uSourceLimit, NULL, flush, &fromErr
-				);
-
-				if (st) {
-
-					// Decrement input bytes count by the number of input bytes in error
-					char errBytes[16];
-					int8_t errBytesLen = sizeof(errBytes);
-					UErrorCode errBytesErr = U_ZERO_ERROR;
-
-	 				ucnv_getInvalidChars(m_from, errBytes, &errBytesLen, &errBytesErr);
-
-					st->inputBytesRead -= errBytesLen;
-					st->outputBytesWritten += cpTarget - &cpOutBuffer[0];
-				}
-
-				// (*) If an error occurred while converting from input charset, throw it now
-				if (toErr == U_INVALID_CHAR_FOUND ||
-				    toErr == U_TRUNCATED_CHAR_FOUND ||
-				    toErr == U_ILLEGAL_CHAR_FOUND) {
-
-					throw exceptions::illegal_byte_sequence_for_charset();
-				}
-
-				if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
-
-					if (fromErr == U_INVALID_CHAR_FOUND ||
-					    fromErr == U_TRUNCATED_CHAR_FOUND ||
-					    fromErr == U_ILLEGAL_CHAR_FOUND) {
-
-						throw exceptions::illegal_byte_sequence_for_charset();
-
-					} else {
-
-						throw exceptions::charset_conv_error(
-							"[ICU] Error converting from Unicode to " + m_dest.getName()
-						);
-					}
-				}
-
-				// Write to destination stream
-				out.write(&cpOutBuffer[0], (cpTarget - &cpOutBuffer[0]));
-
-			} while (fromErr == U_BUFFER_OVERFLOW_ERROR);
-
-		} while (toErr == U_BUFFER_OVERFLOW_ERROR);
-	}
-}
-
-
-void charsetConverter_icu::convert(const string& in, string& out, status* st) {
-
-	if (st) {
-		new (st) status();
-	}
-
-	out.clear();
-
-	utility::inputStreamStringAdapter is(in);
-	utility::outputStreamStringAdapter os(out);
-
-	convert(is, os, st);
-
-	os.flush();
-}
-
-
-shared_ptr <utility::charsetFilteredOutputStream>
-	charsetConverter_icu::getFilteredOutputStream(
-		utility::outputStream& os,
-		const charsetConverterOptions& opts
-	) {
-
-	return make_shared <utility::charsetFilteredOutputStream_icu>(m_source, m_dest, &os, opts);
-}
-
-
-
-// charsetFilteredOutputStream_icu
-
-namespace utility {
-
-
-charsetFilteredOutputStream_icu::charsetFilteredOutputStream_icu(
-	const charset& source,
-	const charset& dest,
-	outputStream* os,
-	const charsetConverterOptions& opts
-)
-	: m_from(NULL),
-	  m_to(NULL),
-	  m_sourceCharset(source),
-	  m_destCharset(dest),
-	  m_stream(*os),
-	  m_options(opts) {
-
-	UErrorCode err = U_ZERO_ERROR;
-	m_from = ucnv_open(source.getName().c_str(), &err);
-
-	if (!U_SUCCESS(err)) {
-
-		throw exceptions::charset_conv_error(
-			"Cannot initialize ICU converter for source charset '" + source.getName()
-			+ "' (error code: " + u_errorName(err) + "."
-		);
-	}
-
-	m_to = ucnv_open(dest.getName().c_str(), &err);
-
-	if (!U_SUCCESS(err)) {
-
-		throw exceptions::charset_conv_error(
-			"Cannot initialize ICU converter for destination charset '" + dest.getName()
-			+ "' (error code: " + u_errorName(err) + "."
-		);
-	}
-
-	// Tell ICU what to do when encountering an illegal byte sequence
-	if (m_options.silentlyReplaceInvalidSequences) {
-
-		// Set replacement chars for when converting from Unicode to codepage
-		icu::UnicodeString substString(m_options.invalidSequence.c_str());
-		ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);
-
-		if (U_FAILURE(err)) {
-			throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
-		}
-
-	} else {
-
-		// Tell ICU top stop (and return an error) on illegal byte sequences
-		ucnv_setToUCallBack(
-			m_to, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
-		);
-
-		if (U_FAILURE(err)) {
-			throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");
-		}
-
-		ucnv_setFromUCallBack(
-			m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
-		);
-
-		if (U_FAILURE(err)) {
-			throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
-		}
-	}
-}
-
-
-charsetFilteredOutputStream_icu::~charsetFilteredOutputStream_icu() {
-
-	if (m_from) ucnv_close(m_from);
-	if (m_to) ucnv_close(m_to);
-}
-
-
-outputStream& charsetFilteredOutputStream_icu::getNextOutputStream() {
-
-	return m_stream;
-}
-
-
-void charsetFilteredOutputStream_icu::writeImpl(
-	const byte_t* const data,
-	const size_t count
-) {
-
-	if (!m_from || !m_to) {
-		throw exceptions::charset_conv_error("Cannot initialize converters.");
-	}
-
-	// Allocate buffer for Unicode chars
-	const size_t uniSize = ucnv_getMinCharSize(m_from) * count * sizeof(UChar);
-	std::vector <UChar> uniBuffer(uniSize);
-
-	// Conversion loop
-	UErrorCode toErr = U_ZERO_ERROR;
-
-	const char* uniSource = reinterpret_cast <const char*>(data);
-	const char* uniSourceLimit = uniSource + count;
-
-	do {
-
-		// Convert from source charset to Unicode
-		UChar* uniTarget = &uniBuffer[0];
-		UChar* uniTargetLimit = &uniBuffer[0] + uniSize;
-
-		toErr = U_ZERO_ERROR;
-
-		ucnv_toUnicode(
-			m_from, &uniTarget, uniTargetLimit,
-			&uniSource, uniSourceLimit, NULL, /* flush */ UBool(0), &toErr
-		);
-
-		if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR) {
-
-			if (toErr == U_INVALID_CHAR_FOUND ||
-			    toErr == U_TRUNCATED_CHAR_FOUND ||
-			    toErr == U_ILLEGAL_CHAR_FOUND) {
-
-				throw exceptions::illegal_byte_sequence_for_charset();
-
-			} else {
-
-				throw exceptions::charset_conv_error(
-					"[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'."
-				);
-			}
-		}
-
-		const size_t uniLength = uniTarget - &uniBuffer[0];
-
-		// Allocate buffer for destination charset
-		const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength;
-		std::vector <char> cpBuffer(cpSize);
-
-		// Convert from Unicode to destination charset
-		UErrorCode fromErr = U_ZERO_ERROR;
-
-		const UChar* cpSource = &uniBuffer[0];
-		const UChar* cpSourceLimit = &uniBuffer[0] + uniLength;
-
-		do {
-
-			char* cpTarget = &cpBuffer[0];
-			char* cpTargetLimit = &cpBuffer[0] + cpSize;
-
-			fromErr = U_ZERO_ERROR;
-
-			ucnv_fromUnicode(
-				m_to, &cpTarget, cpTargetLimit,
-				&cpSource, cpSourceLimit, NULL, /* flush */ FALSE, &fromErr
-			);
-
-			if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
-
-				if (fromErr == U_INVALID_CHAR_FOUND ||
-				    fromErr == U_TRUNCATED_CHAR_FOUND ||
-				    fromErr == U_ILLEGAL_CHAR_FOUND) {
-
-					throw exceptions::illegal_byte_sequence_for_charset();
-
-				} else {
-
-					throw exceptions::charset_conv_error(
-						"[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'."
-					);
-				}
-			}
-
-			const size_t cpLength = cpTarget - &cpBuffer[0];
-
-			// Write successfully converted bytes
-			m_stream.write(&cpBuffer[0], cpLength);
-
-		} while (fromErr == U_BUFFER_OVERFLOW_ERROR);
-
-	} while (toErr == U_BUFFER_OVERFLOW_ERROR);
-}
-
-
-void charsetFilteredOutputStream_icu::flush() {
-
-	if (!m_from || !m_to) {
-		throw exceptions::charset_conv_error("Cannot initialize converters.");
-	}
-
-	// Allocate buffer for Unicode chars
-	const size_t uniSize = ucnv_getMinCharSize(m_from) * 1024 * sizeof(UChar);
-	std::vector <UChar> uniBuffer(uniSize);
-
-	// Conversion loop (with flushing)
-	UErrorCode toErr = U_ZERO_ERROR;
-
-	const char* uniSource = 0;
-	const char* uniSourceLimit = 0;
-
-	do {
-
-		// Convert from source charset to Unicode
-		UChar* uniTarget = &uniBuffer[0];
-		UChar* uniTargetLimit = &uniBuffer[0] + uniSize;
-
-		toErr = U_ZERO_ERROR;
-
-		ucnv_toUnicode(
-			m_from, &uniTarget, uniTargetLimit,
-			&uniSource, uniSourceLimit, NULL, /* flush */ UBool(1), &toErr
-		);
-
-		if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR) {
-
-			throw exceptions::charset_conv_error(
-				"[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'."
-			);
-		}
-
-		const size_t uniLength = uniTarget - &uniBuffer[0];
-
-		// Allocate buffer for destination charset
-		const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength;
-		std::vector <char> cpBuffer(cpSize);
-
-		// Convert from Unicode to destination charset
-		UErrorCode fromErr = U_ZERO_ERROR;
-
-		const UChar* cpSource = &uniBuffer[0];
-		const UChar* cpSourceLimit = &uniBuffer[0] + uniLength;
-
-		do {
-
-			char* cpTarget = &cpBuffer[0];
-			char* cpTargetLimit = &cpBuffer[0] + cpSize;
-
-			fromErr = U_ZERO_ERROR;
-
-			ucnv_fromUnicode(
-				m_to, &cpTarget, cpTargetLimit,
-				&cpSource, cpSourceLimit, NULL, /* flush */ UBool(1), &fromErr
-			);
-
-			if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
-
-				throw exceptions::charset_conv_error(
-					"[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'."
-				);
-			}
-
-			const size_t cpLength = cpTarget - &cpBuffer[0];
-
-			// Write successfully converted bytes
-			m_stream.write(&cpBuffer[0], cpLength);
-
-		} while (fromErr == U_BUFFER_OVERFLOW_ERROR);
-
-	} while (toErr == U_BUFFER_OVERFLOW_ERROR);
-
-	m_stream.flush();
-}
-
-
-} // utility
-
-
-} // vmime
-
-
-#endif // VMIME_CHARSETCONV_LIB_IS_ICU
+//
+// VMime library (http://www.vmime.org)
+// Copyright (C) 2002 Vincent Richard <[email protected]>
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 3 of
+// the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Linking this library statically or dynamically with other modules is making
+// a combined work based on this library.  Thus, the terms and conditions of
+// the GNU General Public License cover the whole combination.
+//
+
+#include "vmime/config.hpp"
+
+
+#if VMIME_CHARSETCONV_LIB_IS_ICU
+
+
+#include "vmime/charsetConverter_icu.hpp"
+
+#include "vmime/exception.hpp"
+#include "vmime/utility/inputStreamStringAdapter.hpp"
+#include "vmime/utility/outputStreamStringAdapter.hpp"
+
+
+#ifndef VMIME_BUILDING_DOC
+
+	#include <unicode/ucnv.h>
+	#include <unicode/ucnv_err.h>
+
+#endif // VMIME_BUILDING_DOC
+
+
+#include <unicode/unistr.h>
+
+
+namespace vmime {
+
+
+// static
+shared_ptr <charsetConverter> charsetConverter::createGenericConverter(
+	const charset& source,
+	const charset& dest,
+	const charsetConverterOptions& opts
+) {
+
+	return make_shared <charsetConverter_icu>(source, dest, opts);
+}
+
+
+charsetConverter_icu::charsetConverter_icu(
+	const charset& source,
+	const charset& dest,
+	const charsetConverterOptions& opts
+)
+	: m_from(NULL),
+	  m_to(NULL),
+	  m_source(source),
+	  m_dest(dest),
+	  m_options(opts) {
+
+	UErrorCode err = U_ZERO_ERROR;
+	m_from = ucnv_open(source.getName().c_str(), &err);
+
+	if (!U_SUCCESS(err)) {
+
+		throw exceptions::charset_conv_error(
+			"Cannot initialize ICU converter for source charset '" + source.getName()
+			+ "' (error code: " + u_errorName(err) + "."
+		);
+	}
+
+	m_to = ucnv_open(dest.getName().c_str(), &err);
+
+	if (!U_SUCCESS(err)) {
+
+		throw exceptions::charset_conv_error(
+			"Cannot initialize ICU converter for destination charset '" + dest.getName()
+			+ "' (error code: " + u_errorName(err) + "."
+		);
+	}
+}
+
+
+charsetConverter_icu::~charsetConverter_icu() {
+
+	if (m_from) ucnv_close(m_from);
+	if (m_to) ucnv_close(m_to);
+}
+
+
+void charsetConverter_icu::convert(
+	utility::inputStream& in,
+	utility::outputStream& out,
+	status* st
+) {
+
+	UErrorCode err = U_ZERO_ERROR;
+
+	ucnv_reset(m_from);
+	ucnv_reset(m_to);
+
+	if (st) {
+		new (st) status();
+	}
+
+	// From buffers
+	byte_t cpInBuffer[16]; // stream data put here
+	const size_t outSize = ucnv_getMinCharSize(m_from) * sizeof(cpInBuffer) * sizeof(UChar);
+	std::vector <UChar> uOutBuffer(outSize); // Unicode chars end up here
+
+	// To buffers
+	// converted (char) data end up here
+	const size_t cpOutBufferSz = ucnv_getMaxCharSize(m_to) * outSize;
+	std::vector <char> cpOutBuffer(cpOutBufferSz);
+
+	// Tell ICU what to do when encountering an illegal byte sequence
+	if (m_options.silentlyReplaceInvalidSequences) {
+
+		// Set replacement chars for when converting from Unicode to codepage
+		icu::UnicodeString substString(m_options.invalidSequence.c_str());
+		ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);
+
+		if (U_FAILURE(err)) {
+			throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
+		}
+
+	} else {
+
+		// Tell ICU top stop (and return an error) on illegal byte sequences
+		ucnv_setToUCallBack(
+			m_from, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
+		);
+
+		if (U_FAILURE(err)) {
+			throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");
+		}
+
+		ucnv_setFromUCallBack(
+			m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
+		);
+
+		if (U_FAILURE(err)) {
+			throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
+		}
+	}
+
+	// Input data available
+	while (!in.eof()) {
+
+		// Read input data into buffer
+		size_t inLength = in.read(cpInBuffer, sizeof(cpInBuffer));
+
+		// Beginning of read data
+		const char* source = reinterpret_cast <const char*>(&cpInBuffer[0]);
+		const char* sourceLimit = source + inLength; // end + 1
+
+		UBool flush = in.eof();  // is this last run?
+
+		UErrorCode toErr;
+
+		// Loop until all source has been processed
+		do {
+
+			// Set up target pointers
+			UChar* target = &uOutBuffer[0];
+			UChar* targetLimit = &target[0] + outSize;
+
+			toErr = U_ZERO_ERROR;
+
+			ucnv_toUnicode(
+				m_from, &target, targetLimit,
+				&source, sourceLimit, NULL, flush, &toErr
+			);
+
+			if (st) {
+				st->inputBytesRead += (source - reinterpret_cast <const char*>(&cpInBuffer[0]));
+			}
+
+			if (toErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(toErr)) {
+
+				if (toErr == U_INVALID_CHAR_FOUND ||
+				    toErr == U_TRUNCATED_CHAR_FOUND ||
+				    toErr == U_ILLEGAL_CHAR_FOUND) {
+
+					// Error will be thrown later (*)
+
+				} else {
+
+					throw exceptions::charset_conv_error(
+						"[ICU] Error converting to Unicode from " + m_source.getName()
+					);
+				}
+			}
+
+			// The Unicode source is the buffer just written and the limit
+			// is where the previous conversion stopped (target is moved in the conversion)
+			const UChar* uSource = &uOutBuffer[0];
+			UChar* uSourceLimit = &target[0];
+			UErrorCode fromErr;
+
+			// Loop until converted chars are fully written
+			do {
+
+				char* cpTarget = &cpOutBuffer[0];
+				const char* cpTargetLimit = &cpOutBuffer[0] + cpOutBufferSz;
+
+				fromErr = U_ZERO_ERROR;
+
+				// Write converted bytes (Unicode) to destination codepage
+				ucnv_fromUnicode(
+					m_to, &cpTarget, cpTargetLimit,
+					&uSource, uSourceLimit, NULL, flush, &fromErr
+				);
+
+				if (st) {
+
+					// Decrement input bytes count by the number of input bytes in error
+					char errBytes[16];
+					int8_t errBytesLen = sizeof(errBytes);
+					UErrorCode errBytesErr = U_ZERO_ERROR;
+
+	 				ucnv_getInvalidChars(m_from, errBytes, &errBytesLen, &errBytesErr);
+
+					st->inputBytesRead -= errBytesLen;
+					st->outputBytesWritten += cpTarget - &cpOutBuffer[0];
+				}
+
+				// (*) If an error occurred while converting from input charset, throw it now
+				if (toErr == U_INVALID_CHAR_FOUND ||
+				    toErr == U_TRUNCATED_CHAR_FOUND ||
+				    toErr == U_ILLEGAL_CHAR_FOUND) {
+
+					throw exceptions::illegal_byte_sequence_for_charset();
+				}
+
+				if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
+
+					if (fromErr == U_INVALID_CHAR_FOUND ||
+					    fromErr == U_TRUNCATED_CHAR_FOUND ||
+					    fromErr == U_ILLEGAL_CHAR_FOUND) {
+
+						throw exceptions::illegal_byte_sequence_for_charset();
+
+					} else {
+
+						throw exceptions::charset_conv_error(
+							"[ICU] Error converting from Unicode to " + m_dest.getName()
+						);
+					}
+				}
+
+				// Write to destination stream
+				out.write(&cpOutBuffer[0], (cpTarget - &cpOutBuffer[0]));
+
+			} while (fromErr == U_BUFFER_OVERFLOW_ERROR);
+
+		} while (toErr == U_BUFFER_OVERFLOW_ERROR);
+	}
+}
+
+
+void charsetConverter_icu::convert(const string& in, string& out, status* st) {
+
+	if (st) {
+		new (st) status();
+	}
+
+	out.clear();
+
+	utility::inputStreamStringAdapter is(in);
+	utility::outputStreamStringAdapter os(out);
+
+	convert(is, os, st);
+
+	os.flush();
+}
+
+
+shared_ptr <utility::charsetFilteredOutputStream>
+	charsetConverter_icu::getFilteredOutputStream(
+		utility::outputStream& os,
+		const charsetConverterOptions& opts
+	) {
+
+	return make_shared <utility::charsetFilteredOutputStream_icu>(m_source, m_dest, &os, opts);
+}
+
+
+
+// charsetFilteredOutputStream_icu
+
+namespace utility {
+
+
+charsetFilteredOutputStream_icu::charsetFilteredOutputStream_icu(
+	const charset& source,
+	const charset& dest,
+	outputStream* os,
+	const charsetConverterOptions& opts
+)
+	: m_from(NULL),
+	  m_to(NULL),
+	  m_sourceCharset(source),
+	  m_destCharset(dest),
+	  m_stream(*os),
+	  m_options(opts) {
+
+	UErrorCode err = U_ZERO_ERROR;
+	m_from = ucnv_open(source.getName().c_str(), &err);
+
+	if (!U_SUCCESS(err)) {
+
+		throw exceptions::charset_conv_error(
+			"Cannot initialize ICU converter for source charset '" + source.getName()
+			+ "' (error code: " + u_errorName(err) + "."
+		);
+	}
+
+	m_to = ucnv_open(dest.getName().c_str(), &err);
+
+	if (!U_SUCCESS(err)) {
+
+		throw exceptions::charset_conv_error(
+			"Cannot initialize ICU converter for destination charset '" + dest.getName()
+			+ "' (error code: " + u_errorName(err) + "."
+		);
+	}
+
+	// Tell ICU what to do when encountering an illegal byte sequence
+	if (m_options.silentlyReplaceInvalidSequences) {
+
+		// Set replacement chars for when converting from Unicode to codepage
+		icu::UnicodeString substString(m_options.invalidSequence.c_str());
+		ucnv_setSubstString(m_to, substString.getTerminatedBuffer(), -1, &err);
+
+		if (U_FAILURE(err)) {
+			throw exceptions::charset_conv_error("[ICU] Error when setting substitution string.");
+		}
+
+	} else {
+
+		// Tell ICU top stop (and return an error) on illegal byte sequences
+		ucnv_setToUCallBack(
+			m_to, UCNV_TO_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
+		);
+
+		if (U_FAILURE(err)) {
+			throw exceptions::charset_conv_error("[ICU] Error when setting ToU callback.");
+		}
+
+		ucnv_setFromUCallBack(
+			m_to, UCNV_FROM_U_CALLBACK_STOP, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &err
+		);
+
+		if (U_FAILURE(err)) {
+			throw exceptions::charset_conv_error("[ICU] Error when setting FromU callback.");
+		}
+	}
+}
+
+
+charsetFilteredOutputStream_icu::~charsetFilteredOutputStream_icu() {
+
+	if (m_from) ucnv_close(m_from);
+	if (m_to) ucnv_close(m_to);
+}
+
+
+outputStream& charsetFilteredOutputStream_icu::getNextOutputStream() {
+
+	return m_stream;
+}
+
+
+void charsetFilteredOutputStream_icu::writeImpl(
+	const byte_t* const data,
+	const size_t count
+) {
+
+	if (!m_from || !m_to) {
+		throw exceptions::charset_conv_error("Cannot initialize converters.");
+	}
+
+	// Allocate buffer for Unicode chars
+	const size_t uniSize = ucnv_getMinCharSize(m_from) * count * sizeof(UChar);
+	std::vector <UChar> uniBuffer(uniSize);
+
+	// Conversion loop
+	UErrorCode toErr = U_ZERO_ERROR;
+
+	const char* uniSource = reinterpret_cast <const char*>(data);
+	const char* uniSourceLimit = uniSource + count;
+
+	do {
+
+		// Convert from source charset to Unicode
+		UChar* uniTarget = &uniBuffer[0];
+		UChar* uniTargetLimit = &uniBuffer[0] + uniSize;
+
+		toErr = U_ZERO_ERROR;
+
+		ucnv_toUnicode(
+			m_from, &uniTarget, uniTargetLimit,
+			&uniSource, uniSourceLimit, NULL, /* flush */ UBool(0), &toErr
+		);
+
+		if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR) {
+
+			if (toErr == U_INVALID_CHAR_FOUND ||
+			    toErr == U_TRUNCATED_CHAR_FOUND ||
+			    toErr == U_ILLEGAL_CHAR_FOUND) {
+
+				throw exceptions::illegal_byte_sequence_for_charset();
+
+			} else {
+
+				throw exceptions::charset_conv_error(
+					"[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'."
+				);
+			}
+		}
+
+		const size_t uniLength = uniTarget - &uniBuffer[0];
+
+		// Allocate buffer for destination charset
+		const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength;
+		std::vector <char> cpBuffer(cpSize);
+
+		// Convert from Unicode to destination charset
+		UErrorCode fromErr = U_ZERO_ERROR;
+
+		const UChar* cpSource = &uniBuffer[0];
+		const UChar* cpSourceLimit = &uniBuffer[0] + uniLength;
+
+		do {
+
+			char* cpTarget = &cpBuffer[0];
+			char* cpTargetLimit = &cpBuffer[0] + cpSize;
+
+			fromErr = U_ZERO_ERROR;
+
+			ucnv_fromUnicode(
+				m_to, &cpTarget, cpTargetLimit,
+				&cpSource, cpSourceLimit, NULL, /* flush */ FALSE, &fromErr
+			);
+
+			if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
+
+				if (fromErr == U_INVALID_CHAR_FOUND ||
+				    fromErr == U_TRUNCATED_CHAR_FOUND ||
+				    fromErr == U_ILLEGAL_CHAR_FOUND) {
+
+					throw exceptions::illegal_byte_sequence_for_charset();
+
+				} else {
+
+					throw exceptions::charset_conv_error(
+						"[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'."
+					);
+				}
+			}
+
+			const size_t cpLength = cpTarget - &cpBuffer[0];
+
+			// Write successfully converted bytes
+			m_stream.write(&cpBuffer[0], cpLength);
+
+		} while (fromErr == U_BUFFER_OVERFLOW_ERROR);
+
+	} while (toErr == U_BUFFER_OVERFLOW_ERROR);
+}
+
+
+void charsetFilteredOutputStream_icu::flush() {
+
+	if (!m_from || !m_to) {
+		throw exceptions::charset_conv_error("Cannot initialize converters.");
+	}
+
+	// Allocate buffer for Unicode chars
+	const size_t uniSize = ucnv_getMinCharSize(m_from) * 1024 * sizeof(UChar);
+	std::vector <UChar> uniBuffer(uniSize);
+
+	// Conversion loop (with flushing)
+	UErrorCode toErr = U_ZERO_ERROR;
+
+	const char* uniSource = 0;
+	const char* uniSourceLimit = 0;
+
+	do {
+
+		// Convert from source charset to Unicode
+		UChar* uniTarget = &uniBuffer[0];
+		UChar* uniTargetLimit = &uniBuffer[0] + uniSize;
+
+		toErr = U_ZERO_ERROR;
+
+		ucnv_toUnicode(
+			m_from, &uniTarget, uniTargetLimit,
+			&uniSource, uniSourceLimit, NULL, /* flush */ UBool(1), &toErr
+		);
+
+		if (U_FAILURE(toErr) && toErr != U_BUFFER_OVERFLOW_ERROR) {
+
+			throw exceptions::charset_conv_error(
+				"[ICU] Error converting to Unicode from '" + m_sourceCharset.getName() + "'."
+			);
+		}
+
+		const size_t uniLength = uniTarget - &uniBuffer[0];
+
+		// Allocate buffer for destination charset
+		const size_t cpSize = ucnv_getMinCharSize(m_to) * uniLength;
+		std::vector <char> cpBuffer(cpSize);
+
+		// Convert from Unicode to destination charset
+		UErrorCode fromErr = U_ZERO_ERROR;
+
+		const UChar* cpSource = &uniBuffer[0];
+		const UChar* cpSourceLimit = &uniBuffer[0] + uniLength;
+
+		do {
+
+			char* cpTarget = &cpBuffer[0];
+			char* cpTargetLimit = &cpBuffer[0] + cpSize;
+
+			fromErr = U_ZERO_ERROR;
+
+			ucnv_fromUnicode(
+				m_to, &cpTarget, cpTargetLimit,
+				&cpSource, cpSourceLimit, NULL, /* flush */ UBool(1), &fromErr
+			);
+
+			if (fromErr != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(fromErr)) {
+
+				throw exceptions::charset_conv_error(
+					"[ICU] Error converting from Unicode to '" + m_destCharset.getName() + "'."
+				);
+			}
+
+			const size_t cpLength = cpTarget - &cpBuffer[0];
+
+			// Write successfully converted bytes
+			m_stream.write(&cpBuffer[0], cpLength);
+
+		} while (fromErr == U_BUFFER_OVERFLOW_ERROR);
+
+	} while (toErr == U_BUFFER_OVERFLOW_ERROR);
+
+	m_stream.flush();
+}
+
+
+} // utility
+
+
+} // vmime
+
+
+#endif // VMIME_CHARSETCONV_LIB_IS_ICU
diff --git a/src/vmime/charsetConverter_icu.hpp b/src/vmime/charsetConverter_icu.hpp
index cf5eb6bc..742999f0 100644
--- a/src/vmime/charsetConverter_icu.hpp
+++ b/src/vmime/charsetConverter_icu.hpp
@@ -1,137 +1,137 @@
-//
-// VMime library (http://www.vmime.org)
-// Copyright (C) 2002 Vincent Richard <[email protected]>
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 3 of
-// the License, or (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with this program; if not, write to the Free Software Foundation, Inc.,
-// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-//
-// Linking this library statically or dynamically with other modules is making
-// a combined work based on this library.  Thus, the terms and conditions of
-// the GNU General Public License cover the whole combination.
-//
-
-#ifndef VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
-#define VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
-
-
-#include "vmime/config.hpp"
-
-
-#if VMIME_CHARSETCONV_LIB_IS_ICU
-
-
-#include "vmime/charsetConverter.hpp"
-
-
-struct UConverter;
-
-
-namespace vmime {
-
-
-/** A generic charset converter which uses ICU library.
-  */
-class charsetConverter_icu : public charsetConverter {
-
-public:
-
-	/** Construct and initialize an ICU charset converter.
-	  *
-	  * @param source input charset
-	  * @param dest output charset
-	  * @param opts conversion options
-	  */
-	charsetConverter_icu(
-		const charset& source,
-		const charset& dest,
-		const charsetConverterOptions& opts = charsetConverterOptions()
-	);
-
-	~charsetConverter_icu();
-
-	void convert(const string& in, string& out, status* st = NULL);
-	void convert(utility::inputStream& in, utility::outputStream& out, status* st = NULL);
-
-	shared_ptr <utility::charsetFilteredOutputStream> getFilteredOutputStream(
-		utility::outputStream& os,
-		const charsetConverterOptions& opts = charsetConverterOptions()
-	);
-
-private:
-
-	UConverter* m_from;
-	UConverter* m_to;
-
-	charset m_source;
-	charset m_dest;
-
-	charsetConverterOptions m_options;
-};
-
-
-namespace utility {
-
-
-class charsetFilteredOutputStream_icu : public charsetFilteredOutputStream {
-
-public:
-
-	/** Construct a new filter for the specified output stream.
-	  *
-	  * @param source input charset
-	  * @param dest output charset
-	  * @param os stream into which write filtered data
-	  * @param opts conversion options
-	  */
-	charsetFilteredOutputStream_icu(
-		const charset& source,
-		const charset& dest,
-		outputStream* os,
-		const charsetConverterOptions& opts = charsetConverterOptions()
-	);
-
-	~charsetFilteredOutputStream_icu();
-
-
-	outputStream& getNextOutputStream();
-
-	void flush();
-
-protected:
-
-	void writeImpl(const byte_t* const data, const size_t count);
-
-private:
-
-	UConverter* m_from;
-	UConverter* m_to;
-
-	const charset m_sourceCharset;
-	const charset m_destCharset;
-
-	outputStream& m_stream;
-
-	charsetConverterOptions m_options;
-};
-
-
-} // utility
-
-
-} // vmime
-
-
-#endif // VMIME_CHARSETCONV_LIB_IS_ICU
-
-#endif // VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
+//
+// VMime library (http://www.vmime.org)
+// Copyright (C) 2002 Vincent Richard <[email protected]>
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 3 of
+// the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Linking this library statically or dynamically with other modules is making
+// a combined work based on this library.  Thus, the terms and conditions of
+// the GNU General Public License cover the whole combination.
+//
+
+#ifndef VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
+#define VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
+
+
+#include "vmime/config.hpp"
+
+
+#if VMIME_CHARSETCONV_LIB_IS_ICU
+
+
+#include "vmime/charsetConverter.hpp"
+
+
+struct UConverter;
+
+
+namespace vmime {
+
+
+/** A generic charset converter which uses ICU library.
+  */
+class charsetConverter_icu : public charsetConverter {
+
+public:
+
+	/** Construct and initialize an ICU charset converter.
+	  *
+	  * @param source input charset
+	  * @param dest output charset
+	  * @param opts conversion options
+	  */
+	charsetConverter_icu(
+		const charset& source,
+		const charset& dest,
+		const charsetConverterOptions& opts = charsetConverterOptions()
+	);
+
+	~charsetConverter_icu();
+
+	void convert(const string& in, string& out, status* st = NULL);
+	void convert(utility::inputStream& in, utility::outputStream& out, status* st = NULL);
+
+	shared_ptr <utility::charsetFilteredOutputStream> getFilteredOutputStream(
+		utility::outputStream& os,
+		const charsetConverterOptions& opts = charsetConverterOptions()
+	);
+
+private:
+
+	UConverter* m_from;
+	UConverter* m_to;
+
+	charset m_source;
+	charset m_dest;
+
+	charsetConverterOptions m_options;
+};
+
+
+namespace utility {
+
+
+class charsetFilteredOutputStream_icu : public charsetFilteredOutputStream {
+
+public:
+
+	/** Construct a new filter for the specified output stream.
+	  *
+	  * @param source input charset
+	  * @param dest output charset
+	  * @param os stream into which write filtered data
+	  * @param opts conversion options
+	  */
+	charsetFilteredOutputStream_icu(
+		const charset& source,
+		const charset& dest,
+		outputStream* os,
+		const charsetConverterOptions& opts = charsetConverterOptions()
+	);
+
+	~charsetFilteredOutputStream_icu();
+
+
+	outputStream& getNextOutputStream();
+
+	void flush();
+
+protected:
+
+	void writeImpl(const byte_t* const data, const size_t count);
+
+private:
+
+	UConverter* m_from;
+	UConverter* m_to;
+
+	const charset m_sourceCharset;
+	const charset m_destCharset;
+
+	outputStream& m_stream;
+
+	charsetConverterOptions m_options;
+};
+
+
+} // utility
+
+
+} // vmime
+
+
+#endif // VMIME_CHARSETCONV_LIB_IS_ICU
+
+#endif // VMIME_CHARSETCONVERTER_ICU_HPP_INCLUDED
author	Vincent Richard <[email protected]>	2021-01-11 21:06:42 +0000
committer	Vincent Richard <[email protected]>	2021-01-11 21:06:42 +0000
commit	817358854d6a1ffea3dc213263f45aae860a19d7 (patch)
tree	3fe5e631fd18f66df5c49336d0cb7ac9981dba31
parent	#247 Fixed build with ICU 68.2-1 (diff)
download	vmime-817358854d6a1ffea3dc213263f45aae860a19d7.tar.gz vmime-817358854d6a1ffea3dc213263f45aae860a19d7.zip