From 6f278de337b1f017a34211250bbb09d8c72a8a3e Mon Sep 17 00:00:00 2001
From: Vincent Richard <vincent@vincent-richard.net>
Date: Wed, 19 Oct 2005 11:28:36 +0000
Subject: [PATCH] Charset converter.

---
 src/charset.cpp            |  27 ------
 src/charsetConverter.cpp   | 167 +++++++++++++++++++++++++++++++++++++
 src/exception.cpp          |   4 +-
 tests/charset/Makefile     |   2 +-
 tests/charset/main.cpp     |   6 +-
 tests/charset/run-test.sh  |   4 +-
 vmime/charset.hpp          |  27 +-----
 vmime/charsetConverter.hpp |  90 ++++++++++++++++++++
 vmime/exception.hpp        |   2 +-
 9 files changed, 270 insertions(+), 59 deletions(-)
 create mode 100644 src/charsetConverter.cpp
 create mode 100644 vmime/charsetConverter.hpp

diff --git a/src/charset.cpp b/src/charset.cpp
index b8ce9e70..a5440242 100644
--- a/src/charset.cpp
+++ b/src/charset.cpp
@@ -30,33 +30,6 @@
 #include "vmime/charsetConverter.hpp"
 
 
-extern "C"
-{
-#ifndef VMIME_BUILDING_DOC
-
-	#include <iconv.h>
-	#include <errno.h>
-
-	// HACK: prototypes may differ depending on the compiler and/or system (the
-	// second parameter may or may not be 'const'). This redeclaration is a hack
-	// to have a common prototype "iconv_cast".
-	class ICONV_HACK
-	{
-	public:
-
-		ICONV_HACK(const char** ptr) : m_ptr(ptr) { }
-
-		operator const char**() { return m_ptr; }
-		operator char**() { return const_cast <char**>(m_ptr); }
-
-	private:
-
-		const char** m_ptr;
-	};
-
-#endif // VMIME_BUILDING_DOC
-}
-
 
 namespace vmime
 {
diff --git a/src/charsetConverter.cpp b/src/charsetConverter.cpp
new file mode 100644
index 00000000..ecfb1811
--- /dev/null
+++ b/src/charsetConverter.cpp
@@ -0,0 +1,167 @@
+//
+// VMime library (http://www.vmime.org)
+// Copyright (C) 2002-2005 Vincent Richard <vincent@vincent-richard.net>
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Linking this library statically or dynamically with other modules is making
+// a combined work based on this library.  Thus, the terms and conditions of
+// the GNU General Public License cover the whole combination.
+//
+
+#include "vmime/charsetConverter.hpp"
+#include "vmime/exception.hpp"
+
+
+extern "C"
+{
+#ifndef VMIME_BUILDING_DOC
+
+	#include <iconv.h>
+	#include <errno.h>
+
+	// HACK: prototypes may differ depending on the compiler and/or system (the
+	// second parameter may or may not be 'const'). This redeclaration is a hack
+	// to have a common prototype "iconv_cast".
+	class ICONV_HACK
+	{
+		// Address of the caller's input pointer, as handed over to iconv()
+		const char** m_target;
+
+	public:
+		ICONV_HACK(const char** ptr) : m_target(ptr) { }
+
+		// Implicit conversions: the compiler selects whichever pointer type
+		// the local iconv() prototype declares for its second parameter
+		operator char**() { return const_cast <char**>(m_target); }
+		operator const char**() { return m_target; }
+	};
+
+#endif // VMIME_BUILDING_DOC
+}
+
+
+namespace vmime
+{
+
+
+charsetConverter::charsetConverter(const charset& source, const charset& dest)
+	: m_desc(NULL), m_source(source), m_dest(dest)
+{
+	// Open an iconv descriptor converting from 'source' to 'dest'
+	const iconv_t handle = iconv_open
+		(dest.getName().c_str(), source.getName().c_str());
+
+	// iconv_open() signals failure with (iconv_t) -1: m_desc then stays NULL
+	if (handle == reinterpret_cast <iconv_t>(-1))
+		return;
+
+	// Keep a heap-allocated copy of the descriptor behind the opaque pointer
+	m_desc = new iconv_t(handle);
+}
+
+
+charsetConverter::~charsetConverter()
+{
+	// Nothing to release when no descriptor was stored
+	if (m_desc == NULL)
+		return;
+
+	iconv_t* const handle = static_cast <iconv_t*>(m_desc);
+	iconv_close(*handle);  // close the iconv descriptor itself
+	delete handle;  // then free the storage that held it
+}
+
+
+void charsetConverter::convert(utility::inputStream& in, utility::outputStream& out)
+{
+	// Converts all data read from 'in' and writes the result to 'out'; each
+	// byte that cannot be converted is replaced by '?'. Throws
+	// charset_conv_error if no iconv descriptor is available (m_desc is NULL).
+	if (m_desc == NULL)
+		throw exceptions::charset_conv_error("Cannot initialize converter.");
+
+	const iconv_t cd = *static_cast <iconv_t*>(m_desc);
+
+	char inBuffer[32768];
+	char outBuffer[32768];
+	size_t inPos = 0;
+
+	// Set after a failure that may only mean "more input needed"; a second
+	// failure on the very same byte marks that byte as unconvertible
+	bool prevIsInvalid = false;
+
+	while (true)
+	{
+		// Fill the buffer, after the bytes left over from the previous pass
+		size_t inLength = static_cast <size_t>(in.read(inBuffer + inPos, sizeof(inBuffer) - inPos) + inPos);
+		size_t outLength = sizeof(outBuffer);
+
+		const char* inPtr = inBuffer;
+		char* outPtr = outBuffer;
+
+		// Convert input bytes; save errno at once, as the write below may change it
+		const bool failed = (iconv(cd, ICONV_HACK(&inPtr), &inLength,
+			&outPtr, &outLength) == static_cast <size_t>(-1));
+		const int err = (failed ? errno : 0);
+
+		// Write successfully converted bytes
+		out.write(outBuffer, sizeof(outBuffer) - outLength);
+
+		if (!failed)
+		{
+			inPos = 0;
+			prevIsInvalid = false;
+		}
+		else if (prevIsInvalid && inPtr == inBuffer && inLength != 0)
+		{
+			// Output a special character to indicate we don't know how to
+			// convert the sequence at this position
+			out.write("?", 1);
+
+			// Skip a byte and leave unconverted bytes in the input buffer
+			std::copy(inBuffer + 1, inBuffer + inLength, inBuffer);
+			inPos = inLength - 1;
+			prevIsInvalid = false;  // the next byte gets its own retry
+		}
+		else
+		{
+			// Leave unconverted bytes in the input buffer
+			if (inPtr != inBuffer)
+				std::copy(inPtr, inPtr + inLength, inBuffer);
+
+			inPos = inLength;
+
+			// E2BIG only means the output buffer was full, not bad input
+			prevIsInvalid = (err != E2BIG);
+		}
+
+		// Check for end of data
+		if (in.eof() && inPos == 0)
+			break;
+	}
+}
+
+
+void charsetConverter::convert(const string& in, string& out)
+{
+	// Wrap both strings in stream adapters and reuse the stream-based overload
+	utility::inputStreamStringAdapter source(in);
+	utility::outputStreamStringAdapter sink(out);
+	convert(source, sink);
+}
+
+
+} // vmime
diff --git a/src/exception.cpp b/src/exception.cpp
index 9620a310..bc599c76 100644
--- a/src/exception.cpp
+++ b/src/exception.cpp
@@ -105,8 +105,8 @@ const char* bad_field_type::name() const throw() { return "bad_field_type"; }
 //
 
 charset_conv_error::~charset_conv_error() throw() {}
-charset_conv_error::charset_conv_error(const exception& other)
-	: exception("Charset conversion error.", other) {}
+charset_conv_error::charset_conv_error(const string& what, const exception& other)
+	: exception(!what.empty() ? what : string("Charset conversion error."), other) {}
 
 exception* charset_conv_error::clone() const { return new charset_conv_error(*this); }
 const char* charset_conv_error::name() const throw() { return "charset_conv_error"; }
diff --git a/tests/charset/Makefile b/tests/charset/Makefile
index 7c0fe6c0..e46fafa5 100644
--- a/tests/charset/Makefile
+++ b/tests/charset/Makefile
@@ -1,5 +1,5 @@
 
 
 main: main.cpp ../../libvmime-debug.a
-	g++ -g -o main main.cpp ../../libvmime-debug.a
+	g++ -g -o main main.cpp -I../.. ../../libvmime-debug.a -lgnutls -lgsasl
 
diff --git a/tests/charset/main.cpp b/tests/charset/main.cpp
index 0fb2495a..9ad30432 100644
--- a/tests/charset/main.cpp
+++ b/tests/charset/main.cpp
@@ -24,14 +24,14 @@
 #include <iostream>
 #include <ostream>
 
-#include "../../src/vmime"
-#include "../../examples/common.inc"
+#include "vmime/vmime.hpp"
+#include "vmime/platforms/posix/posixHandler.hpp"
 
 
 int main(int argc, char* argv[])
 {
 	// VMime initialization
-	vmime::platformDependant::setHandler<my_handler>();
+	vmime::platformDependant::setHandler<vmime::platforms::posix::posixHandler>();
 
 
 	const vmime::string from(argv[1]);
diff --git a/tests/charset/run-test.sh b/tests/charset/run-test.sh
index d7ad5295..95846c06 100755
--- a/tests/charset/run-test.sh
+++ b/tests/charset/run-test.sh
@@ -5,7 +5,7 @@ TEMP_DIR="/tmp"
 PROGRAM="./main"
 
 
-testFiles=`cd $TEST_DIR ; find . -regex '\./[^\.]*\.in\..*' -maxdepth 1 -type f`
+testFiles=`cd $TEST_DIR ; find . -maxdepth 1 -regex '\./[^\.]*\.in\..*' -type f`
 
 echo
 echo Testing charset conversions
@@ -16,7 +16,7 @@ for testFile in $testFiles ; do
 	testName=`echo $testFile | sed 's/^\.\/\([^\.]*\).*/\1/'`
 	sourceCharset=`echo $testFile | sed 's/^\.\/[^\.]*\.[^\.]*\.\(.*\)/\1/'`
 
-	testOutFiles=`cd $TEST_DIR ; find . -regex "\./$testName\.out\..*" -maxdepth 1 -type f`
+	testOutFiles=`cd $TEST_DIR ; find . -maxdepth 1 -regex "\./$testName\.out\..*" -type f`
 
 	for testOutFile in $testOutFiles ; do
 
diff --git a/vmime/charset.hpp b/vmime/charset.hpp
index 95d03041..eec071d5 100644
--- a/vmime/charset.hpp
+++ b/vmime/charset.hpp
@@ -68,26 +68,6 @@ public:
 	  */
 	static const charset getLocaleCharset();
 
-#if VMIME_WIDE_CHAR_SUPPORT
-	/** Convert a string buffer in the specified charset to a wide-char
-	  * string buffer.
-	  *
-	  * @param in input buffer
-	  * @param out output buffer
-	  * @param ch input charset
-	  */
-	static void decode(const string& in, wstring& out, const charset& ch);
-
-	/** Convert a wide-char string buffer to a string buffer in the
-	  * specified charset.
-	  *
-	  * @param in input buffer
-	  * @param out output buffer
-	  * @param ch output charset
-	  */
-	static void encode(const wstring& in, string& out, const charset& ch);
-#endif
-
 	/** Convert a string buffer from one charset to another
 	  * charset (in-memory conversion)
 	  *
@@ -98,6 +78,8 @@ public:
 	  * @param out output buffer
 	  * @param source input charset
 	  * @param dest output charset
+	  * @throws exceptions::charset_conv_error if an error occurred during
+	  * the conversion
 	  */
 	static void convert(const string& in, string& out, const charset& source, const charset& dest);
 
@@ -108,6 +90,8 @@ public:
 	  * @param out output stream to write the converted data
 	  * @param source input charset
 	  * @param dest output charset
+	  * @throws exceptions::charset_conv_error if an error occurred during
+	  * the conversion
 	  */
 	static void convert(utility::inputStream& in, utility::outputStream& out, const charset& source, const charset& dest);
 
@@ -118,9 +102,6 @@ private:
 
 	string m_name;
 
-	template <class STRINGF, class STRINGT>
-	static void iconvert(const STRINGF& in, STRINGT& out, const charset& from, const charset& to);
-
 public:
 
 	using component::parse;
diff --git a/vmime/charsetConverter.hpp b/vmime/charsetConverter.hpp
new file mode 100644
index 00000000..aa1ac4f5
--- /dev/null
+++ b/vmime/charsetConverter.hpp
@@ -0,0 +1,90 @@
+//
+// VMime library (http://www.vmime.org)
+// Copyright (C) 2002-2005 Vincent Richard <vincent@vincent-richard.net>
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Linking this library statically or dynamically with other modules is making
+// a combined work based on this library.  Thus, the terms and conditions of
+// the GNU General Public License cover the whole combination.
+//
+
+#ifndef VMIME_CHARSETCONVERTER_HPP_INCLUDED
+#define VMIME_CHARSETCONVERTER_HPP_INCLUDED
+
+
+#include "vmime/base.hpp"
+#include "vmime/component.hpp"
+
+#include "vmime/charset.hpp"
+
+
+namespace vmime
+{
+
+
+/** Convert between charsets.
+  */
+
+class charsetConverter : public object
+{
+public:
+
+	/** Construct and initialize a charset converter.
+	  *
+	  * @param source input charset
+	  * @param dest output charset
+	  */
+	charsetConverter(const charset& source, const charset& dest);
+
+	~charsetConverter();
+
+	/** Convert a string buffer from one charset to another
+	  * charset (in-memory conversion)
+	  *
+	  * \deprecated Use the new convert() method, which takes an outputStream parameter.
+	  *
+	  * @param in input buffer
+	  * @param out output buffer
+	  * @throws exceptions::charset_conv_error if an error occurred during the conversion
+	  */
+	void convert(const string& in, string& out);
+
+	/** Convert the contents of an input stream in a specified charset
+	  * to another charset and write the result to an output stream.
+	  *
+	  * @param in input stream to read data from
+	  * @param out output stream to write the converted data
+	  * @throws exceptions::charset_conv_error if an error occurred during the conversion
+	  */
+	void convert(utility::inputStream& in, utility::outputStream& out);
+
+private:
+
+	// Not copyable (declared, never defined): copies would free m_desc twice
+	charsetConverter(const charsetConverter&);
+	charsetConverter& operator=(const charsetConverter&);
+
+	void* m_desc;
+	charset m_source;
+	charset m_dest;
+};
+
+
+} // vmime
+
+
+#endif // VMIME_CHARSETCONVERTER_HPP_INCLUDED
+
diff --git a/vmime/exception.hpp b/vmime/exception.hpp
index d686ab1f..451fcfc8 100644
--- a/vmime/exception.hpp
+++ b/vmime/exception.hpp
@@ -112,7 +112,7 @@ class charset_conv_error : public vmime::exception
 {
 public:
 
-	charset_conv_error(const exception& other = NO_EXCEPTION);
+	charset_conv_error(const string& what = "", const exception& other = NO_EXCEPTION);
 	~charset_conv_error() throw();
 
 	exception* clone() const;