From d49ce7cd4de5317189c2572b413160ad25f1ec01 Mon Sep 17 00:00:00 2001 From: Vincent Richard Date: Thu, 6 Feb 2014 21:29:59 +0100 Subject: [PATCH] =?UTF-8?q?Windows=20charset=20converter=20(thanks=20to=20?= =?UTF-8?q?Elm=C3=BCSoft).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 24 ++- cmake/config.hpp.cmake | 1 + src/vmime/charsetConverter_win.cpp | 171 +++++++++++++++ src/vmime/charsetConverter_win.hpp | 79 +++++++ .../platforms/windows/windowsCodepages.hpp | 198 ++++++++++++++++++ 5 files changed, 470 insertions(+), 3 deletions(-) create mode 100644 src/vmime/charsetConverter_win.cpp create mode 100644 src/vmime/charsetConverter_win.hpp create mode 100644 src/vmime/platforms/windows/windowsCodepages.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b08d7c79..aac379cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -726,12 +726,18 @@ INCLUDE(cmake/FindICU.cmake) FIND_PACKAGE(ICU QUIET) -IF(ICONV_FOUND) +IF(WIN32) + SET(VMIME_CHARSETCONV_LIB_IS_ICONV_DEFAULT "OFF") + SET(VMIME_CHARSETCONV_LIB_IS_ICU_DEFAULT "OFF") + SET(VMIME_CHARSETCONV_LIB_IS_WIN_DEFAULT "ON") +ELSEIF(ICONV_FOUND) SET(VMIME_CHARSETCONV_LIB_IS_ICONV_DEFAULT "ON") SET(VMIME_CHARSETCONV_LIB_IS_ICU_DEFAULT "OFF") + SET(VMIME_CHARSETCONV_LIB_IS_WIN_DEFAULT "OFF") ELSEIF(ICU_LIBRARIES) SET(VMIME_CHARSETCONV_LIB_IS_ICONV_DEFAULT "OFF") SET(VMIME_CHARSETCONV_LIB_IS_ICU_DEFAULT "ON") + SET(VMIME_CHARSETCONV_LIB_IS_WIN_DEFAULT "OFF") ENDIF() OPTION( @@ -746,8 +752,16 @@ OPTION( ${VMIME_CHARSETCONV_LIB_IS_ICU_DEFAULT} ) -IF(VMIME_CHARSETCONV_LIB_IS_ICONV AND VMIME_CHARSETCONV_LIB_IS_ICU) - MESSAGE(FATAL_ERROR "Options VMIME_CHARSETCONV_LIB_IS_ICONV and VMIME_CHARSETCONV_LIB_IS_ICU are mutually exclusive (select one or the other, but not both!)") +OPTION( + VMIME_CHARSETCONV_LIB_IS_WIN + "Use Windows API for charset conversion" + ${VMIME_CHARSETCONV_LIB_IS_WIN_DEFAULT} +) + +IF((VMIME_CHARSETCONV_LIB_IS_ICONV AND VMIME_CHARSETCONV_LIB_IS_ICU) OR + (VMIME_CHARSETCONV_LIB_IS_ICONV AND VMIME_CHARSETCONV_LIB_IS_WIN) OR + (VMIME_CHARSETCONV_LIB_IS_ICU AND VMIME_CHARSETCONV_LIB_IS_WIN)) + MESSAGE(FATAL_ERROR "Options VMIME_CHARSETCONV_LIB_IS_ICONV, VMIME_CHARSETCONV_LIB_IS_ICU and VMIME_CHARSETCONV_LIB_IS_WIN are mutually exclusive (select only one of them, but not all!)") ENDIF() IF(VMIME_CHARSETCONV_LIB_IS_ICONV) @@ -786,6 +800,10 @@ ELSEIF(VMIME_CHARSETCONV_LIB_IS_ICU) SET(VMIME_PKGCONFIG_LIBS "${VMIME_PKGCONFIG_LIBS} ${ICU_LIBRARIES}") SET(VMIME_PKGCONFIG_CFLAGS "${VMIME_PKGCONFIG_CFLAGS} -I${ICU_INCLUDE_DIRS}") +ELSEIF(VMIME_CHARSETCONV_LIB_IS_WIN) + + # Nothing to do + ELSE() MESSAGE(FATAL_ERROR "No charset conversion library was selected/found") diff --git a/cmake/config.hpp.cmake b/cmake/config.hpp.cmake index a9edc34f..cdd128c8 100644 --- a/cmake/config.hpp.cmake +++ b/cmake/config.hpp.cmake @@ -51,6 +51,7 @@ typedef unsigned @VMIME_64BIT_TYPE@ vmime_uint64; // Charset conversion support #cmakedefine01 VMIME_CHARSETCONV_LIB_IS_ICONV #cmakedefine01 VMIME_CHARSETCONV_LIB_IS_ICU +#cmakedefine01 VMIME_CHARSETCONV_LIB_IS_WIN // Options // -- File-system support diff --git a/src/vmime/charsetConverter_win.cpp b/src/vmime/charsetConverter_win.cpp new file mode 100644 index 00000000..e7542584 --- /dev/null +++ b/src/vmime/charsetConverter_win.cpp @@ -0,0 +1,171 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#include "vmime/config.hpp" + + +#if VMIME_CHARSETCONV_LIB_IS_WIN + + +#include "vmime/charsetConverter_win.hpp" + +#include "vmime/exception.hpp" +#include "vmime/utility/stringUtils.hpp" +#include "vmime/utility/inputStreamStringAdapter.hpp" +#include "vmime/utility/outputStreamStringAdapter.hpp" + +#include +#include + + +#if (_WIN32 || _WIN64 || WIN32 || WIN64) + #include + #include "vmime/platforms/windows/windowsCodepages.hpp" +#else + #error Please use VMIME_CHARSETCONV_LIB_IS_WIN only on Windows! +#endif + + +#define CP_UNICODE 1200 + + +namespace vmime +{ + + +// static +shared_ptr charsetConverter::createGenericConverter + (const charset& source, const charset& dest, + const charsetConverterOptions& opts) +{ + return make_shared (source, dest, opts); +} + + +charsetConverter_win::charsetConverter_win + (const charset& source, const charset& dest, const charsetConverterOptions& opts) + : m_source(source), m_dest(dest), m_options(opts) +{ +} + + +void charsetConverter_win::convert(utility::inputStream& in, utility::outputStream& out) +{ + byte_t buffer[32768]; + string inStr, outStr; + + while (!in.eof()) + { + const size_t len = in.read(buffer, sizeof(buffer)); + utility::stringUtils::appendBytesToString(inStr, buffer, len); + } + + convert(inStr, outStr); + + out.write(outStr.data(), outStr.length()); +} + + +void charsetConverter_win::convert(const string& in, string& out) +{ + if (m_source == m_dest) + { + // No conversion needed + out = in; + return; + } + + const int sourceCodePage = getCodePage(m_source.getName().c_str()); + const int destCodePage = getCodePage(m_dest.getName().c_str()); + + // Convert from source charset to Unicode + std::vector unicodeBuffer; + const WCHAR* unicodePtr = NULL; + size_t unicodeLen = 0; + + if (sourceCodePage == CP_UNICODE) + { + unicodePtr = reinterpret_cast (in.c_str()); + unicodeLen = in.length() / 2; + } + else + { + const size_t bufferSize = in.length() * 2; // in wide characters + unicodeBuffer.resize(bufferSize); + + unicodePtr = reinterpret_cast (&unicodeBuffer[0]); + unicodeLen = MultiByteToWideChar + (sourceCodePage, 0, in.c_str(), static_cast (in.length()), + reinterpret_cast (&unicodeBuffer[0]), static_cast (bufferSize)); + } + + // Convert from Unicode to destination charset + if (destCodePage == CP_UNICODE) + { + out.assign(reinterpret_cast (unicodePtr), unicodeLen * 2); + } + else + { + const size_t bufferSize = unicodeLen * 6; // in multibyte characters + + std::vector buffer; + buffer.resize(bufferSize); + + const size_t len = WideCharToMultiByte + (destCodePage, 0, unicodePtr, static_cast (unicodeLen), + &buffer[0], static_cast (bufferSize), 0, NULL); + + out.assign(&buffer[0], len); + } +} + + +// static +int charsetConverter_win::getCodePage(const char* name) +{ + if (_stricmp(name, charsets::UTF_16) == 0) // wchar_t is UTF-16 on Windows + return CP_UNICODE; + + // "cp1252" --> return 1252 + if ((name[0] == 'c' || name[0] == 'C') && + (name[1] == 'p' || name[1] == 'P')) + { + return atoi(name + 2); + } + + return vmime::platforms::windows::windowsCodepages::getByName(name); // throws +} + + +shared_ptr + charsetConverter_win::getFilteredOutputStream(utility::outputStream& /* os */) +{ + // TODO: implement me! + return null; +} + + +} // vmime + + +#endif // VMIME_CHARSETCONV_LIB_IS_WIN diff --git a/src/vmime/charsetConverter_win.hpp b/src/vmime/charsetConverter_win.hpp new file mode 100644 index 00000000..a89fc021 --- /dev/null +++ b/src/vmime/charsetConverter_win.hpp @@ -0,0 +1,79 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#ifndef VMIME_CHARSETCONVERTER_WIN_HPP_INCLUDED +#define VMIME_CHARSETCONVERTER_WIN_HPP_INCLUDED + + +#include "vmime/config.hpp" + + +#if VMIME_CHARSETCONV_LIB_IS_WIN + + +#include "vmime/charsetConverter.hpp" + + +namespace vmime +{ + + +/** A generic charset converter which uses Windows MultiByteToWideChar. + */ + +class charsetConverter_win : public charsetConverter +{ +public: + + /** Construct and initialize a Windows charset converter. + * + * @param source input charset + * @param dest output charset + * @param opts conversion options + */ + charsetConverter_win(const charset& source, const charset& dest, + const charsetConverterOptions& opts = charsetConverterOptions()); + + void convert(const string& in, string& out); + void convert(utility::inputStream& in, utility::outputStream& out); + + shared_ptr + getFilteredOutputStream(utility::outputStream& os); + +private: + + static int getCodePage(const char* name); + + charset m_source; + charset m_dest; + + charsetConverterOptions m_options; +}; + + +} // namespace + + +#endif // VMIME_CHARSETCONV_LIB_IS_WIN + +#endif // VMIME_CHARSETCONVERTER_WIN_HPP_INCLUDED diff --git a/src/vmime/platforms/windows/windowsCodepages.hpp b/src/vmime/platforms/windows/windowsCodepages.hpp new file mode 100644 index 00000000..dfd6afdd --- /dev/null +++ b/src/vmime/platforms/windows/windowsCodepages.hpp @@ -0,0 +1,198 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#ifndef VMIME_PLATFORMS_WINDOWS_CODEPAGES_HPP_INCLUDED +#define VMIME_PLATFORMS_WINDOWS_CODEPAGES_HPP_INCLUDED + + +#include "vmime/config.hpp" + + +#if VMIME_PLATFORM_IS_WINDOWS + + +#include + + +namespace vmime { +namespace platforms { +namespace windows { + + +class windowsCodepages +{ +public: + + static int getByName(const char* s8_Name) + { + if (stricmp(s8_Name, "ASMO-708") == 0) return 708; + if (stricmp(s8_Name, "big5") == 0) return 950; + if (stricmp(s8_Name, "cp1025") == 0) return 21025; + if (stricmp(s8_Name, "cp866") == 0) return 866; + if (stricmp(s8_Name, "cp875") == 0) return 875; + if (stricmp(s8_Name, "DOS-720") == 0) return 720; + if (stricmp(s8_Name, "DOS-862") == 0) return 862; + if (stricmp(s8_Name, "EUC-CN") == 0) return 51936; + if (stricmp(s8_Name, "euc-jp") == 0) return 51932; + if (stricmp(s8_Name, "EUC-JP") == 0) return 20932; + if (stricmp(s8_Name, "euc-kr") == 0) return 51949; + if (stricmp(s8_Name, "GB18030") == 0) return 54936; + if (stricmp(s8_Name, "gb2312") == 0) return 936; + if (stricmp(s8_Name, "hz-gb-2312") == 0) return 52936; + if (stricmp(s8_Name, "IBM00858") == 0) return 858; + if (stricmp(s8_Name, "IBM00924") == 0) return 20924; + if (stricmp(s8_Name, "IBM01047") == 0) return 1047; + if (stricmp(s8_Name, "IBM01140") == 0) return 1140; + if (stricmp(s8_Name, "IBM01141") == 0) return 1141; + if (stricmp(s8_Name, "IBM01142") == 0) return 1142; + if (stricmp(s8_Name, "IBM01143") == 0) return 1143; + if (stricmp(s8_Name, "IBM01144") == 0) return 1144; + if (stricmp(s8_Name, "IBM01145") == 0) return 1145; + if (stricmp(s8_Name, "IBM01146") == 0) return 1146; + if (stricmp(s8_Name, "IBM01147") == 0) return 1147; + if (stricmp(s8_Name, "IBM01148") == 0) return 1148; + if (stricmp(s8_Name, "IBM01149") == 0) return 1149; + if (stricmp(s8_Name, "IBM037") == 0) return 37; + if (stricmp(s8_Name, "IBM1026") == 0) return 1026; + if (stricmp(s8_Name, "IBM273") == 0) return 20273; + if (stricmp(s8_Name, "IBM277") == 0) return 20277; + if (stricmp(s8_Name, "IBM278") == 0) return 20278; + if (stricmp(s8_Name, "IBM280") == 0) return 20280; + if (stricmp(s8_Name, "IBM284") == 0) return 20284; + if (stricmp(s8_Name, "IBM285") == 0) return 20285; + if (stricmp(s8_Name, "IBM290") == 0) return 20290; + if (stricmp(s8_Name, "IBM297") == 0) return 20297; + if (stricmp(s8_Name, "IBM420") == 0) return 20420; + if (stricmp(s8_Name, "IBM423") == 0) return 20423; + if (stricmp(s8_Name, "IBM424") == 0) return 20424; + if (stricmp(s8_Name, "IBM437") == 0) return 437; + if (stricmp(s8_Name, "IBM500") == 0) return 500; + if (stricmp(s8_Name, "ibm737") == 0) return 737; + if (stricmp(s8_Name, "ibm775") == 0) return 775; + if (stricmp(s8_Name, "ibm850") == 0) return 850; + if (stricmp(s8_Name, "ibm852") == 0) return 852; + if (stricmp(s8_Name, "IBM855") == 0) return 855; + if (stricmp(s8_Name, "ibm857") == 0) return 857; + if (stricmp(s8_Name, "IBM860") == 0) return 860; + if (stricmp(s8_Name, "ibm861") == 0) return 861; + if (stricmp(s8_Name, "IBM863") == 0) return 863; + if (stricmp(s8_Name, "IBM864") == 0) return 864; + if (stricmp(s8_Name, "IBM865") == 0) return 865; + if (stricmp(s8_Name, "ibm869") == 0) return 869; + if (stricmp(s8_Name, "IBM870") == 0) return 870; + if (stricmp(s8_Name, "IBM871") == 0) return 20871; + if (stricmp(s8_Name, "IBM880") == 0) return 20880; + if (stricmp(s8_Name, "IBM905") == 0) return 20905; + if (stricmp(s8_Name, "IBM-Thai") == 0) return 20838; + if (stricmp(s8_Name, "iso-2022-jp") == 0) return 50222; + if (stricmp(s8_Name, "iso-2022-kr") == 0) return 50225; + if (stricmp(s8_Name, "iso-8859-1") == 0) return 28591; + if (stricmp(s8_Name, "iso-8859-13") == 0) return 28603; + if (stricmp(s8_Name, "iso-8859-15") == 0) return 28605; + if (stricmp(s8_Name, "iso-8859-2") == 0) return 28592; + if (stricmp(s8_Name, "iso-8859-3") == 0) return 28593; + if (stricmp(s8_Name, "iso-8859-4") == 0) return 28594; + if (stricmp(s8_Name, "iso-8859-5") == 0) return 28595; + if (stricmp(s8_Name, "iso-8859-6") == 0) return 28596; + if (stricmp(s8_Name, "iso-8859-7") == 0) return 28597; + if (stricmp(s8_Name, "iso-8859-8") == 0) return 28598; + if (stricmp(s8_Name, "iso-8859-8-i") == 0) return 38598; + if (stricmp(s8_Name, "iso-8859-9") == 0) return 28599; + if (stricmp(s8_Name, "Johab") == 0) return 1361; + if (stricmp(s8_Name, "koi8-r") == 0) return 20866; + if (stricmp(s8_Name, "koi8-u") == 0) return 21866; + if (stricmp(s8_Name, "ks_c_5601-1987") == 0) return 949; + if (stricmp(s8_Name, "macintosh") == 0) return 10000; + if (stricmp(s8_Name, "unicodeFFFE") == 0) return 1201; + if (stricmp(s8_Name, "us-ascii") == 0) return 20127; + if (stricmp(s8_Name, "utf-16") == 0) return 1200; + if (stricmp(s8_Name, "utf-32") == 0) return 12000; + if (stricmp(s8_Name, "utf-32BE") == 0) return 12001; + if (stricmp(s8_Name, "utf-7") == 0) return 65000; + if (stricmp(s8_Name, "utf-8") == 0) return 65001; + if (stricmp(s8_Name, "windows-1250") == 0) return 1250; + if (stricmp(s8_Name, "windows-1251") == 0) return 1251; + if (stricmp(s8_Name, "Windows-1252") == 0) return 1252; + if (stricmp(s8_Name, "windows-1253") == 0) return 1253; + if (stricmp(s8_Name, "windows-1254") == 0) return 1254; + if (stricmp(s8_Name, "windows-1255") == 0) return 1255; + if (stricmp(s8_Name, "windows-1256") == 0) return 1256; + if (stricmp(s8_Name, "windows-1257") == 0) return 1257; + if (stricmp(s8_Name, "windows-1258") == 0) return 1258; + if (stricmp(s8_Name, "windows-874") == 0) return 874; + if (stricmp(s8_Name, "x-Chinese-CNS") == 0) return 20000; + if (stricmp(s8_Name, "x-Chinese-Eten") == 0) return 20002; + if (stricmp(s8_Name, "x-cp20001") == 0) return 20001; + if (stricmp(s8_Name, "x-cp20003") == 0) return 20003; + if (stricmp(s8_Name, "x-cp20004") == 0) return 20004; + if (stricmp(s8_Name, "x-cp20005") == 0) return 20005; + if (stricmp(s8_Name, "x-cp20261") == 0) return 20261; + if (stricmp(s8_Name, "x-cp20269") == 0) return 20269; + if (stricmp(s8_Name, "x-cp20936") == 0) return 20936; + if (stricmp(s8_Name, "x-cp20949") == 0) return 20949; + if (stricmp(s8_Name, "x-cp50227") == 0) return 50227; + if (stricmp(s8_Name, "x-EBCDIC-KoreanExtended") == 0) return 20833; + if (stricmp(s8_Name, "x-Europa") == 0) return 29001; + if (stricmp(s8_Name, "x-IA5") == 0) return 20105; + if (stricmp(s8_Name, "x-IA5-German") == 0) return 20106; + if (stricmp(s8_Name, "x-IA5-Norwegian") == 0) return 20108; + if (stricmp(s8_Name, "x-IA5-Swedish") == 0) return 20107; + if (stricmp(s8_Name, "x-iscii-as") == 0) return 57006; + if (stricmp(s8_Name, "x-iscii-be") == 0) return 57003; + if (stricmp(s8_Name, "x-iscii-de") == 0) return 57002; + if (stricmp(s8_Name, "x-iscii-gu") == 0) return 57010; + if (stricmp(s8_Name, "x-iscii-ka") == 0) return 57008; + if (stricmp(s8_Name, "x-iscii-ma") == 0) return 57009; + if (stricmp(s8_Name, "x-iscii-or") == 0) return 57007; + if (stricmp(s8_Name, "x-iscii-pa") == 0) return 57011; + if (stricmp(s8_Name, "x-iscii-ta") == 0) return 57004; + if (stricmp(s8_Name, "x-iscii-te") == 0) return 57005; + if (stricmp(s8_Name, "x-mac-arabic") == 0) return 10004; + if (stricmp(s8_Name, "x-mac-ce") == 0) return 10029; + if (stricmp(s8_Name, "x-mac-chinesesimp") == 0) return 10008; + if (stricmp(s8_Name, "x-mac-chinesetrad") == 0) return 10002; + if (stricmp(s8_Name, "x-mac-croatian") == 0) return 10082; + if (stricmp(s8_Name, "x-mac-cyrillic") == 0) return 10007; + if (stricmp(s8_Name, "x-mac-greek") == 0) return 10006; + if (stricmp(s8_Name, "x-mac-hebrew") == 0) return 10005; + if (stricmp(s8_Name, "x-mac-icelandic") == 0) return 10079; + if (stricmp(s8_Name, "x-mac-japanese") == 0) return 10001; + if (stricmp(s8_Name, "x-mac-korean") == 0) return 10003; + if (stricmp(s8_Name, "x-mac-romanian") == 0) return 10010; + if (stricmp(s8_Name, "x-mac-thai") == 0) return 10021; + if (stricmp(s8_Name, "x-mac-turkish") == 0) return 10081; + if (stricmp(s8_Name, "x-mac-ukrainian") == 0) return 10017; + + throw exception(std::string("Unknown charset: ") + s8_Name); + } +}; + + +} // windows +} // platforms +} // vmime + + +#endif // VMIME_PLATFORM_IS_WINDOWS + +#endif // VMIME_PLATFORMS_WINDOWS_CODEPAGES_HPP_INCLUDED