vmime/tests/parser/charsetTest.cpp

280 lines
8.4 KiB
C++
Raw Normal View History

2005-10-19 21:00:26 +00:00
//
// VMime library (http://www.vmime.org)
2018-09-05 21:54:48 +00:00
// Copyright (C) 2002 Vincent Richard <vincent@vmime.org>
2005-10-19 21:00:26 +00:00
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 3 of
2005-10-19 21:00:26 +00:00
// the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Linking this library statically or dynamically with other modules is making
// a combined work based on this library. Thus, the terms and conditions of
// the GNU General Public License cover the whole combination.
//
2015-02-16 17:43:03 +00:00
#include <algorithm>
2005-10-19 21:00:26 +00:00
#include "tests/testUtils.hpp"
#include "charsetTestSuites.hpp"
2005-10-19 21:00:26 +00:00
2013-03-08 07:19:50 +00:00
VMIME_TEST_SUITE_BEGIN(charsetTest)
2005-10-19 21:00:26 +00:00
VMIME_TEST_LIST_BEGIN
	// Conversion of valid input (string API, stream API, filtered extraction)
	VMIME_TEST(testConvertStringValid)
	VMIME_TEST(testConvertStreamValid)
	VMIME_TEST(testConvertStreamExtract)

	// Encoded-word generation for windows-1255 text
	VMIME_TEST(testEncodingHebrew1255)

	// IDNA
	VMIME_TEST(testEncodeIDNA)
	VMIME_TEST(testDecodeIDNA)

	// UTF-7
	VMIME_TEST(testUTF7Support)

	// Handling of invalid byte sequences
	VMIME_TEST(testReplaceInvalidSequence)
	VMIME_TEST(testStopOnInvalidSequence)

	// Conversion status reporting
	VMIME_TEST(testStatus)
	VMIME_TEST(testStatusWithInvalidSequence)

	// Text validation
	VMIME_TEST(testIsValidText)
VMIME_TEST_LIST_END
2018-09-05 21:54:48 +00:00
void testConvertStringValid() {
for (unsigned int i = 0 ; i < charsetTestSuitesCount ; ++i) {
const charsetTestSuiteStruct& entry = charsetTestSuites[i];
std::ostringstream testName;
testName << i << ": " << entry.fromCharset << " -> " << entry.toCharset;
2005-10-19 21:00:26 +00:00
2013-03-24 11:30:26 +00:00
const unsigned long inLength = (entry.fromLength == 0 ? strlen(entry.fromBytes) : entry.fromLength);
vmime::string in(entry.fromBytes, entry.fromBytes + inLength);
2005-10-19 21:00:26 +00:00
2013-03-24 11:30:26 +00:00
const unsigned long outLength = (entry.toLength == 0 ? strlen(entry.toBytes) : entry.toLength);
vmime::string expectedOut(entry.toBytes, entry.toBytes + outLength);
2005-10-19 21:00:26 +00:00
vmime::string actualOut;
2005-10-19 21:00:26 +00:00
vmime::charset::convert
(in, actualOut, entry.fromCharset, entry.toCharset);
VASSERT_EQ(testName.str(), toHex(expectedOut), toHex(actualOut));
}
}
2018-09-05 21:54:48 +00:00
void testConvertStreamValid() {
for (unsigned int i = 0 ; i < charsetTestSuitesCount ; ++i) {
const charsetTestSuiteStruct& entry = charsetTestSuites[i];
std::ostringstream testName;
testName << i << ": " << entry.fromCharset << " -> " << entry.toCharset;
2013-03-24 11:30:26 +00:00
const unsigned long inLength = (entry.fromLength == 0 ? strlen(entry.fromBytes) : entry.fromLength);
vmime::string in(entry.fromBytes, entry.fromBytes + inLength);
2013-03-24 11:30:26 +00:00
const unsigned long outLength = (entry.toLength == 0 ? strlen(entry.toBytes) : entry.toLength);
vmime::string expectedOut(entry.toBytes, entry.toBytes + outLength);
vmime::string actualOut;
vmime::utility::outputStreamStringAdapter os(actualOut);
vmime::utility::inputStreamStringAdapter is(in);
2018-09-05 21:54:48 +00:00
vmime::charset::convert(is, os, entry.fromCharset, entry.toCharset);
os.flush();
VASSERT_EQ(testName.str(), toHex(expectedOut), toHex(actualOut));
}
2005-10-19 21:00:26 +00:00
}
2021-05-15 20:32:24 +00:00
void testConvertStreamExtract() {
vmime::bodyPart p;
p.getBody()->setContents(
vmime::make_shared <vmime::stringContentHandler>(
"Foo éé\r\né bar\r\nbaz"
),
vmime::mediaType("text", "plain"),
vmime::charset("utf-8"),
vmime::encoding("quoted-printable")
);
vmime::string str;
vmime::utility::outputStreamStringAdapter outStr(str);
vmime::shared_ptr <vmime::charsetConverter> conv = vmime::charsetConverter::create(p.getBody()->getCharset(), vmime::charset("US-ASCII"));
auto filteredStream = conv->getFilteredOutputStream(outStr);
p.getBody()->getContents()->extract(*filteredStream);
filteredStream->flush();
VASSERT_EQ(
"generate",
"Foo ??\r\n"
"? bar\r\n"
"baz",
str
);
}
2018-09-05 21:54:48 +00:00
void testEncodingHebrew1255() {
// hewbrew string in windows-1255 charset
const char data[] = "\xe9\xf9\xf7\xf8\xe9\xf9\xf8\xf7\xe9\xe9\xf9";
vmime::word w = vmime::word(data, "windows-1255");
vmime::string encoded = w.generate();
// less than 60% ascii, base64 received
VASSERT_EQ("1", "=?windows-1255?B?6fn3+On5+Pfp6fk=?=", encoded);
}
2005-10-19 21:00:26 +00:00
2018-09-05 21:54:48 +00:00
// Converts 'in' from charset 'csrc' to charset 'cdest' using a converter
// created for this single call.
//
// in     text to convert
// csrc   charset 'in' is expressed in
// cdest  charset of the returned text
// opts   options forwarded to charsetConverter::create()
// st     forwarded as-is to charsetConverter::convert(); may be null when
//        the caller does not need the conversion status
//
// Returns the converted text. Exceptions raised by the converter are not
// caught here and propagate to the caller.
//
// The result is returned as a plain (non-const) value so that callers can
// move from it instead of copying.
static vmime::string convertHelper(
	const vmime::string& in,
	const vmime::charset& csrc,
	const vmime::charset& cdest,
	const vmime::charsetConverterOptions& opts = vmime::charsetConverterOptions(),
	vmime::charsetConverter::status* st = nullptr
) {

	vmime::shared_ptr <vmime::charsetConverter> conv =
		vmime::charsetConverter::create(csrc, cdest, opts);

	vmime::string out;
	conv->convert(in, out, st);

	return out;
}
2018-09-05 21:54:48 +00:00
void testEncodeIDNA() {
VASSERT_EQ("1", "xn--espaol-zwa", convertHelper("español", "utf-8", "idna"));
// Tests from ICANN
VASSERT_EQ("2.1", "xn--hxajbheg2az3al", convertHelper("παράδειγμα", "utf-8", "idna"));
VASSERT_EQ("2.2", "xn--jxalpdlp", convertHelper("δοκιμή", "utf-8", "idna"));
VASSERT_EQ("3.1", "xn--mgbh0fb", convertHelper("مثال", "utf-8", "idna"));
VASSERT_EQ("3.2", "xn--kgbechtv", convertHelper("إختبار", "utf-8", "idna"));
}
2018-09-05 21:54:48 +00:00
void testDecodeIDNA() {
VASSERT_EQ("1", "español", convertHelper("xn--espaol-zwa", "idna", "utf-8"));
// Tests from ICANN
VASSERT_EQ("2.1", "παράδειγμα", convertHelper("xn--hxajbheg2az3al", "idna", "utf-8"));
VASSERT_EQ("2.2", "δοκιμή", convertHelper("xn--jxalpdlp", "idna", "utf-8"));
VASSERT_EQ("3.1", "مثال", convertHelper("xn--mgbh0fb", "idna", "utf-8"));
VASSERT_EQ("3.2", "إختبار", convertHelper("xn--kgbechtv", "idna", "utf-8"));
}
2018-09-05 21:54:48 +00:00
void testUTF7Support() {
// Ensure UTF-7 is supported, because it is used for IMAP
VASSERT_EQ("1", "VMime +- UTF-7 encoding", convertHelper("VMime + UTF-7 encoding", "utf-8", "utf-7"));
VASSERT_EQ("2", "f+APg-o", convertHelper("\x66\xc3\xb8\x6f", "utf-8", "utf-7"));
}
2018-09-05 21:54:48 +00:00
void testReplaceInvalidSequence() {
2015-02-16 17:43:03 +00:00
vmime::charsetConverterOptions opts;
opts.silentlyReplaceInvalidSequences = true;
opts.invalidSequence = "?";
2018-09-05 21:54:48 +00:00
vmime::string res = convertHelper(
"\x61\xf1\x80\x80\xe1\x80\xc2\x62\x80\x63\x80\xbf\x64",
"utf-8", "iso-8859-1", opts
);
2015-02-16 17:43:03 +00:00
// Result should be in the form "a???b?c??d" or "a??????b?c??d"...
// Remove consecutive question marks for easier matching.
res.erase(std::unique(res.begin(), res.end()), res.end());
VASSERT_EQ(
"Illegal UTF-8 sequence",
"a?b?c?d",
res
);
}
2018-09-05 21:54:48 +00:00
void testStopOnInvalidSequence() {
2015-02-16 17:43:03 +00:00
vmime::charsetConverterOptions opts;
opts.silentlyReplaceInvalidSequences = false;
VASSERT_THROW(
"Illegal UTF-8 sequence",
convertHelper("\x61\xf1\x80\x80\xe1\x80\xc2\x62\x80\x63\x80\xbf\x64", "utf-8", "iso-8859-1", opts),
vmime::exceptions::illegal_byte_sequence_for_charset
);
}
2018-09-05 21:54:48 +00:00
void testStatus() {
2015-02-16 17:43:03 +00:00
vmime::charsetConverterOptions opts;
opts.silentlyReplaceInvalidSequences = false;
vmime::charsetConverter::status st;
// 012345 6 7
convertHelper("Gwena\xc3\xabl", "utf-8", "iso-8859-1", opts, &st);
VASSERT_EQ("inputBytesRead", 8, st.inputBytesRead);
VASSERT_EQ("outputBytesWritten", 7, st.outputBytesWritten);
}
2018-09-05 21:54:48 +00:00
void testStatusWithInvalidSequence() {
2015-02-16 17:43:03 +00:00
vmime::charsetConverterOptions opts;
opts.silentlyReplaceInvalidSequences = false;
vmime::charsetConverter::status st;
2018-09-05 21:54:48 +00:00
try {
2015-02-16 17:43:03 +00:00
// 01234 5 6789 0 1
convertHelper("Fran\xc3\xa7ois\xf1\x80\x65", "utf-8", "iso-8859-1", opts, &st);
2018-09-05 21:54:48 +00:00
} catch (vmime::exceptions::illegal_byte_sequence_for_charset& e) {
} catch (...) {
2015-02-16 17:43:03 +00:00
throw;
}
VASSERT_EQ("inputBytesRead", 9, st.inputBytesRead);
VASSERT_EQ("outputBytesWritten", 8, st.outputBytesWritten);
}
2018-09-05 21:54:48 +00:00
void testIsValidText() {
2015-02-16 17:43:03 +00:00
// Invalid text
const vmime::string invalidText("Fran\xc3\xa7ois\xf1\x80\x65");
vmime::string::size_type firstInvalidByte;
VASSERT_EQ("invalid.isValidText", false, vmime::charset("utf-8").isValidText(invalidText, &firstInvalidByte));
VASSERT_EQ("invalid.firstInvalidByte", 9, firstInvalidByte);
// Valid text
const vmime::string validText("Gwena\xc3\xabl");
VASSERT_EQ("valid.isValidText", true, vmime::charset("utf-8").isValidText(validText, &firstInvalidByte));
VASSERT_EQ("valid.firstInvalidByte", 8, firstInvalidByte);
}
2005-10-19 21:00:26 +00:00
VMIME_TEST_SUITE_END