diff options
author | Vincent Richard <[email protected]> | 2015-02-16 17:43:03 +0000 |
---|---|---|
committer | Vincent Richard <[email protected]> | 2015-02-16 17:43:03 +0000 |
commit | c5c66f9fdcd7d9ba4faf6f62cd17d1de112b228e (patch) | |
tree | e907e7f47670b1f66da4573d2432d0db3e768860 /tests | |
parent | Issue #99: replaced C99 VLAs with dynamic array using std::vector. (diff) | |
download | vmime-c5c66f9fdcd7d9ba4faf6f62cd17d1de112b228e.tar.gz vmime-c5c66f9fdcd7d9ba4faf6f62cd17d1de112b228e.zip |
Issue #103: fix badly encoded words.
Diffstat (limited to 'tests')
-rw-r--r-- | tests/parser/charsetTest.cpp | 105 | ||||
-rw-r--r-- | tests/parser/textTest.cpp | 46 |
2 files changed, 149 insertions, 2 deletions
diff --git a/tests/parser/charsetTest.cpp b/tests/parser/charsetTest.cpp index 915b8560..e599c5b5 100644 --- a/tests/parser/charsetTest.cpp +++ b/tests/parser/charsetTest.cpp @@ -21,6 +21,8 @@ // the GNU General Public License cover the whole combination. // +#include <algorithm> + #include "tests/testUtils.hpp" #include "charsetTestSuites.hpp" @@ -39,6 +41,14 @@ VMIME_TEST_SUITE_BEGIN(charsetTest) VMIME_TEST(testDecodeIDNA) VMIME_TEST(testUTF7Support) + + VMIME_TEST(testReplaceInvalidSequence) + VMIME_TEST(testStopOnInvalidSequence) + + VMIME_TEST(testStatus) + VMIME_TEST(testStatusWithInvalidSequence) + + VMIME_TEST(testIsValidText) VMIME_TEST_LIST_END @@ -106,10 +116,15 @@ VMIME_TEST_SUITE_BEGIN(charsetTest) } static const vmime::string convertHelper - (const vmime::string& in, const vmime::charset& csrc, const vmime::charset& cdest) + (const vmime::string& in, const vmime::charset& csrc, const vmime::charset& cdest, + const vmime::charsetConverterOptions& opts = vmime::charsetConverterOptions(), + vmime::charsetConverter::status* st = NULL) { + vmime::shared_ptr <vmime::charsetConverter> conv = + vmime::charsetConverter::create(csrc, cdest, opts); + vmime::string out; - vmime::charset::convert(in, out, csrc, cdest); + conv->convert(in, out, st); return out; } @@ -145,5 +160,91 @@ VMIME_TEST_SUITE_BEGIN(charsetTest) VASSERT_EQ("2", "f+APg-o", convertHelper("\x66\xc3\xb8\x6f", "utf-8", "utf-7")); } + void testReplaceInvalidSequence() + { + vmime::charsetConverterOptions opts; + opts.silentlyReplaceInvalidSequences = true; + opts.invalidSequence = "?"; + + vmime::string res = convertHelper + ("\x61\xf1\x80\x80\xe1\x80\xc2\x62\x80\x63\x80\xbf\x64", "utf-8", "iso-8859-1", opts); + + // Result should be in the form "a???b?c??d" or "a??????b?c??d"... + // Remove consecutive question marks for easier matching. 
+ res.erase(std::unique(res.begin(), res.end()), res.end()); + + VASSERT_EQ( + "Illegal UTF-8 sequence", + "a?b?c?d", + res + ); + } + + void testStopOnInvalidSequence() + { + vmime::charsetConverterOptions opts; + opts.silentlyReplaceInvalidSequences = false; + + VASSERT_THROW( + "Illegal UTF-8 sequence", + convertHelper("\x61\xf1\x80\x80\xe1\x80\xc2\x62\x80\x63\x80\xbf\x64", "utf-8", "iso-8859-1", opts), + vmime::exceptions::illegal_byte_sequence_for_charset + ); + } + + void testStatus() + { + vmime::charsetConverterOptions opts; + opts.silentlyReplaceInvalidSequences = false; + + vmime::charsetConverter::status st; + + // 012345 6 7 + convertHelper("Gwena\xc3\xabl", "utf-8", "iso-8859-1", opts, &st); + + VASSERT_EQ("inputBytesRead", 8, st.inputBytesRead); + VASSERT_EQ("outputBytesWritten", 7, st.outputBytesWritten); + } + + void testStatusWithInvalidSequence() + { + vmime::charsetConverterOptions opts; + opts.silentlyReplaceInvalidSequences = false; + + vmime::charsetConverter::status st; + + try + { + // 01234 5 6789 0 1 + convertHelper("Fran\xc3\xa7ois\xf1\x80\x65", "utf-8", "iso-8859-1", opts, &st); + } + catch (vmime::exceptions::illegal_byte_sequence_for_charset& e) + { + } + catch (...) 
+ { + throw; + } + + VASSERT_EQ("inputBytesRead", 9, st.inputBytesRead); + VASSERT_EQ("outputBytesWritten", 8, st.outputBytesWritten); + } + + void testIsValidText() + { + // Invalid text + const vmime::string invalidText("Fran\xc3\xa7ois\xf1\x80\x65"); + vmime::string::size_type firstInvalidByte; + + VASSERT_EQ("invalid.isValidText", false, vmime::charset("utf-8").isValidText(invalidText, &firstInvalidByte)); + VASSERT_EQ("invalid.firstInvalidByte", 9, firstInvalidByte); + + // Valid text + const vmime::string validText("Gwena\xc3\xabl"); + + VASSERT_EQ("valid.isValidText", true, vmime::charset("utf-8").isValidText(validText, &firstInvalidByte)); + VASSERT_EQ("valid.firstInvalidByte", 8, firstInvalidByte); + } + VMIME_TEST_SUITE_END diff --git a/tests/parser/textTest.cpp b/tests/parser/textTest.cpp index 588dc194..978d9145 100644 --- a/tests/parser/textTest.cpp +++ b/tests/parser/textTest.cpp @@ -61,6 +61,7 @@ VMIME_TEST_SUITE_BEGIN(textTest) VMIME_TEST(testInternationalizedEmail_folding) VMIME_TEST(testWronglyPaddedB64Words) + VMIME_TEST(testFixBrokenWords) VMIME_TEST_LIST_END @@ -617,5 +618,50 @@ VMIME_TEST_SUITE_BEGIN(textTest) outText.getConvertedText(vmime::charset("utf-8"))); } + // Ensure that words which encode a non-integral number of characters + // are correctly decoded. 
+ void testFixBrokenWords() + { + vmime::text outText; + + vmime::charsetConverterOptions opts; + opts.silentlyReplaceInvalidSequences = false; // just to be sure that broken words are actually fixed + + // Test case 1 + vmime::text::decodeAndUnfold + ("=?utf-8?Q?Gwena=C3?=" + "=?utf-8?Q?=ABl?=", &outText); + + VASSERT_EQ("1", "Gwena\xebl", + outText.getConvertedText(vmime::charset("iso-8859-1"), opts)); + + // Test case 2 + vmime::text::decodeAndUnfold + ("=?utf-8?B?5Lit6Yu85qmf5qKw6JGj5LqL5pyDMTAz5bm056ysMDXlsYbn?=" + "=?utf-8?B?rKwwN+asoeitsOeoiw==?=", &outText); + + VASSERT_EQ("2", "\xe4\xb8\xad\xe9\x8b\xbc\xe6\xa9\x9f\xe6\xa2\xb0" + "\xe8\x91\xa3\xe4\xba\x8b\xe6\x9c\x83\x31\x30\x33\xe5\xb9\xb4" + "\xe7\xac\xac\x30\x35\xe5\xb1\x86\xe7\xac\xac\x30\x37\xe6\xac" + "\xa1\xe8\xad\xb0\xe7\xa8\x8b", + outText.getConvertedText(vmime::charset("utf-8"))); + + // Test case 3 (a character spanning over 3 words: 'を' = E3 82 92) + vmime::text::decodeAndUnfold + ("=?utf-8?Q?abc=E3?=" + "=?utf-8?Q?=82?=" + "=?utf-8?Q?=92xyz?=", &outText); + + std::string out; // decode as UTF-16 then re-encode to UTF-8 for easier comparison + vmime::charset::convert( + outText.getConvertedText(vmime::charset("utf-16"), opts), + out, + vmime::charset("utf-16"), + vmime::charset("utf-8") + ); + + VASSERT_EQ("3", "abc\xe3\x82\x92xyz", out); + } + VMIME_TEST_SUITE_END |