From 0c5d4a10e6f616f5a63787b8fbda86ec9fc487a9 Mon Sep 17 00:00:00 2001 From: Vincent Richard Date: Sun, 24 Feb 2013 16:28:13 +0100 Subject: [PATCH] Message generation/parsing context. Charset conversion options. Preliminary implementation of RFC-6532. --- SConstruct | 9 +- contrib/punycode/punycode.c | 263 +++++++++ contrib/punycode/punycode.h | 84 +++ contrib/utf8/utf8.h | 34 ++ contrib/utf8/utf8/checked.h | 327 +++++++++++ contrib/utf8/utf8/core.h | 326 +++++++++++ contrib/utf8/utf8/unchecked.h | 228 ++++++++ src/address.cpp | 7 +- src/addressList.cpp | 17 +- src/base.cpp | 6 +- src/body.cpp | 22 +- src/bodyPart.cpp | 16 +- src/charset.cpp | 30 +- src/charsetConverter.cpp | 393 +------------- src/charsetConverterOptions.cpp | 37 ++ src/charsetConverter_iconv.cpp | 435 +++++++++++++++ src/charsetConverter_idna.cpp | 168 ++++++ src/component.cpp | 65 ++- src/constants.cpp | 2 + src/contentDisposition.cpp | 10 +- src/context.cpp | 87 +++ src/dateTime.cpp | 10 +- src/disposition.cpp | 12 +- src/emailAddress.cpp | 513 ++++++++++++++++++ src/encoding.cpp | 10 +- src/generationContext.cpp | 109 ++++ src/header.cpp | 14 +- src/headerField.cpp | 21 +- src/mailbox.cpp | 77 ++- src/mailboxField.cpp | 7 +- src/mailboxGroup.cpp | 21 +- src/mailboxList.cpp | 11 +- src/mdn/MDNHelper.cpp | 2 +- src/mediaType.cpp | 12 +- src/message.cpp | 30 +- src/messageId.cpp | 19 +- src/messageIdSequence.cpp | 17 +- src/net/sendmail/sendmailTransport.cpp | 4 +- src/net/smtp/SMTPCommand.cpp | 15 +- src/parameter.cpp | 28 +- src/parameterizedHeaderField.cpp | 18 +- src/{options.cpp => parsingContext.cpp} | 35 +- src/path.cpp | 10 +- src/relay.cpp | 20 +- src/text.cpp | 45 +- src/utility/stringUtils.cpp | 33 ++ src/word.cpp | 98 ++-- src/wordEncoder.cpp | 28 +- tests/parser/charsetTest.cpp | 88 ++- tests/parser/emailAddressTest.cpp | 224 ++++++++ tests/parser/mailboxTest.cpp | 2 +- tests/parser/textTest.cpp | 79 ++- tests/testUtils.hpp | 17 +- vmime/address.hpp | 6 +- vmime/addressList.hpp | 7 
+- vmime/body.hpp | 7 +- vmime/bodyPart.hpp | 7 +- vmime/charset.hpp | 18 +- vmime/charsetConverter.hpp | 103 ++-- ...ptions.hpp => charsetConverterOptions.hpp} | 62 +-- vmime/charsetConverter_iconv.hpp | 124 +++++ vmime/charsetConverter_idna.hpp | 70 +++ vmime/component.hpp | 73 ++- vmime/constants.hpp | 2 + vmime/contentDisposition.hpp | 7 +- vmime/context.hpp | 122 +++++ vmime/dateTime.hpp | 7 +- vmime/disposition.hpp | 7 +- vmime/emailAddress.hpp | 121 +++++ vmime/encoding.hpp | 7 +- vmime/generationContext.hpp | 106 ++++ vmime/header.hpp | 7 +- vmime/headerField.hpp | 10 +- vmime/mailbox.hpp | 18 +- vmime/mailboxField.hpp | 4 +- vmime/mailboxGroup.hpp | 7 +- vmime/mailboxList.hpp | 7 +- vmime/mediaType.hpp | 7 +- vmime/message.hpp | 20 +- vmime/messageId.hpp | 10 +- vmime/messageIdSequence.hpp | 7 +- vmime/parameter.hpp | 9 +- vmime/parameterizedHeaderField.hpp | 7 +- vmime/parsingContext.hpp | 59 ++ vmime/path.hpp | 7 +- vmime/relay.hpp | 7 +- vmime/text.hpp | 39 +- vmime/utility/stringUtils.hpp | 27 + vmime/vmime.hpp | 5 +- vmime/word.hpp | 22 +- vmime/wordEncoder.hpp | 3 +- 91 files changed, 4374 insertions(+), 959 deletions(-) create mode 100644 contrib/punycode/punycode.c create mode 100644 contrib/punycode/punycode.h create mode 100644 contrib/utf8/utf8.h create mode 100644 contrib/utf8/utf8/checked.h create mode 100644 contrib/utf8/utf8/core.h create mode 100644 contrib/utf8/utf8/unchecked.h create mode 100644 src/charsetConverterOptions.cpp create mode 100644 src/charsetConverter_iconv.cpp create mode 100644 src/charsetConverter_idna.cpp create mode 100644 src/context.cpp create mode 100644 src/emailAddress.cpp create mode 100644 src/generationContext.cpp rename src/{options.cpp => parsingContext.cpp} (60%) create mode 100644 tests/parser/emailAddressTest.cpp rename vmime/{options.hpp => charsetConverterOptions.hpp} (53%) create mode 100644 vmime/charsetConverter_iconv.hpp create mode 100644 vmime/charsetConverter_idna.hpp create mode 100644 
vmime/context.hpp create mode 100644 vmime/emailAddress.hpp create mode 100644 vmime/generationContext.hpp create mode 100644 vmime/parsingContext.hpp diff --git a/SConstruct b/SConstruct index bc277263..3f89f673 100644 --- a/SConstruct +++ b/SConstruct @@ -56,20 +56,26 @@ libvmime_sources = [ 'bodyPartAttachment.cpp', 'bodyPartAttachment.hpp', 'charset.cpp', 'charset.hpp', 'charsetConverter.cpp', 'charsetConverter.hpp', + 'charsetConverter_iconv.cpp', 'charsetConverter_iconv.hpp', + 'charsetConverter_idna.cpp', 'charsetConverter_idna.hpp', + 'charsetConverterOptions.cpp', 'charsetConverterOptions.hpp', 'component.cpp', 'component.hpp', 'constants.cpp', 'constants.hpp', 'contentDisposition.cpp', 'contentDisposition.hpp', 'contentDispositionField.cpp', 'contentDispositionField.hpp', 'contentHandler.cpp', 'contentHandler.hpp', 'contentTypeField.cpp', 'contentTypeField.hpp', + 'context.hpp', 'context.cpp', 'dateTime.cpp', 'dateTime.hpp', 'defaultAttachment.cpp', 'defaultAttachment.hpp', 'disposition.cpp', 'disposition.hpp', + 'emailAddress.cpp', 'emailAddress.hpp', 'emptyContentHandler.cpp', 'emptyContentHandler.hpp', 'encoding.cpp', 'encoding.hpp', 'exception.cpp', 'exception.hpp', 'fileAttachment.cpp', 'fileAttachment.hpp', 'generatedMessageAttachment.hpp', 'generatedMessageAttachment.cpp', + 'generationContext.hpp', 'generationContext.cpp', 'header.cpp', 'header.hpp', 'headerFieldFactory.cpp', 'headerFieldFactory.hpp', 'headerField.cpp', 'headerField.hpp', @@ -87,12 +93,12 @@ libvmime_sources = [ 'messageIdSequence.cpp', 'messageIdSequence.hpp', 'messageParser.cpp', 'messageParser.hpp', 'object.cpp', 'object.hpp', - 'options.cpp', 'options.hpp', 'path.cpp', 'path.hpp', 'parameter.cpp', 'parameter.hpp', 'parameterizedHeaderField.cpp', 'parameterizedHeaderField.hpp', 'parsedMessageAttachment.cpp', 'parsedMessageAttachment.hpp', 'parserHelpers.hpp', + 'parsingContext.hpp', 'parsingContext.cpp', 'plainTextPart.cpp', 'plainTextPart.hpp', 'platform.cpp', 'platform.hpp', 
'propertySet.cpp', 'propertySet.hpp', @@ -352,6 +358,7 @@ libvmimetest_sources = [ 'tests/parser/charsetTest.cpp', 'tests/parser/datetimeTest.cpp', 'tests/parser/dispositionTest.cpp', + 'tests/parser/emailAddressTest.cpp', 'tests/parser/headerTest.cpp', 'tests/parser/htmlTextPartTest.cpp', 'tests/parser/mailboxTest.cpp', diff --git a/contrib/punycode/punycode.c b/contrib/punycode/punycode.c new file mode 100644 index 00000000..0650d5da --- /dev/null +++ b/contrib/punycode/punycode.c @@ -0,0 +1,263 @@ +/* +punycode.c from RFC 3492 +http://www.nicemice.net/idn/ +Adam M. Costello +http://www.nicemice.net/amc/ + +This is ANSI C code (C89) implementing Punycode (RFC 3492). + +*/ + +#include + +/*** Bootstring parameters for Punycode ***/ + +enum { base = 36, tmin = 1, tmax = 26, skew = 38, damp = 700, + initial_bias = 72, initial_n = 0x80, delimiter = 0x2D }; + +/* basic(cp) tests whether cp is a basic code point: */ +#define basic(cp) ((punycode_uint)(cp) < 0x80) + +/* delim(cp) tests whether cp is a delimiter: */ +#define delim(cp) ((cp) == delimiter) + +/* decode_digit(cp) returns the numeric value of a basic code */ +/* point (for use in representing integers) in the range 0 to */ +/* base-1, or base if cp does not represent a value. */ + +static punycode_uint decode_digit(punycode_uint cp) +{ + return cp - 48 < 10 ? cp - 22 : cp - 65 < 26 ? cp - 65 : + cp - 97 < 26 ? cp - 97 : (punycode_uint) base; +} + +/* encode_digit(d,flag) returns the basic code point whose value */ +/* (when used for representing integers) is d, which needs to be in */ +/* the range 0 to base-1. The lowercase form is used unless flag is */ +/* nonzero, in which case the uppercase form is used. The behavior */ +/* is undefined if flag is nonzero and digit d has no uppercase form. 
*/ + +static char encode_digit(punycode_uint d, int flag) +{ + return d + 22 + 75 * (d < 26) - ((flag != 0) << 5); + /* 0..25 map to ASCII a..z or A..Z */ + /* 26..35 map to ASCII 0..9 */ +} + +/* flagged(bcp) tests whether a basic code point is flagged */ +/* (uppercase). The behavior is undefined if bcp is not a */ +/* basic code point. */ + +#define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26) + +/* encode_basic(bcp,flag) forces a basic code point to lowercase */ +/* if flag is zero, uppercase if flag is nonzero, and returns */ +/* the resulting code point. The code point is unchanged if it */ +/* is caseless. The behavior is undefined if bcp is not a basic */ +/* code point. */ + +static char encode_basic(punycode_uint bcp, int flag) +{ + bcp -= (bcp - 97 < 26) << 5; + return bcp + ((!flag && (bcp - 65 < 26)) << 5); +} + +/*** Platform-specific constants ***/ + +/* maxint is the maximum value of a punycode_uint variable: */ +static const punycode_uint maxint = -1U; +/* Because maxint is unsigned, -1 becomes the maximum value. */ + +/*** Bias adaptation function ***/ + +static punycode_uint adapt( + punycode_uint delta, punycode_uint numpoints, int firsttime ) +{ + punycode_uint k; + + delta = firsttime ? 
delta / damp : delta >> 1; + /* delta >> 1 is a faster way of doing delta / 2 */ + delta += delta / numpoints; + + for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base) { + delta /= base - tmin; + } + + return k + (base - tmin + 1) * delta / (delta + skew); +} + +/*** Main encode function ***/ + +enum punycode_status punycode_encode( + punycode_uint input_length, + const punycode_uint input[], + const unsigned char case_flags[], + punycode_uint *output_length, + char output[] ) +{ + punycode_uint n, delta, h, b, out, max_out, bias, j, m, q, k, t; + + /* Initialize the state: */ + + n = initial_n; + delta = out = 0; + max_out = *output_length; + bias = initial_bias; + + /* Handle the basic code points: */ + + for (j = 0; j < input_length; ++j) { + if (basic(input[j])) { + if (max_out - out < 2) return punycode_big_output; + output[out++] = + case_flags ? encode_basic(input[j], case_flags[j]) : input[j]; + } + /* else if (input[j] < n) return punycode_bad_input; */ + /* (not needed for Punycode with unsigned code points) */ + } + + h = b = out; + + /* h is the number of code points that have been handled, b is the */ + /* number of basic code points, and out is the number of characters */ + /* that have been output. */ + + if (b > 0) output[out++] = delimiter; + + /* Main encoding loop: */ + + while (h < input_length) { + /* All non-basic code points < n have been */ + /* handled already. 
Find the next larger one: */ + + for (m = maxint, j = 0; j < input_length; ++j) { + /* if (basic(input[j])) continue; */ + /* (not needed for Punycode) */ + if (input[j] >= n && input[j] < m) m = input[j]; + } + + /* Increase delta enough to advance the decoder's */ + /* <n,i> state to <m,0>, but guard against overflow: */ + + if (m - n > (maxint - delta) / (h + 1)) return punycode_overflow; + delta += (m - n) * (h + 1); + n = m; + + for (j = 0; j < input_length; ++j) { + /* Punycode does not need to check whether input[j] is basic: */ + if (input[j] < n /* || basic(input[j]) */ ) { + if (++delta == 0) return punycode_overflow; + } + + if (input[j] == n) { + /* Represent delta as a generalized variable-length integer: */ + + for (q = delta, k = base; ; k += base) { + if (out >= max_out) return punycode_big_output; + t = k <= bias /* + tmin */ ? (punycode_uint) tmin : /* +tmin not needed */ + k >= (punycode_uint) bias + (punycode_uint) tmax ? (punycode_uint) tmax : k - (punycode_uint) bias; + if (q < t) break; + output[out++] = encode_digit(t + (q - t) % (base - t), 0); + q = (q - t) / (base - t); + } + + output[out++] = encode_digit(q, case_flags && case_flags[j]); + bias = adapt(delta, h + 1, h == b); + delta = 0; + ++h; + } + } + + ++delta, ++n; + } + + *output_length = out; + return punycode_success; +} + +/*** Main decode function ***/ + +enum punycode_status punycode_decode( + punycode_uint input_length, + const char input[], + punycode_uint *output_length, + punycode_uint output[], + unsigned char case_flags[] ) +{ + punycode_uint n, out, i, max_out, bias, + b, j, in, oldi, w, k, digit, t; + + /* Initialize the state: */ + + n = initial_n; + out = i = 0; + max_out = *output_length; + bias = initial_bias; + + /* Handle the basic code points: Let b be the number of input code */ + /* points before the last delimiter, or 0 if there is none, then */ + /* copy the first b code points to the output. 
*/ + + for (b = j = 0; j < input_length; ++j) if (delim(input[j])) b = j; + if (b > max_out) return punycode_big_output; + + for (j = 0; j < b; ++j) { + if (case_flags) case_flags[out] = flagged(input[j]); + if (!basic(input[j])) return punycode_bad_input; + output[out++] = input[j]; + } + + /* Main decoding loop: Start just after the last delimiter if any */ + /* basic code points were copied; start at the beginning otherwise. */ + + for (in = b > 0 ? b + 1 : 0; in < input_length; ++out) { + + /* in is the index of the next character to be consumed, and */ + /* out is the number of code points in the output array. */ + + /* Decode a generalized variable-length integer into delta, */ + /* which gets added to i. The overflow checking is easier */ + /* if we increase i as we go, then subtract off its starting */ + /* value at the end to obtain delta. */ + + for (oldi = i, w = 1, k = base; ; k += base) { + if (in >= input_length) return punycode_bad_input; + digit = decode_digit(input[in++]); + if (digit >= base) return punycode_bad_input; + if (digit > (maxint - i) / w) return punycode_overflow; + i += digit * w; + t = k <= (punycode_uint) bias /* + tmin */ ? (punycode_uint) tmin : /* +tmin not needed */ + k >= (punycode_uint) bias + (punycode_uint) tmax ? 
(punycode_uint) tmax : k - (punycode_uint) bias; + if (digit < t) break; + if (w > maxint / (base - t)) return punycode_overflow; + w *= (base - t); + } + + bias = adapt(i - oldi, out + 1, oldi == 0); + + /* i was supposed to wrap around from out+1 to 0, */ + /* incrementing n each time, so we'll fix that now: */ + + if (i / (out + 1) > maxint - n) return punycode_overflow; + n += i / (out + 1); + i %= (out + 1); + + /* Insert n at position i of the output: */ + + /* not needed for Punycode: */ + /* if (decode_digit(n) <= base) return punycode_invalid_input; */ + if (out >= max_out) return punycode_big_output; + + if (case_flags) { + memmove(case_flags + i + 1, case_flags + i, out - i); + /* Case of last character determines uppercase flag: */ + case_flags[i] = flagged(input[in - 1]); + } + + memmove(output + i + 1, output + i, (out - i) * sizeof *output); + output[i++] = n; + } + + *output_length = out; + return punycode_success; +} diff --git a/contrib/punycode/punycode.h b/contrib/punycode/punycode.h new file mode 100644 index 00000000..fb02ee96 --- /dev/null +++ b/contrib/punycode/punycode.h @@ -0,0 +1,84 @@ +/* +punycode.h from RFC 3492 +http://www.nicemice.net/idn/ +Adam M. Costello +http://www.nicemice.net/amc/ + +This is ANSI C code (C89) implementing Punycode (RFC 3492). + +*/ + +#include + +enum punycode_status { + punycode_success, + punycode_bad_input, /* Input is invalid. */ + punycode_big_output, /* Output would exceed the space provided. */ + punycode_overflow /* Input needs wider integers to process. */ +}; + +#if UINT_MAX >= (1 << 26) - 1 +typedef unsigned int punycode_uint; +#else +typedef unsigned long punycode_uint; +#endif + +enum punycode_status punycode_encode( + punycode_uint input_length, + const punycode_uint input[], + const unsigned char case_flags[], + punycode_uint *output_length, + char output[] ); + + /* punycode_encode() converts Unicode to Punycode. 
The input */ + /* is represented as an array of Unicode code points (not code */ + /* units; surrogate pairs are not allowed), and the output */ + /* will be represented as an array of ASCII code points. The */ + /* output string is *not* null-terminated; it will contain */ + /* zeros if and only if the input contains zeros. (Of course */ + /* the caller can leave room for a terminator and add one if */ + /* needed.) The input_length is the number of code points in */ + /* the input. The output_length is an in/out argument: the */ + /* caller passes in the maximum number of code points that it */ + /* can receive, and on successful return it will contain the */ + /* number of code points actually output. The case_flags array */ + /* holds input_length boolean values, where nonzero suggests that */ + /* the corresponding Unicode character be forced to uppercase */ + /* after being decoded (if possible), and zero suggests that */ + /* it be forced to lowercase (if possible). ASCII code points */ + /* are encoded literally, except that ASCII letters are forced */ + /* to uppercase or lowercase according to the corresponding */ + /* uppercase flags. If case_flags is a null pointer then ASCII */ + /* letters are left as they are, and other code points are */ + /* treated as if their uppercase flags were zero. The return */ + /* value can be any of the punycode_status values defined above */ + /* except punycode_bad_input; if not punycode_success, then */ + /* output_length and output might contain garbage. */ + +enum punycode_status punycode_decode( + punycode_uint input_length, + const char input[], + punycode_uint *output_length, + punycode_uint output[], + unsigned char case_flags[] ); + + /* punycode_decode() converts Punycode to Unicode. The input is */ + /* represented as an array of ASCII code points, and the output */ + /* will be represented as an array of Unicode code points. The */ + /* input_length is the number of code points in the input. 
The */ + /* output_length is an in/out argument: the caller passes in */ + /* the maximum number of code points that it can receive, and */ + /* on successful return it will contain the actual number of */ + /* code points output. The case_flags array needs room for at */ + /* least output_length values, or it can be a null pointer if the */ + /* case information is not needed. A nonzero flag suggests that */ + /* the corresponding Unicode character be forced to uppercase */ + /* by the caller (if possible), while zero suggests that it be */ + /* forced to lowercase (if possible). ASCII code points are */ + /* output already in the proper case, but their flags will be set */ + /* appropriately so that applying the flags would be harmless. */ + /* The return value can be any of the punycode_status values */ + /* defined above; if not punycode_success, then output_length, */ + /* output, and case_flags might contain garbage. On success, the */ + /* decoder will never need to write an output_length greater than */ + /* input_length, because of how the encoding is defined. 
*/ diff --git a/contrib/utf8/utf8.h b/contrib/utf8/utf8.h new file mode 100644 index 00000000..4e445140 --- /dev/null +++ b/contrib/utf8/utf8.h @@ -0,0 +1,34 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+*/ + + +#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "utf8/checked.h" +#include "utf8/unchecked.h" + +#endif // header guard diff --git a/contrib/utf8/utf8/checked.h b/contrib/utf8/utf8/checked.h new file mode 100644 index 00000000..13311551 --- /dev/null +++ b/contrib/utf8/utf8/checked.h @@ -0,0 +1,327 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+*/ + + +#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "core.h" +#include + +namespace utf8 +{ + // Base for the exceptions that may be thrown from the library + class exception : public ::std::exception { + }; + + // Exceptions that may be thrown from the library functions. + class invalid_code_point : public exception { + uint32_t cp; + public: + invalid_code_point(uint32_t cp) : cp(cp) {} + virtual const char* what() const throw() { return "Invalid code point"; } + uint32_t code_point() const {return cp;} + }; + + class invalid_utf8 : public exception { + uint8_t u8; + public: + invalid_utf8 (uint8_t u) : u8(u) {} + virtual const char* what() const throw() { return "Invalid UTF-8"; } + uint8_t utf8_octet() const {return u8;} + }; + + class invalid_utf16 : public exception { + uint16_t u16; + public: + invalid_utf16 (uint16_t u) : u16(u) {} + virtual const char* what() const throw() { return "Invalid UTF-16"; } + uint16_t utf16_word() const {return u16;} + }; + + class not_enough_room : public exception { + public: + virtual const char* what() const throw() { return "Not enough space"; } + }; + + /// The library API - functions intended to be called by the users + + template + octet_iterator append(uint32_t cp, octet_iterator result) + { + if (!utf8::internal::is_code_point_valid(cp)) + throw invalid_code_point(cp); + + if (cp < 0x80) // one octet + *(result++) = static_cast(cp); + else if (cp < 0x800) { // two octets + *(result++) = static_cast((cp >> 6) | 0xc0); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else if (cp < 0x10000) { // three octets + *(result++) = static_cast((cp >> 12) | 0xe0); + *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else { // four octets + *(result++) = static_cast((cp >> 18) | 0xf0); + *(result++) = static_cast(((cp >> 12) & 0x3f) | 0x80); + *(result++) = 
static_cast(((cp >> 6) & 0x3f) | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + return result; + } + + template + output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) + { + while (start != end) { + octet_iterator sequence_start = start; + internal::utf_error err_code = utf8::internal::validate_next(start, end); + switch (err_code) { + case internal::UTF8_OK : + for (octet_iterator it = sequence_start; it != start; ++it) + *out++ = *it; + break; + case internal::NOT_ENOUGH_ROOM: + throw not_enough_room(); + case internal::INVALID_LEAD: + out = utf8::append (replacement, out); + ++start; + break; + case internal::INCOMPLETE_SEQUENCE: + case internal::OVERLONG_SEQUENCE: + case internal::INVALID_CODE_POINT: + out = utf8::append (replacement, out); + ++start; + // just one replacement mark for the sequence + while (start != end && utf8::internal::is_trail(*start)) + ++start; + break; + } + } + return out; + } + + template + inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) + { + static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd); + return utf8::replace_invalid(start, end, out, replacement_marker); + } + + template + uint32_t next(octet_iterator& it, octet_iterator end) + { + uint32_t cp = 0; + internal::utf_error err_code = utf8::internal::validate_next(it, end, cp); + switch (err_code) { + case internal::UTF8_OK : + break; + case internal::NOT_ENOUGH_ROOM : + throw not_enough_room(); + case internal::INVALID_LEAD : + case internal::INCOMPLETE_SEQUENCE : + case internal::OVERLONG_SEQUENCE : + throw invalid_utf8(*it); + case internal::INVALID_CODE_POINT : + throw invalid_code_point(cp); + } + return cp; + } + + template + uint32_t peek_next(octet_iterator it, octet_iterator end) + { + return utf8::next(it, end); + } + + template + uint32_t prior(octet_iterator& it, octet_iterator start) + { + // can't do much if it == 
start + if (it == start) + throw not_enough_room(); + + octet_iterator end = it; + // Go back until we hit either a lead octet or start + while (utf8::internal::is_trail(*(--it))) + if (it == start) + throw invalid_utf8(*it); // error - no lead byte in the sequence + return utf8::peek_next(it, end); + } + + /// Deprecated in versions that include "prior" + template + uint32_t previous(octet_iterator& it, octet_iterator pass_start) + { + octet_iterator end = it; + while (utf8::internal::is_trail(*(--it))) + if (it == pass_start) + throw invalid_utf8(*it); // error - no lead byte in the sequence + octet_iterator temp = it; + return utf8::next(temp, end); + } + + template + void advance (octet_iterator& it, distance_type n, octet_iterator end) + { + for (distance_type i = 0; i < n; ++i) + utf8::next(it, end); + } + + template + typename std::iterator_traits::difference_type + distance (octet_iterator first, octet_iterator last) + { + typename std::iterator_traits::difference_type dist; + for (dist = 0; first < last; ++dist) + utf8::next(first, last); + return dist; + } + + template + octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) + { + while (start != end) { + uint32_t cp = utf8::internal::mask16(*start++); + // Take care of surrogate pairs first + if (utf8::internal::is_lead_surrogate(cp)) { + if (start != end) { + uint32_t trail_surrogate = utf8::internal::mask16(*start++); + if (utf8::internal::is_trail_surrogate(trail_surrogate)) + cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; + else + throw invalid_utf16(static_cast(trail_surrogate)); + } + else + throw invalid_utf16(static_cast(cp)); + + } + // Lone trail surrogate + else if (utf8::internal::is_trail_surrogate(cp)) + throw invalid_utf16(static_cast(cp)); + + result = utf8::append(cp, result); + } + return result; + } + + template + u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) + { + while (start != end) { 
+ uint32_t cp = utf8::next(start, end); + if (cp > 0xffff) { //make a surrogate pair + *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); + *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); + } + else + *result++ = static_cast(cp); + } + return result; + } + + template + octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) + { + while (start != end) + result = utf8::append(*(start++), result); + + return result; + } + + template + u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) + { + while (start != end) + (*result++) = utf8::next(start, end); + + return result; + } + + // The iterator class + template + class iterator : public std::iterator { + octet_iterator it; + octet_iterator range_start; + octet_iterator range_end; + public: + iterator () {} + explicit iterator (const octet_iterator& octet_it, + const octet_iterator& range_start, + const octet_iterator& range_end) : + it(octet_it), range_start(range_start), range_end(range_end) + { + if (it < range_start || it > range_end) + throw std::out_of_range("Invalid utf-8 iterator position"); + } + // the default "big three" are OK + octet_iterator base () const { return it; } + uint32_t operator * () const + { + octet_iterator temp = it; + return utf8::next(temp, range_end); + } + bool operator == (const iterator& rhs) const + { + if (range_start != rhs.range_start || range_end != rhs.range_end) + throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); + return (it == rhs.it); + } + bool operator != (const iterator& rhs) const + { + return !(operator == (rhs)); + } + iterator& operator ++ () + { + utf8::next(it, range_end); + return *this; + } + iterator operator ++ (int) + { + iterator temp = *this; + utf8::next(it, range_end); + return temp; + } + iterator& operator -- () + { + utf8::prior(it, range_start); + return *this; + } + iterator operator -- (int) + { + iterator temp = 
*this; + utf8::prior(it, range_start); + return temp; + } + }; // class iterator + +} // namespace utf8 + +#endif //header guard + + diff --git a/contrib/utf8/utf8/core.h b/contrib/utf8/utf8/core.h new file mode 100644 index 00000000..f858c4a8 --- /dev/null +++ b/contrib/utf8/utf8/core.h @@ -0,0 +1,326 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include + +namespace utf8 +{ + typedef vmime_uint8 uint8_t; + typedef vmime_uint16 uint16_t; + typedef vmime_uint32 uint32_t; + +// Helper code - not intended to be directly called by the library users. 
May be changed at any time +namespace internal +{ + // Unicode constants + // Leading (high) surrogates: 0xd800 - 0xdbff + // Trailing (low) surrogates: 0xdc00 - 0xdfff + const uint16_t LEAD_SURROGATE_MIN = 0xd800u; + const uint16_t LEAD_SURROGATE_MAX = 0xdbffu; + const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u; + const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu; + const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10); + const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; + + // Maximum valid value for a Unicode code point + const uint32_t CODE_POINT_MAX = 0x0010ffffu; + + template + inline uint8_t mask8(octet_type oc) + { + return static_cast(0xff & oc); + } + template + inline uint16_t mask16(u16_type oc) + { + return static_cast(0xffff & oc); + } + template + inline bool is_trail(octet_type oc) + { + return ((utf8::internal::mask8(oc) >> 6) == 0x2); + } + + template + inline bool is_lead_surrogate(u16 cp) + { + return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); + } + + template + inline bool is_trail_surrogate(u16 cp) + { + return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); + } + + template + inline bool is_surrogate(u16 cp) + { + return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); + } + + template + inline bool is_code_point_valid(u32 cp) + { + return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp)); + } + + template + inline typename std::iterator_traits::difference_type + sequence_length(octet_iterator lead_it) + { + uint8_t lead = utf8::internal::mask8(*lead_it); + if (lead < 0x80) + return 1; + else if ((lead >> 5) == 0x6) + return 2; + else if ((lead >> 4) == 0xe) + return 3; + else if ((lead >> 3) == 0x1e) + return 4; + else + return 0; + } + + template + inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length) + { + if (cp < 0x80) { + if (length != 1) + return true; + } + else if (cp < 0x800) { + if (length != 2) + return true; + } + else 
if (cp < 0x10000) { + if (length != 3) + return true; + } + + return false; + } + + enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; + + /// Helper for get_sequence_x + template + utf_error increase_safely(octet_iterator& it, octet_iterator end) + { + if (++it == end) + return NOT_ENOUGH_ROOM; + + if (!utf8::internal::is_trail(*it)) + return INCOMPLETE_SEQUENCE; + + return UTF8_OK; + } + + #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;} + + /// get_sequence_x functions decode utf-8 sequences of the length x + template + utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + return UTF8_OK; + } + + template + utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f); + + return UTF8_OK; + } + + template + utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point += (*it) & 0x3f; + + return UTF8_OK; + } + + template + utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 
0x3ffff); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point += (utf8::internal::mask8(*it) << 6) & 0xfff; + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point += (*it) & 0x3f; + + return UTF8_OK; + } + + #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR + + template + utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + // Save the original value of it so we can go back in case of failure + // Of course, it does not make much sense with i.e. stream iterators + octet_iterator original_it = it; + + uint32_t cp = 0; + // Determine the sequence length based on the lead octet + typedef typename std::iterator_traits::difference_type octet_difference_type; + const octet_difference_type length = utf8::internal::sequence_length(it); + + // Get trail octets and calculate the code point + utf_error err = UTF8_OK; + switch (length) { + case 0: + return INVALID_LEAD; + case 1: + err = utf8::internal::get_sequence_1(it, end, cp); + break; + case 2: + err = utf8::internal::get_sequence_2(it, end, cp); + break; + case 3: + err = utf8::internal::get_sequence_3(it, end, cp); + break; + case 4: + err = utf8::internal::get_sequence_4(it, end, cp); + break; + } + + if (err == UTF8_OK) { + // Decoding succeeded. Now, security checks... + if (utf8::internal::is_code_point_valid(cp)) { + if (!utf8::internal::is_overlong_sequence(cp, length)){ + // Passed! Return here. 
+ code_point = cp; + ++it; + return UTF8_OK; + } + else + err = OVERLONG_SEQUENCE; + } + else + err = INVALID_CODE_POINT; + } + + // Failure branch - restore the original value of the iterator + it = original_it; + return err; + } + + template + inline utf_error validate_next(octet_iterator& it, octet_iterator end) { + uint32_t ignored; + return utf8::internal::validate_next(it, end, ignored); + } + +} // namespace internal + + /// The library API - functions intended to be called by the users + + // Byte order mark + const uint8_t bom[] = {0xef, 0xbb, 0xbf}; + + template + octet_iterator find_invalid(octet_iterator start, octet_iterator end) + { + octet_iterator result = start; + while (result != end) { + utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end); + if (err_code != internal::UTF8_OK) + return result; + } + return result; + } + + template + inline bool is_valid(octet_iterator start, octet_iterator end) + { + return (utf8::find_invalid(start, end) == end); + } + + template + inline bool starts_with_bom (octet_iterator it, octet_iterator end) + { + return ( + ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) && + ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) && + ((it != end) && (utf8::internal::mask8(*it)) == bom[2]) + ); + } + + //Deprecated in release 2.3 + template + inline bool is_bom (octet_iterator it) + { + return ( + (utf8::internal::mask8(*it++)) == bom[0] && + (utf8::internal::mask8(*it++)) == bom[1] && + (utf8::internal::mask8(*it)) == bom[2] + ); + } +} // namespace utf8 + +#endif // header guard + + diff --git a/contrib/utf8/utf8/unchecked.h b/contrib/utf8/utf8/unchecked.h new file mode 100644 index 00000000..cb242716 --- /dev/null +++ b/contrib/utf8/utf8/unchecked.h @@ -0,0 +1,228 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the 
"Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+*/ + + +#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "core.h" + +namespace utf8 +{ + namespace unchecked + { + template + octet_iterator append(uint32_t cp, octet_iterator result) + { + if (cp < 0x80) // one octet + *(result++) = static_cast(cp); + else if (cp < 0x800) { // two octets + *(result++) = static_cast((cp >> 6) | 0xc0); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else if (cp < 0x10000) { // three octets + *(result++) = static_cast((cp >> 12) | 0xe0); + *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else { // four octets + *(result++) = static_cast((cp >> 18) | 0xf0); + *(result++) = static_cast(((cp >> 12) & 0x3f)| 0x80); + *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + return result; + } + + template + uint32_t next(octet_iterator& it) + { + uint32_t cp = utf8::internal::mask8(*it); + typename std::iterator_traits::difference_type length = utf8::internal::sequence_length(it); + switch (length) { + case 1: + break; + case 2: + it++; + cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); + break; + case 3: + ++it; + cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); + ++it; + cp += (*it) & 0x3f; + break; + case 4: + ++it; + cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); + ++it; + cp += (utf8::internal::mask8(*it) << 6) & 0xfff; + ++it; + cp += (*it) & 0x3f; + break; + } + ++it; + return cp; + } + + template + uint32_t peek_next(octet_iterator it) + { + return utf8::unchecked::next(it); + } + + template + uint32_t prior(octet_iterator& it) + { + while (utf8::internal::is_trail(*(--it))) ; + octet_iterator temp = it; + return utf8::unchecked::next(temp); + } + + // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous) + 
template + inline uint32_t previous(octet_iterator& it) + { + return utf8::unchecked::prior(it); + } + + template + void advance (octet_iterator& it, distance_type n) + { + for (distance_type i = 0; i < n; ++i) + utf8::unchecked::next(it); + } + + template + typename std::iterator_traits::difference_type + distance (octet_iterator first, octet_iterator last) + { + typename std::iterator_traits::difference_type dist; + for (dist = 0; first < last; ++dist) + utf8::unchecked::next(first); + return dist; + } + + template + octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) + { + while (start != end) { + uint32_t cp = utf8::internal::mask16(*start++); + // Take care of surrogate pairs first + if (utf8::internal::is_lead_surrogate(cp)) { + uint32_t trail_surrogate = utf8::internal::mask16(*start++); + cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; + } + result = utf8::unchecked::append(cp, result); + } + return result; + } + + template + u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) + { + while (start < end) { + uint32_t cp = utf8::unchecked::next(start); + if (cp > 0xffff) { //make a surrogate pair + *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); + *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); + } + else + *result++ = static_cast(cp); + } + return result; + } + + template + octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) + { + while (start != end) + result = utf8::unchecked::append(*(start++), result); + + return result; + } + + template + u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) + { + while (start < end) + (*result++) = utf8::unchecked::next(start); + + return result; + } + + // The iterator class + template + class iterator : public std::iterator { + octet_iterator it; + public: + iterator () {} + explicit iterator (const 
octet_iterator& octet_it): it(octet_it) {} + // the default "big three" are OK + octet_iterator base () const { return it; } + uint32_t operator * () const + { + octet_iterator temp = it; + return utf8::unchecked::next(temp); + } + bool operator == (const iterator& rhs) const + { + return (it == rhs.it); + } + bool operator != (const iterator& rhs) const + { + return !(operator == (rhs)); + } + iterator& operator ++ () + { + ::std::advance(it, utf8::internal::sequence_length(it)); + return *this; + } + iterator operator ++ (int) + { + iterator temp = *this; + ::std::advance(it, utf8::internal::sequence_length(it)); + return temp; + } + iterator& operator -- () + { + utf8::unchecked::prior(it); + return *this; + } + iterator operator -- (int) + { + iterator temp = *this; + utf8::unchecked::prior(it); + return temp; + } + }; // class iterator + + } // namespace utf8::unchecked +} // namespace utf8 + + +#endif // header guard + diff --git a/src/address.cpp b/src/address.cpp index eccf4e21..ab207cf6 100644 --- a/src/address.cpp +++ b/src/address.cpp @@ -66,8 +66,9 @@ address-list = (address *("," address)) / obs-addr-list */ -ref
address::parseNext(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +ref
address::parseNext + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { bool escaped = false; bool quoted = false; @@ -179,7 +180,7 @@ ref
address::parseNext(const string& buffer, const string::size_type p ? create ().dynamicCast
() : create ().dynamicCast
(); - parsedAddress->parse(buffer, start, pos, NULL); + parsedAddress->parse(ctx, buffer, start, pos, NULL); parsedAddress->setParsedBounds(start, pos); return (parsedAddress); diff --git a/src/addressList.cpp b/src/addressList.cpp index 467a283a..5e033f38 100644 --- a/src/addressList.cpp +++ b/src/addressList.cpp @@ -50,8 +50,9 @@ addressList::~addressList() } -void addressList::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void addressList::parseImpl + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { removeAllAddresses(); @@ -59,7 +60,7 @@ void addressList::parseImpl(const string& buffer, const string::size_type positi while (pos < end) { - ref
parsedAddress = address::parseNext(buffer, pos, end, &pos); + ref
parsedAddress = address::parseNext(ctx, buffer, pos, end, &pos); if (parsedAddress != NULL) m_list.push_back(parsedAddress); @@ -72,16 +73,20 @@ void addressList::parseImpl(const string& buffer, const string::size_type positi } -void addressList::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const +void addressList::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { string::size_type pos = curLinePos; + generationContext tmpCtx(ctx); + tmpCtx.setMaxLineLength(tmpCtx.getMaxLineLength() - 2); + if (!m_list.empty()) { for (std::vector >::const_iterator i = m_list.begin() ; ; ) { - (*i)->generate(os, maxLineLength - 2, pos, &pos); + (*i)->generate(tmpCtx, os, pos, &pos); /* tmpCtx, not ctx: keeps the former "maxLineLength - 2" limit */ if (++i == m_list.end()) break; diff --git a/src/base.cpp b/src/base.cpp index 47262faf..d5f3e787 100644 --- a/src/base.cpp +++ b/src/base.cpp @@ -40,7 +40,8 @@ #include "vmime/utility/encoder/encoderFactory.hpp" #include "vmime/headerFieldFactory.hpp" #include "vmime/textPartFactory.hpp" -#include "vmime/options.hpp" +#include "vmime/generationContext.hpp" +#include "vmime/parsingContext.hpp" #if VMIME_HAVE_MESSAGING_FEATURES #include "vmime/net/serviceFactory.hpp" @@ -132,7 +133,8 @@ public: initializer() { - options::getInstance(); + parsingContext::getDefaultContext(); + generationContext::getDefaultContext(); utility::encoder::encoderFactory::getInstance(); headerFieldFactory::getInstance(); diff --git a/src/body.cpp b/src/body.cpp index 20781012..8c599b99 100644 --- a/src/body.cpp +++ b/src/body.cpp @@ -24,8 +24,6 @@ #include "vmime/bodyPart.hpp" #include "vmime/body.hpp" -#include "vmime/options.hpp" - #include "vmime/contentTypeField.hpp" #include "vmime/text.hpp" @@ -56,7 +54,8 @@ body::~body() void body::parseImpl - (ref parser, + (const parsingContext& /* ctx */, + ref parser, const
utility::stream::size_type position, const utility::stream::size_type end, utility::stream::size_type* newPosition) @@ -381,8 +380,9 @@ void body::parseImpl } -void body::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type /* curLinePos */, string::size_type* newLinePos) const +void body::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type /* curLinePos */, string::size_type* newLinePos) const { // MIME-Multipart if (getPartCount() != 0) @@ -418,7 +418,7 @@ void body::generateImpl(utility::outputStream& os, const string::size_type maxLi const string& prologText = m_prologText.empty() ? (isRootPart() - ? options::getInstance()->multipart.getPrologText() + ? ctx.getPrologText() : NULL_STRING ) : m_prologText; @@ -426,7 +426,7 @@ void body::generateImpl(utility::outputStream& os, const string::size_type maxLi const string& epilogText = m_epilogText.empty() ? (isRootPart() - ? options::getInstance()->multipart.getEpilogText() + ? 
ctx.getEpilogText() : NULL_STRING ) : m_epilogText; @@ -435,7 +435,7 @@ void body::generateImpl(utility::outputStream& os, const string::size_type maxLi { text prolog(prologText, vmime::charset("us-ascii")); - prolog.encodeAndFold(os, maxLineLength, 0, + prolog.encodeAndFold(ctx, os, 0, NULL, text::FORCE_NO_ENCODING | text::NO_NEW_LINE_SEQUENCE); os << CRLF; @@ -447,7 +447,7 @@ void body::generateImpl(utility::outputStream& os, const string::size_type maxLi { os << CRLF; - getPartAt(p)->generate(os, maxLineLength, 0); + getPartAt(p)->generate(ctx, os, 0); os << CRLF << "--" << boundary; } @@ -458,7 +458,7 @@ void body::generateImpl(utility::outputStream& os, const string::size_type maxLi { text epilog(epilogText, vmime::charset("us-ascii")); - epilog.encodeAndFold(os, maxLineLength, 0, + epilog.encodeAndFold(ctx, os, 0, NULL, text::FORCE_NO_ENCODING | text::NO_NEW_LINE_SEQUENCE); os << CRLF; @@ -471,7 +471,7 @@ void body::generateImpl(utility::outputStream& os, const string::size_type maxLi else { // Generate the contents - m_contents->generate(os, getEncoding(), maxLineLength); + m_contents->generate(os, getEncoding(), ctx.getMaxLineLength()); } } diff --git a/src/bodyPart.cpp b/src/bodyPart.cpp index fbe9f1ed..32544ba8 100644 --- a/src/bodyPart.cpp +++ b/src/bodyPart.cpp @@ -47,17 +47,18 @@ bodyPart::bodyPart(weak_ref parentPart) void bodyPart::parseImpl - (ref parser, + (const parsingContext& ctx, + ref parser, const utility::stream::size_type position, const utility::stream::size_type end, utility::stream::size_type* newPosition) { // Parse the headers string::size_type pos = position; - m_header->parse(parser, pos, end, &pos); + m_header->parse(ctx, parser, pos, end, &pos); // Parse the body contents - m_body->parse(parser, pos, end, NULL); + m_body->parse(ctx, parser, pos, end, NULL); setParsedBounds(position, end); @@ -66,14 +67,15 @@ void bodyPart::parseImpl } -void bodyPart::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - 
const string::size_type /* curLinePos */, string::size_type* newLinePos) const +void bodyPart::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type /* curLinePos */, string::size_type* newLinePos) const { - m_header->generate(os, maxLineLength); + m_header->generate(ctx, os); os << CRLF; - m_body->generate(os, maxLineLength); + m_body->generate(ctx, os); if (newLinePos) *newLinePos = 0; diff --git a/src/charset.cpp b/src/charset.cpp index 84368e85..092676b2 100644 --- a/src/charset.cpp +++ b/src/charset.cpp @@ -57,8 +57,9 @@ charset::charset(const char* name) } -void charset::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void charset::parseImpl + (const parsingContext& /* ctx */, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { m_name = utility::stringUtils::trim (string(buffer.begin() + position, buffer.begin() + end)); @@ -74,8 +75,9 @@ void charset::parseImpl(const string& buffer, const string::size_type position, } -void charset::generateImpl(utility::outputStream& os, const string::size_type /* maxLineLength */, - const string::size_type curLinePos, string::size_type* newLinePos) const +void charset::generateImpl + (const generationContext& /* ctx */, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { os << m_name; @@ -85,17 +87,25 @@ void charset::generateImpl(utility::outputStream& os, const string::size_type /* void charset::convert(utility::inputStream& in, utility::outputStream& out, - const charset& source, const charset& dest) + const charset& source, const charset& dest, + const charsetConverterOptions& opts) { - charsetConverter conv(source, dest); - conv.convert(in, out); + ref conv = charsetConverter::create(source, dest, opts); + conv->convert(in, out); } -void charset::convert(const string& in, 
string& out, const charset& source, const charset& dest) +void charset::convert(const string& in, string& out, const charset& source, const charset& dest, + const charsetConverterOptions& opts) { - charsetConverter conv(source, dest); - conv.convert(in, out); + if (source == dest) + { + out = in; + return; + } + + ref conv = charsetConverter::create(source, dest, opts); + conv->convert(in, out); } diff --git a/src/charsetConverter.cpp b/src/charsetConverter.cpp index a33f4f84..c2041476 100644 --- a/src/charsetConverter.cpp +++ b/src/charsetConverter.cpp @@ -22,398 +22,25 @@ // #include "vmime/charsetConverter.hpp" -#include "vmime/exception.hpp" -#include "vmime/utility/inputStreamStringAdapter.hpp" -#include "vmime/utility/outputStreamStringAdapter.hpp" - - -extern "C" -{ -#ifndef VMIME_BUILDING_DOC - - #include - #include - - // HACK: prototypes may differ depending on the compiler and/or system (the - // second parameter may or may not be 'const'). This relies on the compiler - // for choosing the right type. 
- class ICONV_HACK - { - public: - - ICONV_HACK(const char** ptr) : m_ptr(ptr) { } - - operator const char**() { return m_ptr; } - operator char**() { return const_cast (m_ptr); } - - private: - - const char** m_ptr; - }; - -#endif // VMIME_BUILDING_DOC -} - - - -// Output replacement char when an invalid sequence is encountered -template -void outputInvalidChar(OUTPUT_CLASS& out, ICONV_DESC cd) -{ - const char* invalidCharIn = "?"; - size_t invalidCharInLen = 1; - - char invalidCharOutBuffer[16]; - char* invalidCharOutPtr = invalidCharOutBuffer; - size_t invalidCharOutLen = 16; - - if (iconv(cd, ICONV_HACK(&invalidCharIn), &invalidCharInLen, - &invalidCharOutPtr, &invalidCharOutLen) != static_cast (-1)) - { - out.write(invalidCharOutBuffer, 16 - invalidCharOutLen); - } -} +#include "vmime/charsetConverter_iconv.hpp" +#include "vmime/charsetConverter_idna.hpp" namespace vmime { -charsetConverter::charsetConverter(const charset& source, const charset& dest) - : m_desc(NULL), m_source(source), m_dest(dest) +// static +ref charsetConverter::create + (const charset& source, const charset& dest, + const charsetConverterOptions& opts) { - // Get an iconv descriptor - const iconv_t cd = iconv_open(dest.getName().c_str(), source.getName().c_str()); - - if (cd != reinterpret_cast (-1)) - { - iconv_t* p = new iconv_t; - *p= cd; - - m_desc = p; - } + if (source == "idna" || dest == "idna") + return vmime::create (source, dest, opts); + else + return vmime::create (source, dest, opts); } -charsetConverter::~charsetConverter() -{ - if (m_desc != NULL) - { - // Close iconv handle - iconv_close(*static_cast (m_desc)); - - delete static_cast (m_desc); - m_desc = NULL; - } -} - - -void charsetConverter::convert(utility::inputStream& in, utility::outputStream& out) -{ - if (m_desc == NULL) - throw exceptions::charset_conv_error("Cannot initialize converter."); - - const iconv_t cd = *static_cast (m_desc); - - char inBuffer[32768]; - char outBuffer[32768]; - size_t inPos = 0; - - 
bool prevIsInvalid = false; - bool breakAfterNext = false; - - while (true) - { - // Fullfill the buffer - size_t inLength = static_cast (in.read(inBuffer + inPos, sizeof(inBuffer) - inPos) + inPos); - size_t outLength = sizeof(outBuffer); - - const char* inPtr = breakAfterNext ? NULL : inBuffer; - size_t *ptrLength = breakAfterNext ? NULL : &inLength; - char* outPtr = outBuffer; - - // Convert input bytes - if (iconv(cd, ICONV_HACK(&inPtr), ptrLength, - &outPtr, &outLength) == static_cast (-1)) - { - // Illegal input sequence or input sequence has no equivalent - // sequence in the destination charset. - if (prevIsInvalid) - { - // Write successfully converted bytes - out.write(outBuffer, sizeof(outBuffer) - outLength); - - // Output a special character to indicate we don't known how to - // convert the sequence at this position - outputInvalidChar(out, cd); - - // Skip a byte and leave unconverted bytes in the input buffer - std::copy(const_cast (inPtr + 1), inBuffer + sizeof(inBuffer), inBuffer); - inPos = inLength - 1; - } - else - { - // Write successfully converted bytes - out.write(outBuffer, sizeof(outBuffer) - outLength); - - // Leave unconverted bytes in the input buffer - std::copy(const_cast (inPtr), inBuffer + sizeof(inBuffer), inBuffer); - inPos = inLength; - - if (errno != E2BIG) - prevIsInvalid = true; - } - } - else - { - // Write successfully converted bytes - out.write(outBuffer, sizeof(outBuffer) - outLength); - - inPos = 0; - prevIsInvalid = false; - } - - if (breakAfterNext) - break; - - // Check for end of data, loop again to flush stateful data from iconv - if (in.eof() && inPos == 0) - breakAfterNext = true; - } -} - - -void charsetConverter::convert(const string& in, string& out) -{ - out.clear(); - - utility::inputStreamStringAdapter is(in); - utility::outputStreamStringAdapter os(out); - - convert(is, os); - - os.flush(); -} - - - -// charsetFilteredOutputStream - -namespace utility { - - 
-charsetFilteredOutputStream::charsetFilteredOutputStream - (const charset& source, const charset& dest, outputStream& os) - : m_desc(NULL), m_sourceCharset(source), m_destCharset(dest), - m_stream(os), m_unconvCount(0) -{ - // Get an iconv descriptor - const iconv_t cd = iconv_open(dest.getName().c_str(), source.getName().c_str()); - - if (cd != reinterpret_cast (-1)) - { - iconv_t* p = new iconv_t; - *p= cd; - - m_desc = p; - } -} - - -charsetFilteredOutputStream::~charsetFilteredOutputStream() -{ - if (m_desc != NULL) - { - // Close iconv handle - iconv_close(*static_cast (m_desc)); - - delete static_cast (m_desc); - m_desc = NULL; - } -} - - -outputStream& charsetFilteredOutputStream::getNextOutputStream() -{ - return m_stream; -} - - -void charsetFilteredOutputStream::write - (const value_type* const data, const size_type count) -{ - if (m_desc == NULL) - throw exceptions::charset_conv_error("Cannot initialize converter."); - - const iconv_t cd = *static_cast (m_desc); - - const value_type* curData = data; - size_type curDataLen = count; - - // If there is some unconverted bytes left, add more data from this - // chunk to see if it can now be converted. - while (m_unconvCount != 0 || curDataLen != 0) - { - if (m_unconvCount != 0) - { - // Check if an incomplete input sequence is larger than the - // input buffer size: should not happen except if something - // in the input sequence is invalid. If so, output a special - // character and skip one byte in the invalid sequence. 
- if (m_unconvCount >= sizeof(m_unconvBuffer)) - { - outputInvalidChar(m_stream, cd); - - std::copy(m_unconvBuffer + 1, - m_unconvBuffer + m_unconvCount, m_unconvBuffer); - - m_unconvCount--; - } - - // Get more data - const size_type remaining = - std::min(curDataLen, sizeof(m_unconvBuffer) - m_unconvCount); - - std::copy(curData, curData + remaining, m_unconvBuffer + m_unconvCount); - - m_unconvCount += remaining; - curDataLen -= remaining; - curData += remaining; - - if (remaining == 0) - return; // no more data - - // Try a conversion - const char* inPtr = m_unconvBuffer; - size_t inLength = m_unconvCount; - char* outPtr = m_outputBuffer; - size_t outLength = sizeof(m_outputBuffer); - - const size_t inLength0 = inLength; - - if (iconv(cd, ICONV_HACK(&inPtr), &inLength, &outPtr, &outLength) == static_cast (-1)) - { - const size_t inputConverted = inLength0 - inLength; - - // Write successfully converted bytes - m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength); - - // Shift unconverted bytes - std::copy(m_unconvBuffer + inputConverted, - m_unconvBuffer + m_unconvCount, m_unconvBuffer); - - m_unconvCount -= inputConverted; - - continue; - } - - // Write successfully converted bytes - m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength); - - // Empty the unconverted buffer - m_unconvCount = 0; - } - - if (curDataLen == 0) - return; // no more data - - // Now, convert the current data buffer - const char* inPtr = curData; - size_t inLength = std::min(curDataLen, sizeof(m_outputBuffer) / MAX_CHARACTER_WIDTH); - char* outPtr = m_outputBuffer; - size_t outLength = sizeof(m_outputBuffer); - - const size_t inLength0 = inLength; - - if (iconv(cd, ICONV_HACK(&inPtr), &inLength, &outPtr, &outLength) == static_cast (-1)) - { - // Write successfully converted bytes - m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength); - - const size_t inputConverted = inLength0 - inLength; - - curData += inputConverted; - curDataLen -= 
inputConverted; - - // Put one byte byte into the unconverted buffer so - // that the next iteration fill it - if (curDataLen != 0) - { - m_unconvCount = 1; - m_unconvBuffer[0] = *curData; - - curData++; - curDataLen--; - } - } - else - { - // Write successfully converted bytes - m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength); - - curData += inLength0; - curDataLen -= inLength0; - } - } -} - - -void charsetFilteredOutputStream::flush() -{ - if (m_desc == NULL) - throw exceptions::charset_conv_error("Cannot initialize converter."); - - const iconv_t cd = *static_cast (m_desc); - - size_t offset = 0; - - // Process unconverted bytes - while (m_unconvCount != 0) - { - // Try a conversion - const char* inPtr = m_unconvBuffer + offset; - size_t inLength = m_unconvCount; - char* outPtr = m_outputBuffer; - size_t outLength = sizeof(m_outputBuffer); - - const size_t inLength0 = inLength; - - if (iconv(cd, ICONV_HACK(&inPtr), &inLength, &outPtr, &outLength) == static_cast (-1)) - { - const size_t inputConverted = inLength0 - inLength; - - // Skip a "blocking" character - if (inputConverted == 0) - { - outputInvalidChar(m_stream, cd); - - offset++; - m_unconvCount--; - } - else - { - // Write successfully converted bytes - m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength); - - offset += inputConverted; - m_unconvCount -= inputConverted; - } - } - else - { - // Write successfully converted bytes - m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength); - - m_unconvCount = 0; - } - } - - m_stream.flush(); -} - - -} // utility - - } // vmime diff --git a/src/charsetConverterOptions.cpp b/src/charsetConverterOptions.cpp new file mode 100644 index 00000000..caeacd01 --- /dev/null +++ b/src/charsetConverterOptions.cpp @@ -0,0 +1,37 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU 
General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#include "vmime/charsetConverterOptions.hpp" + + +namespace vmime +{ + + +charsetConverterOptions::charsetConverterOptions() + : invalidSequence("?") +{ +} + + +} // vmime diff --git a/src/charsetConverter_iconv.cpp b/src/charsetConverter_iconv.cpp new file mode 100644 index 00000000..c5d3557e --- /dev/null +++ b/src/charsetConverter_iconv.cpp @@ -0,0 +1,435 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#include "vmime/charsetConverter_iconv.hpp" + +#include "vmime/exception.hpp" +#include "vmime/utility/inputStreamStringAdapter.hpp" +#include "vmime/utility/outputStreamStringAdapter.hpp" + + +extern "C" +{ +#ifndef VMIME_BUILDING_DOC + + #include + #include + + // HACK: prototypes may differ depending on the compiler and/or system (the + // second parameter may or may not be 'const'). This relies on the compiler + // for choosing the right type. + class ICONV_HACK + { + public: + + ICONV_HACK(const char** ptr) : m_ptr(ptr) { } + + operator const char**() { return m_ptr; } + operator char**() { return const_cast (m_ptr); } + + private: + + const char** m_ptr; + }; + +#endif // VMIME_BUILDING_DOC +} + + + +// Output replacement char when an invalid sequence is encountered +template +void outputInvalidChar(OUTPUT_CLASS& out, ICONV_DESC cd, + const vmime::charsetConverterOptions& opts = vmime::charsetConverterOptions()) +{ + const char* invalidCharIn = opts.invalidSequence.c_str(); + size_t invalidCharInLen = opts.invalidSequence.length(); + + char invalidCharOutBuffer[16]; + char* invalidCharOutPtr = invalidCharOutBuffer; + size_t invalidCharOutLen = 16; + + if (iconv(cd, ICONV_HACK(&invalidCharIn), &invalidCharInLen, + &invalidCharOutPtr, &invalidCharOutLen) != static_cast (-1)) + { + out.write(invalidCharOutBuffer, 16 - invalidCharOutLen); + } +} + + + +namespace vmime +{ + + +charsetConverter_iconv::charsetConverter_iconv + (const charset& source, const charset& dest, const charsetConverterOptions& opts) + : 
m_desc(NULL), m_source(source), m_dest(dest), m_options(opts) +{ + // Get an iconv descriptor + const iconv_t cd = iconv_open(dest.getName().c_str(), source.getName().c_str()); + + if (cd != reinterpret_cast (-1)) + { + iconv_t* p = new iconv_t; + *p= cd; + + m_desc = p; + } +} + + +charsetConverter_iconv::~charsetConverter_iconv() +{ + if (m_desc != NULL) + { + // Close iconv handle + iconv_close(*static_cast (m_desc)); + + delete static_cast (m_desc); + m_desc = NULL; + } +} + + +void charsetConverter_iconv::convert(utility::inputStream& in, utility::outputStream& out) +{ + if (m_desc == NULL) + throw exceptions::charset_conv_error("Cannot initialize converter."); + + const iconv_t cd = *static_cast (m_desc); + + char inBuffer[32768]; + char outBuffer[32768]; + size_t inPos = 0; + + bool prevIsInvalid = false; + bool breakAfterNext = false; + + while (true) + { + // Fullfill the buffer + size_t inLength = static_cast (in.read(inBuffer + inPos, sizeof(inBuffer) - inPos) + inPos); + size_t outLength = sizeof(outBuffer); + + const char* inPtr = breakAfterNext ? NULL : inBuffer; + size_t *ptrLength = breakAfterNext ? NULL : &inLength; + char* outPtr = outBuffer; + + // Convert input bytes + if (iconv(cd, ICONV_HACK(&inPtr), ptrLength, + &outPtr, &outLength) == static_cast (-1)) + { + // Illegal input sequence or input sequence has no equivalent + // sequence in the destination charset. 
+ if (prevIsInvalid) + { + // Write successfully converted bytes + out.write(outBuffer, sizeof(outBuffer) - outLength); + + // Output a special character to indicate we don't known how to + // convert the sequence at this position + outputInvalidChar(out, cd, m_options); + + // Skip a byte and leave unconverted bytes in the input buffer + std::copy(const_cast (inPtr + 1), inBuffer + sizeof(inBuffer), inBuffer); + inPos = inLength - 1; + } + else + { + // Write successfully converted bytes + out.write(outBuffer, sizeof(outBuffer) - outLength); + + // Leave unconverted bytes in the input buffer + std::copy(const_cast (inPtr), inBuffer + sizeof(inBuffer), inBuffer); + inPos = inLength; + + if (errno != E2BIG) + prevIsInvalid = true; + } + } + else + { + // Write successfully converted bytes + out.write(outBuffer, sizeof(outBuffer) - outLength); + + inPos = 0; + prevIsInvalid = false; + } + + if (breakAfterNext) + break; + + // Check for end of data, loop again to flush stateful data from iconv + if (in.eof() && inPos == 0) + breakAfterNext = true; + } +} + + +void charsetConverter_iconv::convert(const string& in, string& out) +{ + if (m_source == m_dest) + { + // No conversion needed + out = in; + return; + } + + out.clear(); + + utility::inputStreamStringAdapter is(in); + utility::outputStreamStringAdapter os(out); + + convert(is, os); + + os.flush(); +} + + +ref charsetConverter_iconv::getFilteredOutputStream(utility::outputStream& os) +{ + return vmime::create (m_source, m_dest, &os); +} + + + +// charsetFilteredOutputStream_iconv + +namespace utility { + + +charsetFilteredOutputStream_iconv::charsetFilteredOutputStream_iconv + (const charset& source, const charset& dest, outputStream* os) + : m_desc(NULL), m_sourceCharset(source), m_destCharset(dest), + m_stream(*os), m_unconvCount(0) +{ + // Get an iconv descriptor + const iconv_t cd = iconv_open(dest.getName().c_str(), source.getName().c_str()); + + if (cd != reinterpret_cast (-1)) + { + iconv_t* p = new 
iconv_t; + *p= cd; + + m_desc = p; + } +} + + +charsetFilteredOutputStream_iconv::~charsetFilteredOutputStream_iconv() +{ + if (m_desc != NULL) + { + // Close iconv handle + iconv_close(*static_cast (m_desc)); + + delete static_cast (m_desc); + m_desc = NULL; + } +} + + +outputStream& charsetFilteredOutputStream_iconv::getNextOutputStream() +{ + return m_stream; +} + + +void charsetFilteredOutputStream_iconv::write + (const value_type* const data, const size_type count) +{ + if (m_desc == NULL) + throw exceptions::charset_conv_error("Cannot initialize converter."); + + const iconv_t cd = *static_cast (m_desc); + + const value_type* curData = data; + size_type curDataLen = count; + + // If there is some unconverted bytes left, add more data from this + // chunk to see if it can now be converted. + while (m_unconvCount != 0 || curDataLen != 0) + { + if (m_unconvCount != 0) + { + // Check if an incomplete input sequence is larger than the + // input buffer size: should not happen except if something + // in the input sequence is invalid. If so, output a special + // character and skip one byte in the invalid sequence. 
+ if (m_unconvCount >= sizeof(m_unconvBuffer)) + { + outputInvalidChar(m_stream, cd); + + std::copy(m_unconvBuffer + 1, + m_unconvBuffer + m_unconvCount, m_unconvBuffer); + + m_unconvCount--; + } + + // Get more data + const size_type remaining = + std::min(curDataLen, sizeof(m_unconvBuffer) - m_unconvCount); + + std::copy(curData, curData + remaining, m_unconvBuffer + m_unconvCount); + + m_unconvCount += remaining; + curDataLen -= remaining; + curData += remaining; + + if (remaining == 0) + return; // no more data + + // Try a conversion + const char* inPtr = m_unconvBuffer; + size_t inLength = m_unconvCount; + char* outPtr = m_outputBuffer; + size_t outLength = sizeof(m_outputBuffer); + + const size_t inLength0 = inLength; + + if (iconv(cd, ICONV_HACK(&inPtr), &inLength, &outPtr, &outLength) == static_cast (-1)) + { + const size_t inputConverted = inLength0 - inLength; + + // Write successfully converted bytes + m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength); + + // Shift unconverted bytes + std::copy(m_unconvBuffer + inputConverted, + m_unconvBuffer + m_unconvCount, m_unconvBuffer); + + m_unconvCount -= inputConverted; + + continue; + } + + // Write successfully converted bytes + m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength); + + // Empty the unconverted buffer + m_unconvCount = 0; + } + + if (curDataLen == 0) + return; // no more data + + // Now, convert the current data buffer + const char* inPtr = curData; + size_t inLength = std::min(curDataLen, sizeof(m_outputBuffer) / MAX_CHARACTER_WIDTH); + char* outPtr = m_outputBuffer; + size_t outLength = sizeof(m_outputBuffer); + + const size_t inLength0 = inLength; + + if (iconv(cd, ICONV_HACK(&inPtr), &inLength, &outPtr, &outLength) == static_cast (-1)) + { + // Write successfully converted bytes + m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength); + + const size_t inputConverted = inLength0 - inLength; + + curData += inputConverted; + curDataLen -= 
inputConverted; + + // Put one byte byte into the unconverted buffer so + // that the next iteration fill it + if (curDataLen != 0) + { + m_unconvCount = 1; + m_unconvBuffer[0] = *curData; + + curData++; + curDataLen--; + } + } + else + { + // Write successfully converted bytes + m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength); + + curData += inLength0; + curDataLen -= inLength0; + } + } +} + + +void charsetFilteredOutputStream_iconv::flush() +{ + if (m_desc == NULL) + throw exceptions::charset_conv_error("Cannot initialize converter."); + + const iconv_t cd = *static_cast (m_desc); + + size_t offset = 0; + + // Process unconverted bytes + while (m_unconvCount != 0) + { + // Try a conversion + const char* inPtr = m_unconvBuffer + offset; + size_t inLength = m_unconvCount; + char* outPtr = m_outputBuffer; + size_t outLength = sizeof(m_outputBuffer); + + const size_t inLength0 = inLength; + + if (iconv(cd, ICONV_HACK(&inPtr), &inLength, &outPtr, &outLength) == static_cast (-1)) + { + const size_t inputConverted = inLength0 - inLength; + + // Skip a "blocking" character + if (inputConverted == 0) + { + outputInvalidChar(m_stream, cd); + + offset++; + m_unconvCount--; + } + else + { + // Write successfully converted bytes + m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength); + + offset += inputConverted; + m_unconvCount -= inputConverted; + } + } + else + { + // Write successfully converted bytes + m_stream.write(m_outputBuffer, sizeof(m_outputBuffer) - outLength); + + m_unconvCount = 0; + } + } + + m_stream.flush(); +} + + +} // utility + + +} // vmime diff --git a/src/charsetConverter_idna.cpp b/src/charsetConverter_idna.cpp new file mode 100644 index 00000000..cde2209a --- /dev/null +++ b/src/charsetConverter_idna.cpp @@ -0,0 +1,168 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU 
General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#include "vmime/charsetConverter_idna.hpp" + +#include "vmime/exception.hpp" + +#include "vmime/utility/stringUtils.hpp" +#include "vmime/utility/streamUtils.hpp" +#include "vmime/utility/outputStreamStringAdapter.hpp" + + +extern "C" +{ + +#include "contrib/punycode/punycode.h" +#include "contrib/punycode/punycode.c" + +} + +#include "contrib/utf8/utf8.h" + + +namespace vmime +{ + + +charsetConverter_idna::charsetConverter_idna + (const charset& source, const charset& dest, const charsetConverterOptions& opts) + : m_source(source), m_dest(dest), m_options(opts) +{ +} + + +charsetConverter_idna::~charsetConverter_idna() +{ +} + + +void charsetConverter_idna::convert(utility::inputStream& in, utility::outputStream& out) +{ + // IDNA should be used for short strings, so it does not matter if we + // do not work directly on the stream + string inStr; + vmime::utility::outputStreamStringAdapter os(inStr); + vmime::utility::bufferedStreamCopy(in, os); + + string outStr; + convert(inStr, outStr); + + out << outStr; +} + + +void charsetConverter_idna::convert(const string& in, string& out) +{ + if (m_source == m_dest) + { + // No 
conversion needed + out = in; + return; + } + + out.clear(); + + if (m_dest == "idna") + { + if (utility::stringUtils::is7bit(in)) + { + // No need to encode as Punycode + out = in; + return; + } + + string inUTF8; + charset::convert(in, inUTF8, m_source, vmime::charsets::UTF_8); + + const string::value_type* ch = inUTF8.c_str(); + const string::value_type* end = inUTF8.c_str() + inUTF8.length(); + + std::vector unichars; + unichars.reserve(inUTF8.length()); + + while (ch < end) + { + const utf8::uint32_t uc = utf8::unchecked::next(ch); + unichars.push_back(uc); + } + + std::vector output(inUTF8.length() * 2); + punycode_uint outputLen = output.size(); + + const punycode_status status = punycode_encode + (unichars.size(), &unichars[0], /* case_flags */ NULL, &outputLen, &output[0]); + + if (status == punycode_success) + { + out = string("xn--") + string(output.begin(), output.begin() + outputLen); + } + else + { + // TODO + } + } + else if (m_source == "idna") + { + if (in.length() < 5 || in.substr(0, 4) != "xn--") + { + // Not an IDNA string + out = in; + return; + } + + std::vector output(in.length() - 4); + punycode_uint outputLen = output.size(); + + const punycode_status status = punycode_decode + (in.length() - 4, &in[4], &outputLen, &output[0], /* case_flags */ NULL); + + if (status == punycode_success) + { + std::vector outUTF8Bytes(outputLen * 4); + string::value_type* p = &outUTF8Bytes[0]; + + for (std::vector ::const_iterator it = output.begin() ; + it != output.begin() + outputLen ; ++it) + { + p = utf8::unchecked::append(*it, p); + } + + string outUTF8(&outUTF8Bytes[0], p); + charset::convert(outUTF8, out, vmime::charsets::UTF_8, m_dest); + } + else + { + // TODO + } + } +} + + +ref charsetConverter_idna::getFilteredOutputStream(utility::outputStream& /* os */) +{ + return NULL; +} + + +} // vmime diff --git a/src/component.cpp b/src/component.cpp index f2d34093..b102b45d 100644 --- a/src/component.cpp +++ b/src/component.cpp @@ -56,6 +56,15 @@ void 
component::parse void component::parse (ref inputStream, const utility::stream::size_type position, const utility::stream::size_type end, utility::stream::size_type* newPosition) +{ + parse(parsingContext::getDefaultContext(), inputStream, position, end, newPosition); +} + + +void component::parse + (const parsingContext& ctx, + ref inputStream, const utility::stream::size_type position, + const utility::stream::size_type end, utility::stream::size_type* newPosition) { m_parsedOffset = m_parsedLength = 0; @@ -71,14 +80,14 @@ void component::parse utility::bufferedStreamCopyRange(*inputStream, ossAdapter, position, end - position); const string buffer = oss.str(); - parseImpl(buffer, 0, buffer.length(), NULL); + parseImpl(ctx, buffer, 0, buffer.length(), NULL); } else { ref parser = vmime::create (seekableStream); - parseImpl(parser, position, end, newPosition); + parseImpl(ctx, parser, position, end, newPosition); } } @@ -87,7 +96,15 @@ void component::parse(const string& buffer) { m_parsedOffset = m_parsedLength = 0; - parseImpl(buffer, 0, buffer.length(), NULL); + parseImpl(parsingContext::getDefaultContext(), buffer, 0, buffer.length(), NULL); +} + + +void component::parse(const parsingContext& ctx, const string& buffer) +{ + m_parsedOffset = m_parsedLength = 0; + + parseImpl(ctx, buffer, 0, buffer.length(), NULL); } @@ -97,7 +114,18 @@ void component::parse { m_parsedOffset = m_parsedLength = 0; - parseImpl(buffer, position, end, newPosition); + parseImpl(parsingContext::getDefaultContext(), buffer, position, end, newPosition); +} + + +void component::parse + (const parsingContext& ctx, + const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) +{ + m_parsedOffset = m_parsedLength = 0; + + parseImpl(ctx, buffer, position, end, newPosition); } @@ -116,11 +144,14 @@ void component::offsetParsedBounds(const utility::stream::size_type offset) void component::parseImpl - (ref parser, const 
utility::stream::size_type position, + (const parsingContext& ctx, ref parser, + const utility::stream::size_type position, const utility::stream::size_type end, utility::stream::size_type* newPosition) { + // This is the default implementation for parsing from an input stream: + // actually, we extract the substring and use the "parse from string" implementation const std::string buffer = parser->extract(position, end); - parseImpl(buffer, 0, buffer.length(), newPosition); + parseImpl(ctx, buffer, 0, buffer.length(), newPosition); // Recursivey offset parsed bounds on children if (position != 0) @@ -132,16 +163,19 @@ void component::parseImpl void component::parseImpl - (const string& buffer, const string::size_type position, + (const parsingContext& ctx, const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition) { + // This is the default implementation for parsing from a string: + // actually, we encapsulate the string buffer in an input stream, then use + // the "parse from input stream" implementation ref stream = vmime::create (buffer); ref parser = vmime::create (stream); - parseImpl(parser, position, end, newPosition); + parseImpl(ctx, parser, position, end, newPosition); } @@ -151,7 +185,10 @@ const string component::generate(const string::size_type maxLineLength, std::ostringstream oss; utility::outputStreamAdapter adapter(oss); - generate(adapter, maxLineLength, curLinePos, NULL); + generationContext ctx(generationContext::getDefaultContext()); + ctx.setMaxLineLength(maxLineLength); + + generateImpl(ctx, adapter, curLinePos, NULL); return (oss.str()); } @@ -159,21 +196,21 @@ const string component::generate(const string::size_type maxLineLength, void component::generate (utility::outputStream& os, - const string::size_type maxLineLength, const string::size_type curLinePos, string::size_type* newLinePos) const { - generateImpl(os, maxLineLength, curLinePos, newLinePos); + 
generateImpl(generationContext::getDefaultContext(), + os, curLinePos, newLinePos); } void component::generate - (ref os, - const string::size_type maxLineLength, + (const generationContext& ctx, + utility::outputStream& outputStream, const string::size_type curLinePos, string::size_type* newLinePos) const { - generateImpl(*os, maxLineLength, curLinePos, newLinePos); + generateImpl(ctx, outputStream, curLinePos, newLinePos); } diff --git a/src/constants.cpp b/src/constants.cpp index 9ce7189b..551d0a18 100644 --- a/src/constants.cpp +++ b/src/constants.cpp @@ -153,6 +153,8 @@ namespace charsets const string::value_type* const WINDOWS_1256 = "windows-1256"; const string::value_type* const WINDOWS_1257 = "windows-1257"; const string::value_type* const WINDOWS_1258 = "windows-1258"; + + const string::value_type* const IDNA = "idna"; } diff --git a/src/contentDisposition.cpp b/src/contentDisposition.cpp index 401e9958..300d4ee3 100644 --- a/src/contentDisposition.cpp +++ b/src/contentDisposition.cpp @@ -47,8 +47,9 @@ contentDisposition::contentDisposition(const contentDisposition& type) } -void contentDisposition::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void contentDisposition::parseImpl + (const parsingContext& /* ctx */, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { m_name = utility::stringUtils::trim(utility::stringUtils::toLower (string(buffer.begin() + position, buffer.begin() + end))); @@ -60,8 +61,9 @@ void contentDisposition::parseImpl(const string& buffer, const string::size_type } -void contentDisposition::generateImpl(utility::outputStream& os, const string::size_type /* maxLineLength */, - const string::size_type curLinePos, string::size_type* newLinePos) const +void contentDisposition::generateImpl + (const generationContext& /* ctx */, utility::outputStream& os, + const string::size_type 
curLinePos, string::size_type* newLinePos) const { os << m_name; diff --git a/src/context.cpp b/src/context.cpp new file mode 100644 index 00000000..07fe4875 --- /dev/null +++ b/src/context.cpp @@ -0,0 +1,87 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. 
+// + +#include "vmime/context.hpp" + + +namespace vmime +{ + + +context::context() + : m_internationalizedEmail(false) +{ +} + + +context::context(const context& ctx) + : object(), + m_internationalizedEmail(ctx.m_internationalizedEmail) +{ +} + + +context::~context() +{ +} + + +bool context::getInternationalizedEmailSupport() const +{ + return m_internationalizedEmail; +} + + +void context::setInternationalizedEmailSupport(const bool support) +{ + m_internationalizedEmail = support; +} + + +const charsetConverterOptions& context::getCharsetConversionOptions() const +{ + return m_charsetConvOptions; +} + + +void context::setCharsetConversionOptions(const charsetConverterOptions& opts) +{ + m_charsetConvOptions = opts; +} + + +context& context::operator=(const context& ctx) +{ + copyFrom(ctx); + return *this; +} + + +void context::copyFrom(const context& ctx) +{ + m_internationalizedEmail = ctx.m_internationalizedEmail; + m_charsetConvOptions = ctx.m_charsetConvOptions; +} + + +} // vmime diff --git a/src/dateTime.cpp b/src/dateTime.cpp index f98d7c64..eaf955c3 100644 --- a/src/dateTime.cpp +++ b/src/dateTime.cpp @@ -68,8 +68,9 @@ zone = "UT" / "GMT" ; Universal Time */ -void datetime::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void datetime::parseImpl + (const parsingContext& /* ctx */, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { const string::value_type* const pend = buffer.data() + end; const string::value_type* p = buffer.data() + position; @@ -589,8 +590,9 @@ void datetime::parseImpl(const string& buffer, const string::size_type position, } -void datetime::generateImpl(utility::outputStream& os, const string::size_type /* maxLineLength */, - const string::size_type curLinePos, string::size_type* newLinePos) const +void datetime::generateImpl + (const generationContext& /* ctx */, 
utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { static const string::value_type* dayNames[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" }; diff --git a/src/disposition.cpp b/src/disposition.cpp index 7a31ed8c..c5da6e30 100644 --- a/src/disposition.cpp +++ b/src/disposition.cpp @@ -171,8 +171,9 @@ const std::vector disposition::getModifierList() const } -void disposition::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void disposition::parseImpl + (const parsingContext& /* ctx */, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { // disposition-mode ";" disposition-type // [ "/" disposition-modifier *( "," disposition-modifier ) ] @@ -276,8 +277,9 @@ void disposition::parseImpl(const string& buffer, const string::size_type positi } -void disposition::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const +void disposition::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { string::size_type pos = curLinePos; @@ -287,7 +289,7 @@ void disposition::generateImpl(utility::outputStream& os, const string::size_typ os << actionMode << "/" << sendingMode << ";"; pos += actionMode.length() + 1 + sendingMode.length() + 1; - if (pos > maxLineLength) + if (pos > ctx.getMaxLineLength()) { os << NEW_LINE_SEQUENCE; pos = NEW_LINE_SEQUENCE_LENGTH; diff --git a/src/emailAddress.cpp b/src/emailAddress.cpp new file mode 100644 index 00000000..09d08780 --- /dev/null +++ b/src/emailAddress.cpp @@ -0,0 +1,513 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify 
it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#include "vmime/emailAddress.hpp" + +#include "vmime/platform.hpp" + +#include "vmime/parserHelpers.hpp" +#include "vmime/utility/outputStreamStringAdapter.hpp" +#include "vmime/utility/stringUtils.hpp" + + +namespace vmime +{ + + +emailAddress::emailAddress() +{ +} + + +emailAddress::emailAddress(const emailAddress& eml) + : component(), m_localName(eml.m_localName), m_domainName(eml.m_domainName) +{ +} + + +emailAddress::emailAddress(const string& email) +{ + parse(email); +} + + +emailAddress::emailAddress(const char* email) +{ + parse(email); +} + + +emailAddress::emailAddress(const string& localName, const string& domainName) + : component(), m_localName(word(localName, vmime::charsets::UTF_8)), + m_domainName(word(domainName, vmime::charsets::UTF_8)) +{ +} + + +emailAddress::emailAddress(const word& localName, const word& domainName) + : component(), m_localName(localName), m_domainName(domainName) +{ +} + + +void emailAddress::parseImpl + (const parsingContext& /* ctx */, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) +{ 
+ const string::value_type* const pend = buffer.data() + end; + const string::value_type* const pstart = buffer.data() + position; + const string::value_type* p = pstart; + + enum ParserStates + { + State_Before, + State_LocalPartStart, + State_LocalPartMiddle, + State_LocalPartComment, + State_LocalPartQuoted, + State_DomainPartStart, + State_DomainPartMiddle, + State_DomainPartComment, + State_End, + State_Error + } state = State_Before; + + std::ostringstream localPart; + std::ostringstream domainPart; + + bool escapeNext = false; // for quoting + bool prevIsDot = false; + bool atFound = false; + bool stop = false; + int commentLevel = 0; + + while (p < pend && !stop) + { + const string::value_type c = *p; + + if ((localPart.str().length() + domainPart.str().length()) >= 256) + { + state = State_Error; + break; + } + + switch (state) + { + case State_Before: + + if (parserHelpers::isSpace(c)) + ++p; + else + state = State_LocalPartStart; + + case State_LocalPartStart: + + if (c == '"') + { + state = State_LocalPartQuoted; + ++p; + } + else if (c == '(') + { + state = State_LocalPartComment; + ++commentLevel; + ++p; + } + else + { + state = State_LocalPartMiddle; + localPart << c; + ++p; + } + + break; + + case State_LocalPartComment: + + if (escapeNext) + { + escapeNext = false; + ++p; + } + else if (c == '\\') + { + escapeNext = true; + ++p; + } + else if (c == '(') + { + ++commentLevel; + ++p; + } + else if (c == ')') + { + if (--commentLevel == 0) + { + // End of comment + state = State_LocalPartMiddle; + } + + ++p; + } + else + { + // Comment continues + ++p; + } + + break; + + case State_LocalPartQuoted: + + if (escapeNext) + { + escapeNext = false; + + if (c == '"' || c == '\\') + { + localPart << c; + ++p; + } + else + { + // This char cannot be escaped + state = State_Error; + } + } + else if (c == '"') + { + // End of quoted string + state = State_LocalPartMiddle; + ++p; + } + else if (c == '\\') + { + escapeNext = true; + ++p; + } + else + { + 
localPart << c; + ++p; + } + + break; + + case State_LocalPartMiddle: + + if (c == '.') + { + prevIsDot = true; + localPart << c; + ++p; + } + else if (c == '"' && prevIsDot) + { + prevIsDot = false; + state = State_LocalPartQuoted; + ++p; + } + else if (c == '(') + { + // By allowing comments anywhere in the local part, + // we are more permissive than RFC-2822 + state = State_LocalPartComment; + ++commentLevel; + ++p; + } + else if (c == '@') + { + atFound = true; + state = State_DomainPartStart; + ++p; + } + else if (parserHelpers::isSpace(c)) + { + // Allow not specifying domain part + state = State_End; + } + else + { + prevIsDot = false; + localPart << c; + ++p; + } + + break; + + case State_DomainPartStart: + + if (c == '(') + { + state = State_DomainPartComment; + ++commentLevel; + ++p; + } + else + { + state = State_DomainPartMiddle; + domainPart << c; + ++p; + } + + break; + + case State_DomainPartMiddle: + + if (parserHelpers::isSpace(c)) + { + state = State_End; + } + else if (c == '(') + { + // By allowing comments anywhere in the domain part, + // we are more permissive than RFC-2822 + state = State_DomainPartComment; + ++commentLevel; + ++p; + } + else + { + domainPart << c; + ++p; + } + + break; + + case State_DomainPartComment: + + if (escapeNext) + { + escapeNext = false; + ++p; + } + else if (c == '\\') + { + escapeNext = true; + ++p; + } + else if (c == '(') + { + ++commentLevel; + ++p; + } + else if (c == ')') + { + if (--commentLevel == 0) + { + // End of comment + state = State_DomainPartMiddle; + } + + ++p; + } + else + { + // Comment continues + ++p; + } + + break; + + case State_End: + case State_Error: + + stop = true; + break; + } + } + + if (p == pend && state != State_Error) + { + if (state == State_DomainPartMiddle) + state = State_End; + else if (state == State_LocalPartMiddle) + state = State_End; // allow not specifying domain part + } + + if (state != State_End) + { + m_localName = word("invalid", vmime::charsets::UTF_8); + 
m_domainName = word("invalid", vmime::charsets::UTF_8); + } + else + { + // If the domain part is missing, use local host name + if (domainPart.str().empty() && !atFound) + domainPart << platform::getHandler()->getHostName(); + + m_localName = word(localPart.str(), vmime::charsets::UTF_8); + m_domainName = word(domainPart.str(), vmime::charsets::UTF_8); + } + + setParsedBounds(position, p - pend); + + if (newPosition) + *newPosition = p - pend; +} + + +static const string domainNameToIDNA(const string& domainName) +{ + std::ostringstream idnaDomain; + string::size_type p = 0; + + for (string::size_type n = domainName.find('.', p) ; + (n = domainName.find('.', p)) != string::npos ; p = n + 1) + { + string idnaPart; + charset::convert(string(domainName.begin() + p, domainName.begin() + n), + idnaPart, vmime::charsets::UTF_8, vmime::charsets::IDNA); + + idnaDomain << idnaPart << '.'; + } + + if (p < domainName.length()) + { + string idnaPart; + charset::convert(string(domainName.begin() + p, domainName.end()), + idnaPart, vmime::charsets::UTF_8, vmime::charsets::IDNA); + + idnaDomain << idnaPart; + } + + return idnaDomain.str(); +} + + +void emailAddress::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const +{ + string localPart, domainPart; + + if (ctx.getInternationalizedEmailSupport() && + (!utility::stringUtils::is7bit(m_localName.getBuffer()) || + !utility::stringUtils::is7bit(m_domainName.getBuffer()))) + { + // Local part + string localPartUTF8(m_localName.getConvertedText(vmime::charsets::UTF_8)); + word localPartWord(localPartUTF8, vmime::charsets::UTF_8); + + vmime::utility::outputStreamStringAdapter os(localPart); + localPartWord.generate(ctx, os, 0, NULL, text::FORCE_NO_ENCODING | text::QUOTE_IF_NEEDED, NULL); + + // Domain part + domainPart = m_domainName.getConvertedText(vmime::charsets::UTF_8); + } + else + { + // Local part + 
vmime::utility::outputStreamStringAdapter os(localPart); + m_localName.generate(ctx, os, 0, NULL, text::QUOTE_IF_NEEDED, NULL); + + // Domain part as IDNA + domainPart = domainNameToIDNA(m_domainName.getConvertedText(vmime::charsets::UTF_8)); + } + + os << localPart + << "@" + << domainPart; + + if (newLinePos) + { + *newLinePos = curLinePos + + localPart.length() + + 1 // @ + + domainPart.length(); + } +} + + +bool emailAddress::operator==(const class emailAddress& eml) const +{ + return (m_localName == eml.m_localName && + m_domainName == eml.m_domainName); +} + + +bool emailAddress::operator!=(const class emailAddress& eml) const +{ + return !(*this == eml); +} + + +void emailAddress::copyFrom(const component& other) +{ + const emailAddress& source = dynamic_cast (other); + + m_localName = source.m_localName; + m_domainName = source.m_domainName; +} + + +emailAddress& emailAddress::operator=(const emailAddress& other) +{ + copyFrom(other); + return (*this); +} + + +ref emailAddress::clone() const +{ + return vmime::create (*this); +} + + +const word& emailAddress::getLocalName() const +{ + return m_localName; +} + + +void emailAddress::setLocalName(const word& localName) +{ + m_localName = localName; +} + + +const word& emailAddress::getDomainName() const +{ + return m_domainName; +} + + +void emailAddress::setDomainName(const word& domainName) +{ + m_domainName = domainName; +} + + +const std::vector > emailAddress::getChildComponents() +{ + return std::vector >(); +} + + +bool emailAddress::isEmpty() const +{ + return m_localName.isEmpty(); +} + + +} // vmime diff --git a/src/encoding.cpp b/src/encoding.cpp index 49d78b75..53f88531 100644 --- a/src/encoding.cpp +++ b/src/encoding.cpp @@ -61,8 +61,9 @@ encoding::encoding(const encoding& enc) } -void encoding::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void encoding::parseImpl + (const parsingContext& /* ctx */, const string& 
buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { m_usage = USAGE_UNKNOWN; @@ -80,8 +81,9 @@ void encoding::parseImpl(const string& buffer, const string::size_type position, } -void encoding::generateImpl(utility::outputStream& os, const string::size_type /* maxLineLength */, - const string::size_type curLinePos, string::size_type* newLinePos) const +void encoding::generateImpl + (const generationContext& /* ctx */, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { os << m_name; diff --git a/src/generationContext.cpp b/src/generationContext.cpp new file mode 100644 index 00000000..0f19e623 --- /dev/null +++ b/src/generationContext.cpp @@ -0,0 +1,109 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. 
+// + +#include "vmime/generationContext.hpp" + + +namespace vmime +{ + + +generationContext::generationContext() + : m_maxLineLength(lineLengthLimits::convenient), + m_prologText("This is a multi-part message in MIME format. Your mail reader " \ + "does not understand MIME message format."), + m_epilogText("") +{ +} + + +generationContext::generationContext(const generationContext& ctx) + : context(ctx), + m_maxLineLength(ctx.m_maxLineLength), + m_prologText(ctx.m_prologText), + m_epilogText(ctx.m_epilogText) +{ +} + + +generationContext& generationContext::getDefaultContext() +{ + static generationContext ctx; + return ctx; +} + + +string::size_type generationContext::getMaxLineLength() const +{ + return m_maxLineLength; +} + + +void generationContext::setMaxLineLength(const string::size_type maxLineLength) +{ + m_maxLineLength = maxLineLength; +} + + +const string generationContext::getPrologText() const +{ + return m_prologText; +} + + +void generationContext::setPrologText(const string& prologText) +{ + m_prologText = prologText; +} + + +const string generationContext::getEpilogText() const +{ + return m_epilogText; +} + + +void generationContext::setEpilogText(const string& epilogText) +{ + m_epilogText = epilogText; +} + + +generationContext& generationContext::operator=(const generationContext& ctx) +{ + copyFrom(ctx); + return *this; +} + + +void generationContext::copyFrom(const generationContext& ctx) +{ + context::copyFrom(ctx); + + m_maxLineLength = ctx.m_maxLineLength; + m_prologText = ctx.m_prologText; + m_epilogText = ctx.m_epilogText; +} + + +} // vmime diff --git a/src/header.cpp b/src/header.cpp index 6543a302..d1896d96 100644 --- a/src/header.cpp +++ b/src/header.cpp @@ -61,8 +61,9 @@ field-body-contents = specials tokens, or else consisting of texts> */ -void header::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void header::parseImpl + (const parsingContext& 
ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { string::size_type pos = position; @@ -70,7 +71,7 @@ void header::parseImpl(const string& buffer, const string::size_type position, while (pos < end) { - ref field = headerField::parseNext(buffer, pos, end, &pos); + ref field = headerField::parseNext(ctx, buffer, pos, end, &pos); if (field == NULL) break; m_fields.push_back(field); @@ -83,14 +84,15 @@ void header::parseImpl(const string& buffer, const string::size_type position, } -void header::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type /* curLinePos */, string::size_type* newLinePos) const +void header::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type /* curLinePos */, string::size_type* newLinePos) const { // Generate the fields for (std::vector >::const_iterator it = m_fields.begin() ; it != m_fields.end() ; ++it) { - (*it)->generate(os, maxLineLength); + (*it)->generate(ctx, os); os << CRLF; } diff --git a/src/headerField.cpp b/src/headerField.cpp index 3d0f8834..0a17abac 100644 --- a/src/headerField.cpp +++ b/src/headerField.cpp @@ -73,8 +73,9 @@ headerField& headerField::operator=(const headerField& other) } -ref headerField::parseNext(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +ref headerField::parseNext + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { string::size_type pos = position; @@ -215,7 +216,7 @@ ref headerField::parseNext(const string& buffer, const string::siz // Return a new field ref field = headerFieldFactory::getInstance()->create(name); - field->parse(buffer, contentsStart, contentsEnd, NULL); + field->parse(ctx, buffer, contentsStart, contentsEnd, NULL); 
field->setParsedBounds(nameStart, pos); if (newPosition) @@ -262,19 +263,21 @@ ref headerField::parseNext(const string& buffer, const string::siz } -void headerField::parseImpl(const string& buffer, const string::size_type position, const string::size_type end, - string::size_type* newPosition) +void headerField::parseImpl + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { - m_value->parse(buffer, position, end, newPosition); + m_value->parse(ctx, buffer, position, end, newPosition); } -void headerField::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const +void headerField::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { os << m_name + ": "; - m_value->generate(os, maxLineLength, curLinePos + m_name.length() + 2, newLinePos); + m_value->generate(ctx, os, curLinePos + m_name.length() + 2, newLinePos); } diff --git a/src/mailbox.cpp b/src/mailbox.cpp index a9d18958..1c199a76 100644 --- a/src/mailbox.cpp +++ b/src/mailbox.cpp @@ -23,6 +23,7 @@ #include "vmime/mailbox.hpp" #include "vmime/parserHelpers.hpp" +#include "vmime/utility/outputStreamStringAdapter.hpp" namespace vmime @@ -40,13 +41,13 @@ mailbox::mailbox(const mailbox& mbox) } -mailbox::mailbox(const string& email) +mailbox::mailbox(const emailAddress& email) : m_email(email) { } -mailbox::mailbox(const text& name, const string& email) +mailbox::mailbox(const text& name, const emailAddress& email) : m_name(name), m_email(email) { } @@ -65,8 +66,9 @@ angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr */ -void mailbox::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void mailbox::parseImpl + (const parsingContext& ctx, 
const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { const string::value_type* const pend = buffer.data() + end; const string::value_type* const pstart = buffer.data() + position; @@ -313,27 +315,13 @@ void mailbox::parseImpl(const string& buffer, const string::size_type position, // (email address is mandatory, whereas name is optional). if (address.empty() && !name.empty() && !hadBrackets) { - m_email.clear(); - m_email.reserve(name.size()); m_name.removeAllWords(); - - for (string::size_type i = 0 ; i < name.size() ; ++i) - { - if (!parserHelpers::isSpace(name[i])) - m_email += name[i]; - } + m_email.parse(ctx, name); } else { - text::decodeAndUnfold(name, &m_name); - m_email.clear(); - m_email.reserve(address.size()); - - for (string::size_type i = 0 ; i < address.size() ; ++i) - { - if (!parserHelpers::isSpace(address[i])) - m_email += address[i]; - } + text::decodeAndUnfold(ctx, name, &m_name); + m_email.parse(ctx, address); } setParsedBounds(position, position + (p - pstart)); @@ -343,28 +331,30 @@ void mailbox::parseImpl(const string& buffer, const string::size_type position, } -void mailbox::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const +void mailbox::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { + string generatedEmail; + utility::outputStreamStringAdapter generatedEmailStream(generatedEmail); + m_email.generate(ctx, generatedEmailStream, 0, NULL); + if (m_name.isEmpty()) { - bool newLine = false; + string::size_type pos = curLinePos; // No display name is specified, only email address. 
- if (curLinePos /* + 2 */ + m_email.length() > maxLineLength) + if (curLinePos + generatedEmail.length() > ctx.getMaxLineLength()) { os << NEW_LINE_SEQUENCE; - newLine = true; + pos = NEW_LINE_SEQUENCE.length(); } - //os << "<" << m_email << ">"; - os << m_email; + os << generatedEmail; + pos += generatedEmail.length(); if (newLinePos) - { - *newLinePos = curLinePos + m_email.length() /* + 2 */; - if (newLine) *newLinePos += 1; - } + *newLinePos = pos; } else { @@ -415,24 +405,21 @@ void mailbox::generateImpl(utility::outputStream& os, const string::size_type ma } string::size_type pos = curLinePos; - bool newLine = true; - m_name.encodeAndFold(os, maxLineLength, pos, &pos, + m_name.encodeAndFold(ctx, os, pos, &pos, text::QUOTE_IF_POSSIBLE | (forceEncode ? text::FORCE_ENCODING : 0)); - if (pos + m_email.length() + 3 > maxLineLength) + if (pos + generatedEmail.length() + 3 > ctx.getMaxLineLength()) { os << NEW_LINE_SEQUENCE; - newLine = true; + pos = NEW_LINE_SEQUENCE.length(); } - os << " <" << m_email << ">"; + os << " <" << generatedEmail << ">"; + pos += 2 + generatedEmail.length() + 1; if (newLinePos) - { - *newLinePos = pos + m_email.length() + 3; - if (newLine) *newLinePos += NEW_LINE_SEQUENCE.length(); - } + *newLinePos = pos; } } @@ -473,14 +460,14 @@ ref mailbox::clone() const bool mailbox::isEmpty() const { - return (m_email.empty()); + return m_email.isEmpty(); } void mailbox::clear() { m_name.removeAllWords(); - m_email.clear(); + m_email = emailAddress(); } @@ -502,13 +489,13 @@ void mailbox::setName(const text& name) } -const string& mailbox::getEmail() const +const emailAddress& mailbox::getEmail() const { return (m_email); } -void mailbox::setEmail(const string& email) +void mailbox::setEmail(const emailAddress& email) { m_email = email; } diff --git a/src/mailboxField.cpp b/src/mailboxField.cpp index c3c5214e..1f11f49c 100644 --- a/src/mailboxField.cpp +++ b/src/mailboxField.cpp @@ -43,15 +43,16 @@ mailboxField::mailboxField(const mailboxField&) } 
-void mailboxField::parse(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void mailboxField::parse + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { ref mbox = vmime::create (); // Here, we cannot simply call "m_mailbox.parse()" because it // may have more than one address specified (even if this field // should contain only one). We are never too much careful... - ref
parsedAddress = address::parseNext(buffer, position, end, newPosition); + ref
parsedAddress = address::parseNext(ctx, buffer, position, end, newPosition); if (parsedAddress) { diff --git a/src/mailboxGroup.cpp b/src/mailboxGroup.cpp index 251f920b..65611b33 100644 --- a/src/mailboxGroup.cpp +++ b/src/mailboxGroup.cpp @@ -54,8 +54,9 @@ mailboxGroup::~mailboxGroup() } -void mailboxGroup::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void mailboxGroup::parseImpl + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { const string::value_type* const pend = buffer.data() + end; const string::value_type* const pstart = buffer.data() + position; @@ -80,7 +81,7 @@ void mailboxGroup::parseImpl(const string& buffer, const string::size_type posit while (pos < end) { - ref
parsedAddress = address::parseNext(buffer, pos, end, &pos); + ref
parsedAddress = address::parseNext(ctx, buffer, pos, end, &pos); if (parsedAddress) { @@ -102,7 +103,7 @@ void mailboxGroup::parseImpl(const string& buffer, const string::size_type posit } } - text::decodeAndUnfold(name, &m_name); + text::decodeAndUnfold(ctx, name, &m_name); setParsedBounds(position, end); @@ -111,8 +112,9 @@ void mailboxGroup::parseImpl(const string& buffer, const string::size_type posit } -void mailboxGroup::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const +void mailboxGroup::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { // We have to encode the name: // - if it contains characters in a charset different from "US-ASCII", @@ -156,7 +158,10 @@ void mailboxGroup::generateImpl(utility::outputStream& os, const string::size_ty string::size_type pos = curLinePos; - m_name.encodeAndFold(os, maxLineLength - 2, pos, &pos, + generationContext tmpCtx(ctx); + tmpCtx.setMaxLineLength(ctx.getMaxLineLength() - 2); + + m_name.encodeAndFold(tmpCtx, os, pos, &pos, + forceEncode ?
text::FORCE_ENCODING : 0); os << ":"; @@ -176,7 +181,7 @@ void mailboxGroup::generateImpl(utility::outputStream& os, const string::size_ty ++pos; } - (*it)->generate(os, maxLineLength - 2, pos, &pos); + (*it)->generate(tmpCtx, os, pos, &pos); } os << ";"; diff --git a/src/mailboxList.cpp b/src/mailboxList.cpp index b3106fb8..03a225f8 100644 --- a/src/mailboxList.cpp +++ b/src/mailboxList.cpp @@ -196,17 +196,18 @@ const std::vector > mailboxList::getChildComponents() } -void mailboxList::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void mailboxList::parseImpl + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { - m_list.parse(buffer, position, end, newPosition); + m_list.parse(ctx, buffer, position, end, newPosition); } -void mailboxList::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, +void mailboxList::generateImpl(const generationContext& ctx, utility::outputStream& os, const string::size_type curLinePos, string::size_type* newLinePos) const { - m_list.generate(os, maxLineLength, curLinePos, newLinePos); + m_list.generate(ctx, os, curLinePos, newLinePos); } diff --git a/src/mdn/MDNHelper.cpp b/src/mdn/MDNHelper.cpp index 533813b1..a0d48599 100644 --- a/src/mdn/MDNHelper.cpp +++ b/src/mdn/MDNHelper.cpp @@ -269,7 +269,7 @@ ref MDNHelper::createSecondMDNPart(const sendableMDNInfos& mdnInfos, ref fr = headerFieldFactory::getInstance()-> create(vmime::fields::FINAL_RECIPIENT); - fr->setValue("rfc822; " + mdnInfos.getRecipient().getEmail()); + fr->setValue("rfc822; " + mdnInfos.getRecipient().getEmail().generate()); fields.appendField(fr); diff --git a/src/mediaType.cpp b/src/mediaType.cpp index 92e8d058..62d65c23 100644 --- a/src/mediaType.cpp +++ b/src/mediaType.cpp @@ -48,8 +48,9 @@ mediaType::mediaType(const string& type, const string& subType) } -void 
mediaType::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void mediaType::parseImpl + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { const string::value_type* const pend = buffer.data() + end; const string::value_type* const pstart = buffer.data() + position; @@ -82,12 +83,13 @@ void mediaType::parseImpl(const string& buffer, const string::size_type position } -void mediaType::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const +void mediaType::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { const string value = m_type + "/" + m_subType; - if (curLinePos + value.length() > maxLineLength) + if (curLinePos + value.length() > ctx.getMaxLineLength()) { os << NEW_LINE_SEQUENCE; os << value; diff --git a/src/message.cpp b/src/message.cpp index c52d54c2..75016c36 100644 --- a/src/message.cpp +++ b/src/message.cpp @@ -22,7 +22,6 @@ // #include "vmime/message.hpp" -#include "vmime/options.hpp" #include "vmime/utility/outputStreamAdapter.hpp" @@ -38,37 +37,10 @@ message::message() } -void message::generate(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const -{ - // We override this function to change the default value for the - // "maxLineLength" parameter. So, the user will just have to call - // message::generate() without any argument to use the maximum line - // length specified in vmime::options... 
- bodyPart::generate(os, maxLineLength, curLinePos, newLinePos); -} - - const string message::generate(const string::size_type maxLineLength, const string::size_type curLinePos) const { - std::ostringstream oss; - utility::outputStreamAdapter adapter(oss); - - generate(adapter, maxLineLength, curLinePos, NULL); - - return (oss.str()); -} - - - -void message::generate - (ref os, - const string::size_type maxLineLength, - const string::size_type curLinePos, - string::size_type* newLinePos) const -{ - bodyPart::generate(os, maxLineLength, curLinePos, newLinePos); + return bodyPart::generate(maxLineLength, curLinePos); } diff --git a/src/messageId.cpp b/src/messageId.cpp index 3102294e..6b558e1b 100644 --- a/src/messageId.cpp +++ b/src/messageId.cpp @@ -61,8 +61,9 @@ messageId::messageId(const string& left, const string& right) msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] */ -void messageId::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void messageId::parseImpl + (const parsingContext& /* ctx */, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { const string::value_type* const pend = buffer.data() + end; const string::value_type* const pstart = buffer.data() + position; @@ -145,8 +146,9 @@ void messageId::parseImpl(const string& buffer, const string::size_type position } -ref messageId::parseNext(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +ref messageId::parseNext + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { string::size_type pos = position; @@ -161,7 +163,7 @@ ref messageId::parseNext(const string& buffer, const string::size_ty ++pos; ref mid = vmime::create (); - mid->parse(buffer, begin, pos, NULL); + mid->parse(ctx, buffer, 
begin, pos, NULL); if (newPosition != NULL) *newPosition = pos; @@ -185,12 +187,13 @@ const string messageId::getId() const } -void messageId::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const +void messageId::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { string::size_type pos = curLinePos; - if (curLinePos + m_left.length() + m_right.length() + 3 > maxLineLength) + if (curLinePos + m_left.length() + m_right.length() + 3 > ctx.getMaxLineLength()) { os << NEW_LINE_SEQUENCE; pos = NEW_LINE_SEQUENCE_LENGTH; diff --git a/src/messageIdSequence.cpp b/src/messageIdSequence.cpp index a255235a..99c96319 100644 --- a/src/messageIdSequence.cpp +++ b/src/messageIdSequence.cpp @@ -84,8 +84,9 @@ const std::vector > messageIdSequence::getChildComponents() } -void messageIdSequence::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void messageIdSequence::parseImpl + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { removeAllMessageIds(); @@ -93,7 +94,7 @@ void messageIdSequence::parseImpl(const string& buffer, const string::size_type while (pos < end) { - ref parsedMid = messageId::parseNext(buffer, pos, end, &pos); + ref parsedMid = messageId::parseNext(ctx, buffer, pos, end, &pos); if (parsedMid != NULL) m_list.push_back(parsedMid); @@ -106,16 +107,20 @@ void messageIdSequence::parseImpl(const string& buffer, const string::size_type } -void messageIdSequence::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const +void messageIdSequence::generateImpl + (const generationContext& ctx, 
utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { string::size_type pos = curLinePos; if (!m_list.empty()) { + generationContext tmpCtx(ctx); + tmpCtx.setMaxLineLength(ctx.getMaxLineLength() - 2); + for (std::vector >::const_iterator it = m_list.begin() ; ; ) { - (*it)->generate(os, maxLineLength - 2, pos, &pos); + (*it)->generate(tmpCtx, os, pos, &pos); if (++it == m_list.end()) break; diff --git a/src/net/sendmail/sendmailTransport.cpp b/src/net/sendmail/sendmailTransport.cpp index 181d0d7f..dbbb55a8 100644 --- a/src/net/sendmail/sendmailTransport.cpp +++ b/src/net/sendmail/sendmailTransport.cpp @@ -152,11 +152,11 @@ void sendmailTransport::send args.push_back("-i"); args.push_back("-f"); - args.push_back(expeditor.getEmail()); + args.push_back(expeditor.getEmail().generate()); args.push_back("--"); for (int i = 0 ; i < recipients.getMailboxCount() ; ++i) - args.push_back(recipients.getMailboxAt(i)->getEmail()); + args.push_back(recipients.getMailboxAt(i)->getEmail().generate()); // Call sendmail try diff --git a/src/net/smtp/SMTPCommand.cpp b/src/net/smtp/SMTPCommand.cpp index f338e248..99a3ac17 100644 --- a/src/net/smtp/SMTPCommand.cpp +++ b/src/net/smtp/SMTPCommand.cpp @@ -32,6 +32,7 @@ #include "vmime/net/socket.hpp" #include "vmime/mailbox.hpp" +#include "vmime/utility/outputStreamAdapter.hpp" namespace vmime { @@ -90,7 +91,12 @@ ref SMTPCommand::MAIL(const mailbox& mbox) { std::ostringstream cmd; cmd.imbue(std::locale::classic()); - cmd << "MAIL FROM:<" << mbox.getEmail() << ">"; + cmd << "MAIL FROM:<"; + + vmime::utility::outputStreamAdapter cmd2(cmd); + mbox.getEmail().generate(cmd2); + + cmd << ">"; return createCommand(cmd.str()); } @@ -101,7 +107,12 @@ ref SMTPCommand::RCPT(const mailbox& mbox) { std::ostringstream cmd; cmd.imbue(std::locale::classic()); - cmd << "RCPT TO:<" << mbox.getEmail() << ">"; + cmd << "RCPT TO:<"; + + vmime::utility::outputStreamAdapter cmd2(cmd); +
mbox.getEmail().generate(cmd2); + + cmd << ">"; return createCommand(cmd.str()); } diff --git a/src/parameter.cpp b/src/parameter.cpp index fd39c641..37a59890 100644 --- a/src/parameter.cpp +++ b/src/parameter.cpp @@ -113,18 +113,23 @@ void parameter::setValue(const word& value) } -void parameter::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void parameter::parseImpl + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { m_value->setBuffer(string(buffer.begin() + position, buffer.begin() + end)); - m_value->setCharset(charset(charsets::US_ASCII)); + + if (ctx.getInternationalizedEmailSupport()) + m_value->setCharset(charset(charsets::UTF_8)); + else + m_value->setCharset(charset(charsets::US_ASCII)); if (newPosition) *newPosition = end; } -void parameter::parse(const std::vector & chunks) +void parameter::parse(const parsingContext& ctx, const std::vector & chunks) { bool foundCharsetChunk = false; @@ -236,7 +241,7 @@ void parameter::parse(const std::vector & chunks) // if the data is not encoded, because it can recover // from parsing errors. 
vmime::text t; - t.parse(chunk.data); + t.parse(ctx, chunk.data); if (t.getWordCount() != 0) { @@ -253,8 +258,9 @@ void parameter::parse(const std::vector & chunks) } -void parameter::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const +void parameter::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { const string& name = m_name; const string& value = m_value->getBuffer(); @@ -276,7 +282,7 @@ void parameter::generateImpl(utility::outputStream& os, const string::size_type string::size_type pos = curLinePos; - if (pos + name.length() + 10 + value.length() > maxLineLength) + if (pos + name.length() + 10 + value.length() > ctx.getMaxLineLength()) { sevenBitStream << NEW_LINE_SEQUENCE; pos = NEW_LINE_SEQUENCE_LENGTH; @@ -287,7 +293,7 @@ void parameter::generateImpl(utility::outputStream& os, const string::size_type string::size_type valueLength = 0; // Use worst-case length name.length()+2 for 'name=' part of line - for (string::size_type i = 0 ; (i < value.length()) && (pos + name.length() + 2 + valueLength < maxLineLength - 4) ; ++i, ++valueLength) + for (string::size_type i = 0 ; (i < value.length()) && (pos + name.length() + 2 + valueLength < ctx.getMaxLineLength() - 4) ; ++i, ++valueLength) { switch (value[i]) { @@ -431,7 +437,7 @@ void parameter::generateImpl(utility::outputStream& os, const string::size_type name.length() + 4 /* *0*= */ + 2 /* '' */ + m_value->getCharset().getName().length(); - if (pos + firstSectionLength + 5 >= maxLineLength) + if (pos + firstSectionLength + 5 >= ctx.getMaxLineLength()) { os << NEW_LINE_SEQUENCE; pos = NEW_LINE_SEQUENCE_LENGTH; @@ -448,7 +454,7 @@ void parameter::generateImpl(utility::outputStream& os, const string::size_type { // Check whether we should start a new line (taking into // account the next character will be encoded 
= worst case) - if (currentSectionLength + 3 >= maxLineLength) + if (currentSectionLength + 3 >= ctx.getMaxLineLength()) { sectionText.push_back(currentSection); sectionCount++; diff --git a/src/parameterizedHeaderField.cpp b/src/parameterizedHeaderField.cpp index 77d732df..619fe7cf 100644 --- a/src/parameterizedHeaderField.cpp +++ b/src/parameterizedHeaderField.cpp @@ -78,8 +78,9 @@ struct paramInfo #endif // VMIME_BUILDING_DOC -void parameterizedHeaderField::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void parameterizedHeaderField::parseImpl + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { const string::value_type* const pend = buffer.data() + end; const string::value_type* const pstart = buffer.data() + position; @@ -108,7 +109,7 @@ void parameterizedHeaderField::parseImpl(const string& buffer, const string::siz --valueLength; // Parse value - getValue()->parse(buffer, valueStart, valueStart + valueLength); + getValue()->parse(ctx, buffer, valueStart, valueStart + valueLength); // Reset parameters removeAllParameters(); @@ -316,7 +317,7 @@ void parameterizedHeaderField::parseImpl(const string& buffer, const string::siz // Append this parameter to the list ref param = vmime::create ((*it).first); - param->parse(info.value); + param->parse(ctx, info.value); param->setParsedBounds(info.start, info.end); appendParameter(param); @@ -328,13 +329,14 @@ void parameterizedHeaderField::parseImpl(const string& buffer, const string::siz } -void parameterizedHeaderField::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const +void parameterizedHeaderField::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* 
newLinePos) const { string::size_type pos = curLinePos; // Parent header field - headerField::generateImpl(os, maxLineLength, pos, &pos); + headerField::generateImpl(ctx, os, pos, &pos); // Parameters for (std::vector >::const_iterator @@ -343,7 +345,7 @@ void parameterizedHeaderField::generateImpl(utility::outputStream& os, const str os << "; "; pos += 2; - (*it)->generate(os, maxLineLength, pos, &pos); + (*it)->generate(ctx, os, pos, &pos); } if (newLinePos) diff --git a/src/options.cpp b/src/parsingContext.cpp similarity index 60% rename from src/options.cpp rename to src/parsingContext.cpp index 9ec6056b..b440ef1e 100644 --- a/src/options.cpp +++ b/src/parsingContext.cpp @@ -21,49 +21,28 @@ // the GNU General Public License cover the whole combination. // -#include "vmime/options.hpp" +#include "vmime/parsingContext.hpp" namespace vmime { -options* options::getInstance() -{ - static options instance; - return (&instance); -} - - -options::multipartOptions::multipartOptions() - : m_prologText("This is a multi-part message in MIME format. 
Your mail reader " \ - "does not understand MIME message format."), - m_epilogText("") +parsingContext::parsingContext() { } -const string& options::multipartOptions::getPrologText() const +parsingContext::parsingContext(const parsingContext& ctx) + : context(ctx) { - return (m_prologText); } -void options::multipartOptions::setPrologText(const string& prologText) +parsingContext& parsingContext::getDefaultContext() { - m_prologText = prologText; -} - - -const string& options::multipartOptions::getEpilogText() const -{ - return (m_epilogText); -} - - -void options::multipartOptions::setEpilogText(const string& epilogText) -{ - m_epilogText = epilogText; + static parsingContext ctx; + return ctx; } diff --git a/src/path.cpp b/src/path.cpp index 6fe3e7aa..3e6e7a84 100644 --- a/src/path.cpp +++ b/src/path.cpp @@ -112,8 +112,9 @@ const std::vector > path::getChildComponents() } -void path::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void path::parseImpl + (const parsingContext& /* ctx */, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { string::size_type pos = position; @@ -165,8 +166,9 @@ void path::parseImpl(const string& buffer, const string::size_type position, } -void path::generateImpl(utility::outputStream& os, const string::size_type /* maxLineLength */, - const string::size_type curLinePos, string::size_type* newLinePos) const +void path::generateImpl + (const generationContext& /* ctx */, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { if (m_localPart.empty() && m_domain.empty()) { diff --git a/src/relay.cpp b/src/relay.cpp index 2262fa7e..90957dbe 100644 --- a/src/relay.cpp +++ b/src/relay.cpp @@ -24,6 +24,7 @@ #include "vmime/relay.hpp" #include "vmime/text.hpp" #include "vmime/parserHelpers.hpp" +#include "vmime/utility/outputStreamAdapter.hpp"
#include @@ -57,8 +58,9 @@ relay::relay(const relay& r) ["for" addr-spec] ; initial form */ -void relay::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void relay::parseImpl + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { const string::value_type* const pend = buffer.data() + end; const string::value_type* const pstart = buffer.data() + position; @@ -71,7 +73,7 @@ void relay::parseImpl(const string& buffer, const string::size_type position, if (p >= pstart) { // Parse the date/time part - m_date.parse(buffer, position + (p - pstart) + 1, end); + m_date.parse(ctx, buffer, position + (p - pstart) + 1, end); // Parse the components std::istringstream iss(string @@ -198,8 +200,9 @@ void relay::parseImpl(const string& buffer, const string::size_type position, } -void relay::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const +void relay::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { std::ostringstream oss; int count = 0; @@ -217,9 +220,12 @@ void relay::generateImpl(utility::outputStream& os, const string::size_type maxL if (m_id.length()) oss << (count++ > 0 ? " " : "") << "id " << m_id; if (m_for.length()) oss << (count++ > 0 ? 
" " : "") << "for " << m_for; - oss << "; " << m_date.generate(); + oss << "; "; - text(oss.str()).encodeAndFold(os, maxLineLength, + vmime::utility::outputStreamAdapter dos(oss); + m_date.generate(ctx, dos, 0, NULL); + + text(oss.str()).encodeAndFold(ctx, os, curLinePos, newLinePos, text::FORCE_NO_ENCODING); } diff --git a/src/text.cpp b/src/text.cpp index 1ba83101..d1ae6075 100644 --- a/src/text.cpp +++ b/src/text.cpp @@ -67,14 +67,15 @@ text::~text() } -void text::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void text::parseImpl + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { removeAllWords(); string::size_type newPos; - const std::vector > words = word::parseMultiple(buffer, position, end, &newPos); + const std::vector > words = word::parseMultiple(ctx, buffer, position, end, &newPos); copy_vector(words, m_words); @@ -85,10 +86,11 @@ void text::parseImpl(const string& buffer, const string::size_type position, } -void text::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type curLinePos, string::size_type* newLinePos) const +void text::generateImpl + (const generationContext& ctx, utility::outputStream& os, + const string::size_type curLinePos, string::size_type* newLinePos) const { - encodeAndFold(os, maxLineLength, curLinePos, newLinePos, 0); + encodeAndFold(ctx, os, curLinePos, newLinePos, 0); } @@ -142,12 +144,12 @@ bool text::operator!=(const text& t) const } -const string text::getConvertedText(const charset& dest) const +const string text::getConvertedText(const charset& dest, const charsetConverterOptions& opts) const { string out; for (std::vector >::const_iterator i = m_words.begin() ; i != m_words.end() ; ++i) - out += (*i)->getConvertedText(dest); + out += (*i)->getConvertedText(dest, opts); return (out); } @@ 
-348,15 +350,16 @@ void text::createFromString(const string& in, const charset& ch) } -void text::encodeAndFold(utility::outputStream& os, const string::size_type maxLineLength, - const string::size_type firstLineOffset, string::size_type* lastLineLength, const int flags) const +void text::encodeAndFold + (const generationContext& ctx, utility::outputStream& os, + const string::size_type firstLineOffset, string::size_type* lastLineLength, const int flags) const { string::size_type curLineLength = firstLineOffset; word::generatorState state; for (size_t wi = 0 ; wi < getWordCount() ; ++wi) { - getWordAt(wi)->generate(os, maxLineLength, curLineLength, + getWordAt(wi)->generate(ctx, os, curLineLength, &curLineLength, flags, &state); } @@ -369,19 +372,35 @@ ref text::decodeAndUnfold(const string& in) { ref t = vmime::create (); - decodeAndUnfold(in, t.get()); + decodeAndUnfold(parsingContext::getDefaultContext(), in, t.get()); + + return t; +} + + +ref text::decodeAndUnfold(const parsingContext& ctx, const string& in) +{ + ref t = vmime::create (); + + decodeAndUnfold(ctx, in, t.get()); return t; } text* text::decodeAndUnfold(const string& in, text* generateInExisting) +{ + return decodeAndUnfold(parsingContext::getDefaultContext(), in, generateInExisting); +} + + +text* text::decodeAndUnfold(const parsingContext& ctx, const string& in, text* generateInExisting) { text* out = (generateInExisting != NULL) ? 
generateInExisting : new text(); out->removeAllWords(); - const std::vector > words = word::parseMultiple(in, 0, in.length(), NULL); + const std::vector > words = word::parseMultiple(ctx, in, 0, in.length(), NULL); copy_vector(words, out->m_words); diff --git a/src/utility/stringUtils.cpp b/src/utility/stringUtils.cpp index ad498342..8e5f7205 100644 --- a/src/utility/stringUtils.cpp +++ b/src/utility/stringUtils.cpp @@ -151,6 +151,12 @@ string::size_type stringUtils::countASCIIchars } +bool stringUtils::is7bit(const string& str) +{ + return countASCIIchars(str.begin(), str.end()) == str.length(); +} + + string::size_type stringUtils::findFirstNonASCIIchar (const string::const_iterator begin, const string::const_iterator end) { @@ -205,5 +211,32 @@ const string stringUtils::unquote(const string& str) } +bool stringUtils::needQuoting(const string& str, const string& specialChars) +{ + return str.find_first_of(specialChars.c_str()) != string::npos; +} + + +string stringUtils::quote + (const string& str, const string& escapeSpecialChars, const string& escapeChar) +{ + std::ostringstream oss; + string::size_type lastPos = 0, pos = 0; + + while ((pos = str.find_first_of(escapeSpecialChars, lastPos)) != string::npos) + { + oss << str.substr(lastPos, pos - lastPos) + << escapeChar + << str[pos]; + + lastPos = pos + 1; + } + + oss << str.substr(lastPos); + + return oss.str(); +} + + } // utility } // vmime diff --git a/src/word.cpp b/src/word.cpp index 9ab31087..3be9998e 100644 --- a/src/word.cpp +++ b/src/word.cpp @@ -66,9 +66,10 @@ word::word(const string& buffer, const charset& charset) } -ref word::parseNext(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition, - bool prevIsEncoded, bool* isEncoded, bool isFirst) +ref word::parseNext + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition, + bool prevIsEncoded, 
bool* isEncoded, bool isFirst) { string::size_type pos = position; @@ -87,6 +88,9 @@ ref word::parseNext(const string& buffer, const string::size_type positio string::size_type startPos = pos; string unencoded; + const charset defaultCharset = ctx.getInternationalizedEmailSupport() + ? charset(charsets::UTF_8) : charset(charsets::US_ASCII); + while (pos < end) { // End of line: does not occur in the middle of an encoded word. This is @@ -124,7 +128,7 @@ ref word::parseNext(const string& buffer, const string::size_type positio if (prevIsEncoded) unencoded = whiteSpaces + unencoded; - ref w = vmime::create (unencoded, charset(charsets::US_ASCII)); + ref w = vmime::create (unencoded, defaultCharset); w->setParsedBounds(position, pos); if (newPosition) @@ -205,7 +209,7 @@ ref word::parseNext(const string& buffer, const string::size_type positio // Treat unencoded text at the end of the buffer if (!unencoded.empty()) { - ref w = vmime::create (unencoded, charset(charsets::US_ASCII)); + ref w = vmime::create (unencoded, defaultCharset); w->setParsedBounds(position, end); if (newPosition) @@ -221,8 +225,9 @@ ref word::parseNext(const string& buffer, const string::size_type positio } -const std::vector > word::parseMultiple(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +const std::vector > word::parseMultiple + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { std::vector > res; ref w; @@ -231,7 +236,7 @@ const std::vector > word::parseMultiple(const string& buffer, const bool prevIsEncoded = false; - while ((w = word::parseNext(buffer, pos, end, &pos, prevIsEncoded, &prevIsEncoded, (w == NULL))) != NULL) + while ((w = word::parseNext(ctx, buffer, pos, end, &pos, prevIsEncoded, &prevIsEncoded, (w == NULL))) != NULL) res.push_back(w); if (newPosition) @@ -241,8 +246,9 @@ const std::vector > 
word::parseMultiple(const string& buffer, const } -void word::parseImpl(const string& buffer, const string::size_type position, - const string::size_type end, string::size_type* newPosition) +void word::parseImpl + (const parsingContext& ctx, const string& buffer, const string::size_type position, + const string::size_type end, string::size_type* newPosition) { if (position + 6 < end && // 6 = "=?(.+)?(.*)?=" buffer[position] == '=' && buffer[position + 1] == '?') @@ -315,7 +321,8 @@ void word::parseImpl(const string& buffer, const string::size_type position, // Unknown encoding or malformed encoded word: treat the buffer as ordinary text (RFC-2047, Page 9). m_buffer = string(buffer.begin() + position, buffer.begin() + end); - m_charset = charsets::US_ASCII; + m_charset = ctx.getInternationalizedEmailSupport() + ? charset(charsets::UTF_8) : charset(charsets::US_ASCII); setParsedBounds(position, end); @@ -324,14 +331,14 @@ void word::parseImpl(const string& buffer, const string::size_type position, } -void word::generateImpl(utility::outputStream& os, const string::size_type maxLineLength, +void word::generateImpl(const generationContext& ctx, utility::outputStream& os, const string::size_type curLinePos, string::size_type* newLinePos) const { - generate(os, maxLineLength, curLinePos, newLinePos, 0, NULL); + generate(ctx, os, curLinePos, newLinePos, 0, NULL); } -void word::generate(utility::outputStream& os, const string::size_type maxLineLength, +void word::generate(const generationContext& ctx, utility::outputStream& os, const string::size_type curLinePos, string::size_type* newLinePos, const int flags, generatorState* state) const { @@ -350,17 +357,27 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe else if ((flags & text::FORCE_ENCODING) != 0) encodingNeeded = true; else // auto-detect - encodingNeeded = wordEncoder::isEncodingNeeded(m_buffer, m_charset); + encodingNeeded = wordEncoder::isEncodingNeeded(ctx, m_buffer, 
m_charset); + // If text does not need to be encoded, quote the buffer (no folding is performed). + if (!encodingNeeded && + (flags & text::QUOTE_IF_NEEDED) && + utility::stringUtils::needQuoting(m_buffer)) + { + const string quoted = utility::stringUtils::quote(m_buffer, "\\\"", "\\"); + + os << '"' << quoted << '"'; + curLineLength += 1 + quoted.length() + 1; + } // If possible and requested (with flag), quote the buffer (no folding is performed). // Quoting is possible if and only if: // - the buffer does not need to be encoded // - the buffer does not contain quoting character (") // - there is enough remaining space on the current line to hold the whole buffer - if (!encodingNeeded && - (flags & text::QUOTE_IF_POSSIBLE) && - m_buffer.find('"') == string::npos && - (curLineLength + 2 /* 2 x " */ + m_buffer.length()) < maxLineLength) + else if (!encodingNeeded && + (flags & text::QUOTE_IF_POSSIBLE) && + m_buffer.find('"') == string::npos && + (curLineLength + 2 /* 2 x " */ + m_buffer.length()) < ctx.getMaxLineLength()) { os << '"' << m_buffer << '"'; curLineLength += 2 + m_buffer.length(); @@ -368,6 +385,19 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe // We will fold lines without encoding them. 
else if (!encodingNeeded) { + string buffer; + + if (ctx.getInternationalizedEmailSupport()) + { + // Convert the buffer to UTF-8 + charset::convert(m_buffer, buffer, m_charset, charsets::UTF_8); + } + else + { + // Leave the buffer as-is + buffer = m_buffer; + } + // Here, we could have the following conditions: // // * a maximum line length of N bytes @@ -379,7 +409,7 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe string::size_type maxRunLength = 0; string::size_type curRunLength = 0; - for (string::const_iterator p = m_buffer.begin(), end = m_buffer.end() ; p != end ; ++p) + for (string::const_iterator p = buffer.begin(), end = buffer.end() ; p != end ; ++p) { if (parserHelpers::isSpace(*p)) { @@ -394,19 +424,19 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe maxRunLength = std::max(maxRunLength, curRunLength); - if (((flags & text::FORCE_NO_ENCODING) == 0) && maxRunLength >= maxLineLength - 3) + if (((flags & text::FORCE_NO_ENCODING) == 0) && maxRunLength >= ctx.getMaxLineLength() - 3) { // Generate with encoding forced - generate(os, maxLineLength, curLinePos, newLinePos, flags | text::FORCE_ENCODING, state); + generate(ctx, os, curLinePos, newLinePos, flags | text::FORCE_ENCODING, state); return; } // Output runs, and fold line when a whitespace is encountered - string::const_iterator lastWSpos = m_buffer.end(); // last white-space position - string::const_iterator curLineStart = m_buffer.begin(); // current line start + string::const_iterator lastWSpos = buffer.end(); // last white-space position + string::const_iterator curLineStart = buffer.begin(); // current line start - string::const_iterator p = m_buffer.begin(); - const string::const_iterator end = m_buffer.end(); + string::const_iterator p = buffer.begin(); + const string::const_iterator end = buffer.end(); bool finished = false; bool newLine = false; @@ -417,7 +447,7 @@ void word::generate(utility::outputStream& os, const 
string::size_type maxLineLe { // Exceeded maximum line length, but we have found a white-space // where we can cut the line... - if (curLineLength >= maxLineLength && lastWSpos != end) + if (curLineLength >= ctx.getMaxLineLength() && lastWSpos != end) break; if (*p == ' ' || *p == '\t') @@ -437,7 +467,7 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe // we write the full line no matter of the max line length... if (!newLine && p != end && lastWSpos == end && - !state->isFirstWord && curLineStart == m_buffer.begin()) + !state->isFirstWord && curLineStart == buffer.begin()) { // Here, we are continuing on the line of previous encoded // word, but there is not even enough space to put the @@ -468,7 +498,7 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe os << string(curLineStart, p); - if (p != m_buffer.begin() && parserHelpers::isSpace(*(p - 1))) + if (p != buffer.begin() && parserHelpers::isSpace(*(p - 1))) state->lastCharIsSpace = true; else state->lastCharIsSpace = false; @@ -563,9 +593,9 @@ void word::generate(utility::outputStream& os, const string::size_type maxLineLe */ const string::size_type maxLineLength3 = - (maxLineLength == lineLengthLimits::infinite) - ? maxLineLength - : std::min(maxLineLength, static_cast (76)); + (ctx.getMaxLineLength() == lineLengthLimits::infinite) + ? 
ctx.getMaxLineLength() + : std::min(ctx.getMaxLineLength(), static_cast (76)); wordEncoder wordEnc(m_buffer, m_charset); @@ -691,13 +721,13 @@ bool word::operator!=(const word& w) const } -const string word::getConvertedText(const charset& dest) const +const string word::getConvertedText(const charset& dest, const charsetConverterOptions& opts) const { string out; try { - charset::convert(m_buffer, out, m_charset, dest); + charset::convert(m_buffer, out, m_charset, dest, opts); } catch (vmime::exceptions::charset_conv_error& e) { diff --git a/src/wordEncoder.cpp b/src/wordEncoder.cpp index 32e46df1..82a74cff 100644 --- a/src/wordEncoder.cpp +++ b/src/wordEncoder.cpp @@ -168,7 +168,7 @@ const string wordEncoder::getNextChunk(const string::size_type maxLength) // Fully RFC-compliant encoding else { - charsetConverter conv(charsets::UTF_8, m_charset); + ref conv = charsetConverter::create(charsets::UTF_8, m_charset); string::size_type inputCount = 0; string::size_type outputCount = 0; @@ -185,7 +185,7 @@ const string wordEncoder::getNextChunk(const string::size_type maxLength) // Convert back to original encoding string encodeBytes; - conv.convert(inputChar, encodeBytes); + conv->convert(inputChar, encodeBytes); encodeBuffer += encodeBytes; @@ -225,23 +225,31 @@ wordEncoder::Encoding wordEncoder::getEncoding() const // static -bool wordEncoder::isEncodingNeeded(const string& buffer, const charset& charset) +bool wordEncoder::isEncodingNeeded + (const generationContext& ctx, const string& buffer, const charset& charset) { - // Charset-specific encoding - encoding recEncoding; + if (!ctx.getInternationalizedEmailSupport()) + { + // Charset-specific encoding + encoding recEncoding; - if (charset.getRecommendedEncoding(recEncoding)) - return true; + if (charset.getRecommendedEncoding(recEncoding)) + return true; - // No encoding is needed if the buffer only contains ASCII chars - if (utility::stringUtils::findFirstNonASCIIchar(buffer.begin(), buffer.end()) != 
string::npos) - return true; + // No encoding is needed if the buffer only contains ASCII chars + if (utility::stringUtils::findFirstNonASCIIchar(buffer.begin(), buffer.end()) != string::npos) + return true; + } // Force encoding when there are only ASCII chars, but there is // also at least one of '\n' or '\r' (header fields) if (buffer.find_first_of("\n\r") != string::npos) return true; + // If any RFC-2047 sequence is found in the buffer, encode it + if (buffer.find("=?") != string::npos) + return true; + return false; } diff --git a/tests/parser/charsetTest.cpp b/tests/parser/charsetTest.cpp index f07694ca..eaedb218 100644 --- a/tests/parser/charsetTest.cpp +++ b/tests/parser/charsetTest.cpp @@ -105,6 +105,10 @@ VMIME_TEST_SUITE_BEGIN // Test invalid input VMIME_TEST(testFilterInvalid1) + // IDNA + VMIME_TEST(testEncodeIDNA) + VMIME_TEST(testDecodeIDNA) + // TODO: more tests VMIME_TEST_LIST_END @@ -147,14 +151,18 @@ VMIME_TEST_SUITE_BEGIN vmime::string actualOut; vmime::utility::outputStreamStringAdapter osa(actualOut); - vmime::utility::charsetFilteredOutputStream os - (inputCharset, outputCharset, osa); + + vmime::ref conv = + vmime::charsetConverter::create(inputCharset, outputCharset); + + vmime::ref os = + conv->getFilteredOutputStream(osa); vmime::utility::inputStreamStringAdapter is(in); - vmime::utility::bufferedStreamCopy(is, os); + vmime::utility::bufferedStreamCopy(is, *os); - os.flush(); + os->flush(); VASSERT_EQ("1", toHex(expectedOut), toHex(actualOut)); } @@ -167,17 +175,21 @@ VMIME_TEST_SUITE_BEGIN vmime::string actualOut; vmime::utility::outputStreamStringAdapter osa(actualOut); - vmime::utility::charsetFilteredOutputStream os - (inputCharset, outputCharset, osa); + + vmime::ref conv = + vmime::charsetConverter::create(inputCharset, outputCharset); + + vmime::ref os = + conv->getFilteredOutputStream(osa); vmime::utility::inputStreamStringAdapter is(in); vmime::utility::stream::value_type buffer[16]; for (int i = 0 ; !is.eof() ; ++i) - 
os.write(buffer, is.read(buffer, 1)); + os->write(buffer, is.read(buffer, 1)); - os.flush(); + os->flush(); VASSERT_EQ("1", toHex(expectedOut), toHex(actualOut)); } @@ -190,17 +202,21 @@ VMIME_TEST_SUITE_BEGIN vmime::string actualOut; vmime::utility::outputStreamStringAdapter osa(actualOut); - vmime::utility::charsetFilteredOutputStream os - (inputCharset, outputCharset, osa); + + vmime::ref conv = + vmime::charsetConverter::create(inputCharset, outputCharset); + + vmime::ref os = + conv->getFilteredOutputStream(osa); vmime::utility::inputStreamStringAdapter is(in); vmime::utility::stream::value_type buffer[16]; for (int i = 0 ; !is.eof() ; ++i) - os.write(buffer, is.read(buffer, (i % 5) + 1)); + os->write(buffer, is.read(buffer, (i % 5) + 1)); - os.flush(); + os->flush(); VASSERT_EQ("1", toHex(expectedOut), toHex(actualOut)); } @@ -212,18 +228,23 @@ VMIME_TEST_SUITE_BEGIN vmime::string actualOut; vmime::utility::outputStreamStringAdapter osa(actualOut); - vmime::utility::charsetFilteredOutputStream os - (vmime::charset("utf-8"), - vmime::charset("iso-8859-1"), osa); + + vmime::ref conv = + vmime::charsetConverter::create + (vmime::charset("utf-8"), + vmime::charset("iso-8859-1")); + + vmime::ref os = + conv->getFilteredOutputStream(osa); vmime::utility::inputStreamStringAdapter is(in); vmime::utility::stream::value_type buffer[16]; for (int i = 0 ; !is.eof() ; ++i) - os.write(buffer, is.read(buffer, 1)); + os->write(buffer, is.read(buffer, 1)); - os.flush(); + os->flush(); VASSERT_EQ("1", toHex(expectedOut), toHex(actualOut)); } @@ -276,5 +297,38 @@ VMIME_TEST_SUITE_BEGIN return res; } + static const vmime::string convertHelper + (const vmime::string& in, const vmime::charset& csrc, const vmime::charset& cdest) + { + vmime::string out; + vmime::charset::convert(in, out, csrc, cdest); + + return out; + } + + void testEncodeIDNA() + { + VASSERT_EQ("1", "xn--espaol-zwa", convertHelper("español", "utf-8", "idna")); + + // Tests from ICANN + VASSERT_EQ("2.1", 
"xn--hxajbheg2az3al", convertHelper("παράδειγμα", "utf-8", "idna")); + VASSERT_EQ("2.2", "xn--jxalpdlp", convertHelper("δοκιμή", "utf-8", "idna")); + + VASSERT_EQ("3.1", "xn--mgbh0fb", convertHelper("مثال", "utf-8", "idna")); + VASSERT_EQ("3.2", "xn--kgbechtv", convertHelper("إختبار", "utf-8", "idna")); + } + + void testDecodeIDNA() + { + VASSERT_EQ("1", "español", convertHelper("xn--espaol-zwa", "idna", "utf-8")); + + // Tests from ICANN + VASSERT_EQ("2.1", "παράδειγμα", convertHelper("xn--hxajbheg2az3al", "idna", "utf-8")); + VASSERT_EQ("2.2", "δοκιμή", convertHelper("xn--jxalpdlp", "idna", "utf-8")); + + VASSERT_EQ("3.1", "مثال", convertHelper("xn--mgbh0fb", "idna", "utf-8")); + VASSERT_EQ("3.2", "إختبار", convertHelper("xn--kgbechtv", "idna", "utf-8")); + } + VMIME_TEST_SUITE_END diff --git a/tests/parser/emailAddressTest.cpp b/tests/parser/emailAddressTest.cpp new file mode 100644 index 00000000..0185c220 --- /dev/null +++ b/tests/parser/emailAddressTest.cpp @@ -0,0 +1,224 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. 
Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#include "tests/testUtils.hpp" + +#include "vmime/platform.hpp" + + +#define VMIME_TEST_SUITE emailAddressTest +#define VMIME_TEST_SUITE_MODULE "Parser" + + +VMIME_TEST_SUITE_BEGIN + + VMIME_TEST_LIST_BEGIN + VMIME_TEST(testParseASCII) + VMIME_TEST(testParseEAI) + VMIME_TEST(testParseInvalid) + VMIME_TEST(testGenerateASCII) + VMIME_TEST(testGenerateEAI) + VMIME_TEST(testParseSpecialChars) + VMIME_TEST(testParseCommentInLocalPart) + VMIME_TEST(testParseCommentInDomainPart) + VMIME_TEST(testGenerateSpecialChars) + VMIME_TEST_LIST_END + + + void testParseASCII() + { + vmime::emailAddress eml1("local@domain"); + VASSERT_EQ("1/local", "local", eml1.getLocalName()); + VASSERT_EQ("1/domain", "domain", eml1.getDomainName()); + + // When not specified, domain should be local host name + vmime::emailAddress eml2("local"); + VASSERT_EQ("2/local", "local", eml2.getLocalName()); + VASSERT_EQ("2/domain", vmime::platform::getHandler()->getHostName(), eml2.getDomainName()); + } + + void testParseEAI() + { + // Examples taken from Wikipedia (http://en.wikipedia.org/wiki/Email_address) + + // Latin Alphabet (with diacritics): + vmime::emailAddress eml1("Pelé@example.com"); + VASSERT_EQ("1/local", "Pelé", eml1.getLocalName()); + VASSERT_EQ("1/domain", "example.com", eml1.getDomainName()); + + // Greek Alphabet + vmime::emailAddress eml2("δοκιμή@παράδειγμα.δοκιμή"); + VASSERT_EQ("2/local", "δοκιμή", eml2.getLocalName()); + VASSERT_EQ("2/domain", "παράδειγμα.δοκιμή", eml2.getDomainName()); + + // Japanese Characters + vmime::emailAddress eml3("甲斐@黒川.日本"); + VASSERT_EQ("3/local", "甲斐", eml3.getLocalName()); + VASSERT_EQ("3/domain", "黒川.日本", eml3.getDomainName()); + + // Cyrillic Characters + vmime::emailAddress eml4("чебурашка@ящик-с-апельсинами.рф"); + VASSERT_EQ("4/local", "чебурашка", eml4.getLocalName()); + VASSERT_EQ("4/domain", "ящик-с-апельсинами.рф", eml4.getDomainName()); + 
} + + void testParseInvalid() + { + // Only one @ is allowed outside quotation marks + vmime::emailAddress eml1("local@part@domain"); + VASSERT_EQ("1/local", "local", eml1.getLocalName()); + VASSERT_EQ("1/domain", "part@domain", eml1.getDomainName()); + + // Quoted strings must be dot separated, or the only element making up + // the local-part: should be parsed correctly, but it still represents + // an invalid email address + vmime::emailAddress eml2("Just\"not\"right@example.com"); + VASSERT_EQ("2/local", "Just\"not\"right", eml2.getLocalName()); + VASSERT_EQ("2/domain", "example.com", eml2.getDomainName()); + + // An @ character must separate the local and domain parts + vmime::emailAddress eml3("Abc.example.com"); + VASSERT_EQ("3/local", "Abc.example.com", eml3.getLocalName()); + VASSERT_EQ("3/domain", vmime::platform::getHandler()->getHostName(), eml3.getDomainName()); + + // Whitespace must be escaped + vmime::emailAddress eml4("local part@domain"); + VASSERT_EQ("4/local", "local", eml4.getLocalName()); + VASSERT_EQ("4/domain", vmime::platform::getHandler()->getHostName(), eml4.getDomainName()); + + vmime::emailAddress eml5("this\\ still\\\"not\\\\allowed@example.com"); + VASSERT_EQ("5/local", "this\\", eml5.getLocalName()); + VASSERT_EQ("5/domain", vmime::platform::getHandler()->getHostName(), eml5.getDomainName()); + } + + void testParseSpecialChars() + { + // Examples taken from Wikipedia (http://en.wikipedia.org/wiki/Email_address) + + vmime::emailAddress eml1("\" \"@example.org"); + VASSERT_EQ("1/local", " ", eml1.getLocalName()); + VASSERT_EQ("1/domain", "example.org", eml1.getDomainName()); + + vmime::emailAddress eml2("\"()<>[]:,;@\\\\\\\"!#$%&'*+-/=?^_`{}| ~.a\"@example.org"); + VASSERT_EQ("2/local", "()<>[]:,;@\\\"!#$%&'*+-/=?^_`{}| ~.a", eml2.getLocalName()); + VASSERT_EQ("3/domain", "example.org", eml2.getDomainName()); + + vmime::emailAddress eml3("!#$%&'*+-/=?^_`{}|~@example.org"); + VASSERT_EQ("3/local", "!#$%&'*+-/=?^_`{}|~", 
eml3.getLocalName()); + VASSERT_EQ("3/domain", "example.org", eml3.getDomainName()); + + vmime::emailAddress eml4("!#$%&'*+-/=?^_`{}|~@example.org"); + VASSERT_EQ("4/local", "!#$%&'*+-/=?^_`{}|~", eml4.getLocalName()); + VASSERT_EQ("4/domain", "example.org", eml4.getDomainName()); + + vmime::emailAddress eml5("\"very.unusual.@.unusual.com\"@example.com"); + VASSERT_EQ("5/local", "very.unusual.@.unusual.com", eml5.getLocalName()); + VASSERT_EQ("5/domain", "example.com", eml5.getDomainName()); + + vmime::emailAddress eml6("\"very.(),:;<>[]\\\".VERY.\\\"very@\\\\ \\\"very\\\".unusual\"@strange.example.com"); + VASSERT_EQ("6/local", "very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual", eml6.getLocalName()); + VASSERT_EQ("6/domain", "strange.example.com", eml6.getDomainName()); + } + + void testParseCommentInLocalPart() + { + vmime::emailAddress eml1("john.smith(comment)@example.com"); + VASSERT_EQ("1/local", "john.smith", eml1.getLocalName()); + VASSERT_EQ("1/domain", "example.com", eml1.getDomainName()); + + vmime::emailAddress eml2("(comment)john.smith@example.com"); + VASSERT_EQ("2/local", "john.smith", eml2.getLocalName()); + VASSERT_EQ("2/domain", "example.com", eml2.getDomainName()); + + vmime::emailAddress eml3("(comment (comment in comment))john.smith@example.com"); + VASSERT_EQ("3/local", "john.smith", eml3.getLocalName()); + VASSERT_EQ("3/domain", "example.com", eml3.getDomainName()); + + vmime::emailAddress eml4("(comment \\) end comment)john.smith@example.com"); + VASSERT_EQ("4/local", "john.smith", eml4.getLocalName()); + VASSERT_EQ("4/domain", "example.com", eml4.getDomainName()); + } + + void testParseCommentInDomainPart() + { + vmime::emailAddress eml1("john.smith@(comment)example.com"); + VASSERT_EQ("1/local", "john.smith", eml1.getLocalName()); + VASSERT_EQ("1/domain", "example.com", eml1.getDomainName()); + + vmime::emailAddress eml2("john.smith@example.com(comment)"); + VASSERT_EQ("2/local", "john.smith", eml2.getLocalName()); + VASSERT_EQ("2/domain", 
"example.com", eml2.getDomainName()); + + vmime::emailAddress eml3("john.smith@(comment (comment in comment))example.com"); + VASSERT_EQ("3/local", "john.smith", eml3.getLocalName()); + VASSERT_EQ("3/domain", "example.com", eml3.getDomainName()); + + vmime::emailAddress eml4("john.smith@(comment \\) end comment)example.com"); + VASSERT_EQ("4/local", "john.smith", eml4.getLocalName()); + VASSERT_EQ("4/domain", "example.com", eml4.getDomainName()); + } + + void testGenerateASCII() + { + VASSERT_EQ("email 1", "local@domain", vmime::emailAddress("local", "domain").generate()); + + VASSERT_EQ("email 2", "=?utf-8?Q?Pel=C3=A9?=@example.com", + vmime::emailAddress("Pelé", "example.com").generate()); + VASSERT_EQ("email 3", "=?utf-8?B?55Sy5paQ?=@xn--5rtw95l.xn--wgv71a", + vmime::emailAddress("甲斐", "黒川.日本").generate()); + VASSERT_EQ("email 4", "mailtest@xn--r8jz45g.xn--zckzah", + vmime::emailAddress("mailtest", "例え.テスト").generate()); + VASSERT_EQ("email 5", "mailtest@xn--mgbh0fb.xn--kgbechtv", + vmime::emailAddress("mailtest", "مثال.إختبار").generate()); + } + + void testGenerateEAI() + { + vmime::generationContext ctx(vmime::generationContext::getDefaultContext()); + ctx.setInternationalizedEmailSupport(true); + + vmime::generationContext::switcher contextSwitcher(ctx); + + VASSERT_EQ("email 1", "Pelé@example.com", + vmime::emailAddress("Pelé", "example.com").generate()); + VASSERT_EQ("email 2", "δοκιμή@παράδειγμα.δοκιμή", + vmime::emailAddress("δοκιμή", "παράδειγμα.δοκιμή").generate()); + VASSERT_EQ("email 3", "甲斐@黒川.日本", + vmime::emailAddress("甲斐", "黒川.日本").generate()); + VASSERT_EQ("email 4", "чебурашка@ящик-с-апельсинами.рф", + vmime::emailAddress("чебурашка", "ящик-с-апельсинами.рф").generate()); + } + + void testGenerateSpecialChars() + { + VASSERT_EQ("email 1", "\"very.unusual.@.unusual.com\"@example.com", + vmime::emailAddress("very.unusual.@.unusual.com", "example.com").generate()); + + VASSERT_EQ("email 2", "\"very.(),:;<>[]\\\".VERY.\\\"very@\\\\ 
\\\"very\\\".unusual\"@strange.example.com", + vmime::emailAddress("very.(),:;<>[]\".VERY.\"very@\\ \"very\".unusual", "strange.example.com").generate()); + + VASSERT_EQ("email 3", "\" \"@example.com", + vmime::emailAddress(" ", "example.com").generate()); + } + +VMIME_TEST_SUITE_END diff --git a/tests/parser/mailboxTest.cpp b/tests/parser/mailboxTest.cpp index 7de7abd3..9e48a235 100644 --- a/tests/parser/mailboxTest.cpp +++ b/tests/parser/mailboxTest.cpp @@ -61,7 +61,7 @@ VMIME_TEST_SUITE_BEGIN "[address-list: [[mailbox: name=[text: []], email=john.doe@acme.com]]]", // Test 5 - "John.Doe (ignore) @acme.com (John Doe)", + "John.Doe(ignore)@acme.com (John Doe)", "[address-list: [[mailbox: name=[text: []], email=John.Doe@acme.com]]]", diff --git a/tests/parser/textTest.cpp b/tests/parser/textTest.cpp index 37c62072..d2047f02 100644 --- a/tests/parser/textTest.cpp +++ b/tests/parser/textTest.cpp @@ -55,6 +55,11 @@ VMIME_TEST_SUITE_BEGIN VMIME_TEST(testForcedNonEncoding) VMIME_TEST(testBugFix20110511) + + VMIME_TEST(testInternationalizedEmail_specialChars) + VMIME_TEST(testInternationalizedEmail_UTF8) + VMIME_TEST(testInternationalizedEmail_nonUTF8) + VMIME_TEST(testInternationalizedEmail_folding) VMIME_TEST_LIST_END @@ -355,19 +360,22 @@ VMIME_TEST_SUITE_BEGIN std::string str; vmime::utility::outputStreamStringAdapter os(str); + vmime::generationContext ctx; + ctx.setMaxLineLength(1000); + // ASCII-only text is quotable str.clear(); - vmime::word("Quoted text").generate(os, 1000, 0, NULL, vmime::text::QUOTE_IF_POSSIBLE, NULL); + vmime::word("Quoted text").generate(ctx, os, 0, NULL, vmime::text::QUOTE_IF_POSSIBLE, NULL); VASSERT_EQ("1", "\"Quoted text\"", cleanGeneratedWords(str)); // Text with CR/LF is not quotable str.clear(); - vmime::word("Non-quotable\ntext", "us-ascii").generate(os, 1000, 0, NULL, vmime::text::QUOTE_IF_POSSIBLE, NULL); + vmime::word("Non-quotable\ntext", "us-ascii").generate(ctx, os, 0, NULL, vmime::text::QUOTE_IF_POSSIBLE, NULL); VASSERT_EQ("2", 
"=?us-ascii?Q?Non-quotable=0Atext?=", cleanGeneratedWords(str)); // Text with non-ASCII chars is not quotable str.clear(); - vmime::word("Non-quotable text \xc3\xa9").generate(os, 1000, 0, NULL, vmime::text::QUOTE_IF_POSSIBLE, NULL); + vmime::word("Non-quotable text \xc3\xa9").generate(ctx, os, 0, NULL, vmime::text::QUOTE_IF_POSSIBLE, NULL); VASSERT_EQ("3", "=?UTF-8?Q?Non-quotable_text_=C3=A9?=", cleanGeneratedWords(str)); } @@ -493,5 +501,70 @@ VMIME_TEST_SUITE_BEGIN VASSERT_EQ("decode2", DECODED_TEXT, t.getWholeBuffer()); } + void testInternationalizedEmail_specialChars() + { + vmime::generationContext ctx(vmime::generationContext::getDefaultContext()); + ctx.setInternationalizedEmailSupport(true); + + vmime::generationContext::switcher contextSwitcher(ctx); + + // Special sequence/chars should still be encoded + VASSERT_EQ("1", "=?us-ascii?Q?Test=3D=3Frfc2047_sequence?=", + vmime::word("Test=?rfc2047 sequence", vmime::charset("us-ascii")).generate()); + + VASSERT_EQ("2", "=?us-ascii?Q?Line_One=0ALine_Two?=", + vmime::word("Line One\nLine Two", vmime::charset("us-ascii")).generate()); + } + + void testInternationalizedEmail_UTF8() + { + vmime::generationContext ctx(vmime::generationContext::getDefaultContext()); + ctx.setInternationalizedEmailSupport(true); + + vmime::generationContext::switcher contextSwitcher(ctx); + + // Already UTF-8 encoded text should be left as is + VASSERT_EQ("1", "Achim Br\xc3\xa4ndt", + vmime::word("Achim Br\xc3\xa4ndt", vmime::charset("utf-8")).generate()); + } + + void testInternationalizedEmail_nonUTF8() + { + vmime::generationContext ctx(vmime::generationContext::getDefaultContext()); + ctx.setInternationalizedEmailSupport(true); + + vmime::generationContext::switcher contextSwitcher(ctx); + + // Non UTF-8 encoded text should first be converted to UTF-8 + VASSERT_EQ("1", "Achim Br\xc3\xa4ndt", + vmime::word("Achim Br\xe4ndt", vmime::charset("iso-8859-1")).generate()); + } + + void testInternationalizedEmail_folding() + { + 
vmime::generationContext ctx(vmime::generationContext::getDefaultContext()); + ctx.setInternationalizedEmailSupport(true); + + vmime::generationContext::switcher contextSwitcher(ctx); + + // RFC-2047 encoding must be performed, as line folding is needed + vmime::word w1("01234567890123456789\xc3\xa0x012345678901234567890123456789" + "01234567890123456789\xc3\xa0x012345678901234567890123456789", vmime::charset("utf-8")); + + VASSERT_EQ("1", + "=?utf-8?Q?01234567890123456789=C3=A0x01234567890?=\r\n" + " =?utf-8?Q?1234567890123456789012345678901234567?=\r\n" + " =?utf-8?Q?89=C3=A0x0123456789012345678901234567?=\r\n" + " =?utf-8?Q?89?=", w1.generate(50)); + + // RFC-2047 encoding will not be forced, as words can be wrapped in a new line + vmime::word w2("bla bla bla This is some '\xc3\xa0\xc3\xa7' UTF-8 encoded text", vmime::charset("utf-8")); + + VASSERT_EQ("2", + "bla bla bla This is\r\n" + " some '\xc3\xa0\xc3\xa7' UTF-8\r\n" + " encoded text", w2.generate(20)); + } + VMIME_TEST_SUITE_END diff --git a/tests/testUtils.hpp b/tests/testUtils.hpp index 55c0424e..39d9e555 100644 --- a/tests/testUtils.hpp +++ b/tests/testUtils.hpp @@ -107,6 +107,13 @@ inline std::ostream& operator<<(std::ostream& os, const vmime::charset& ch) } +inline std::ostream& operator<<(std::ostream& os, const vmime::word& w) +{ + os << "[word: charset=" << w.getCharset().getName() << ", buffer=" << w.getBuffer() << "]"; + return (os); +} + + inline std::ostream& operator<<(std::ostream& os, const vmime::text& txt) { os << "[text: ["; @@ -118,7 +125,7 @@ inline std::ostream& operator<<(std::ostream& os, const vmime::text& txt) if (i != 0) os << ","; - os << "[word: charset=" << w.getCharset().getName() << ", buffer=" << w.getBuffer() << "]"; + os << w; } os << "]]"; @@ -127,6 +134,14 @@ inline std::ostream& operator<<(std::ostream& os, const vmime::text& txt) } +inline std::ostream& operator<<(std::ostream& os, const vmime::emailAddress& email) +{ + os << email.generate(); + + return (os); +} + + 
inline std::ostream& operator<<(std::ostream& os, const vmime::mailbox& mbox) { os << "[mailbox: name=" << mbox.getName() << ", email=" << mbox.getEmail() << "]"; diff --git a/vmime/address.hpp b/vmime/address.hpp index 8a00c04c..0faf876f 100644 --- a/vmime/address.hpp +++ b/vmime/address.hpp @@ -65,13 +65,17 @@ public: /** Parse an address from an input buffer. * + * @param ctx parsing context * @param buffer input buffer * @param position position in the input buffer * @param end end position in the input buffer * @param newPosition will receive the new position in the input buffer * @return a new address object, or null if no more address is available in the input buffer */ - static ref
parseNext(const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition); + static ref
parseNext + (const parsingContext& ctx, const string& buffer, + const string::size_type position, const string::size_type end, + string::size_type* newPosition); }; diff --git a/vmime/addressList.hpp b/vmime/addressList.hpp index b4694724..b7bb40cb 100644 --- a/vmime/addressList.hpp +++ b/vmime/addressList.hpp @@ -167,14 +167,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/body.hpp b/vmime/body.hpp index 4d563ebd..3ba140c8 100644 --- a/vmime/body.hpp +++ b/vmime/body.hpp @@ -303,14 +303,15 @@ protected: // Component parsing & assembling void parseImpl - (ref parser, + (const parsingContext& ctx, + ref parser, const utility::stream::size_type position, const utility::stream::size_type end, utility::stream::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/bodyPart.hpp b/vmime/bodyPart.hpp index 6b96e923..5972cea3 100644 --- a/vmime/bodyPart.hpp +++ b/vmime/bodyPart.hpp @@ -114,14 +114,15 @@ protected: // Component parsing & assembling void parseImpl - (ref parser, + (const parsingContext& ctx, + ref parser, const utility::stream::size_type position, const utility::stream::size_type end, utility::stream::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = 
lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/charset.hpp b/vmime/charset.hpp index 24f3f79c..e2de7ebb 100644 --- a/vmime/charset.hpp +++ b/vmime/charset.hpp @@ -28,6 +28,7 @@ #include "vmime/base.hpp" #include "vmime/utility/inputStream.hpp" #include "vmime/utility/outputStream.hpp" +#include "vmime/charsetConverterOptions.hpp" #include "vmime/component.hpp" @@ -93,10 +94,13 @@ public: * @param out output buffer * @param source input charset * @param dest output charset + * @param opts conversion options * @throws exceptions::charset_conv_error if an error occured during * the conversion */ - static void convert(const string& in, string& out, const charset& source, const charset& dest); + static void convert(const string& in, string& out, + const charset& source, const charset& dest, + const charsetConverterOptions& opts = charsetConverterOptions()); /** Convert the contents of an input stream in a specified charset * to another charset and write the result to an output stream. 
@@ -105,10 +109,13 @@ public: * @param out output stream to write the converted data * @param source input charset * @param dest output charset + * @param opts conversion options * @throws exceptions::charset_conv_error if an error occured during * the conversion */ - static void convert(utility::inputStream& in, utility::outputStream& out, const charset& source, const charset& dest); + static void convert(utility::inputStream& in, utility::outputStream& out, + const charset& source, const charset& dest, + const charsetConverterOptions& opts = charsetConverterOptions()); ref clone() const; void copyFrom(const component& other); @@ -121,14 +128,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/charsetConverter.hpp b/vmime/charsetConverter.hpp index a76cc69d..b7292d66 100644 --- a/vmime/charsetConverter.hpp +++ b/vmime/charsetConverter.hpp @@ -29,6 +29,7 @@ #include "vmime/component.hpp" #include "vmime/charset.hpp" +#include "vmime/charsetConverterOptions.hpp" #include "vmime/utility/filteredStream.hpp" @@ -36,6 +37,25 @@ namespace vmime { +namespace utility +{ + + +/** A filtered output stream which applies a charset conversion + * to input bytes. + * + * May throw an exceptions::charset_conv_error if an error + * occurred when initializing convert, or during charset conversion. + */ + +class charsetFilteredOutputStream : public filteredOutputStream +{ +}; + + +} // utility + + /** Convert between charsets.
*/ @@ -43,14 +63,15 @@ class charsetConverter : public object { public: - /** Construct and initialize a charset converter. + /** Construct and initialize an iconv charset converter. * * @param source input charset * @param dest output charset + * @param opts conversion options */ - charsetConverter(const charset& source, const charset& dest); - - ~charsetConverter(); + static ref create + (const charset& source, const charset& dest, + const charsetConverterOptions& opts = charsetConverterOptions()); /** Convert a string buffer from one charset to another * charset (in-memory conversion) @@ -63,7 +84,7 @@ public: * @throws exceptions::charset_conv_error if an error occured during * the conversion */ - void convert(const string& in, string& out); + virtual void convert(const string& in, string& out) = 0; /** Convert the contents of an input stream in a specified charset * to another charset and write the result to an output stream. @@ -73,78 +94,20 @@ public: * @throws exceptions::charset_conv_error if an error occured during * the conversion */ - void convert(utility::inputStream& in, utility::outputStream& out); + virtual void convert(utility::inputStream& in, utility::outputStream& out) = 0; -private: - - void* m_desc; - - charset m_source; - charset m_dest; -}; - - -namespace utility { - - -/** A filtered output stream which applies a charset conversion - * to input bytes. - * - * May throw a exceptions::charset_conv_error if an error - * occured when initializing convert, or during charset conversion. - */ - -class charsetFilteredOutputStream : public filteredOutputStream -{ -public: - - /** Construct a new filter for the specified output stream. + /** Returns a filtered output stream which applies a charset + * conversion to input bytes. Please note that it may not be + * supported by the converter. 
* - * @param source input charset - * @param dest output charset - * @param os stream into which write filtered data + * @param os stream into which filtered data will be written + * @return a filtered output stream, or NULL if not supported */ - charsetFilteredOutputStream - (const charset& source, const charset& dest, outputStream& os); - - ~charsetFilteredOutputStream(); - - - outputStream& getNextOutputStream(); - - void write(const value_type* const data, const size_type count); - void flush(); - -private: - - // Maximum character width in any charset - enum { MAX_CHARACTER_WIDTH = 128 }; - - - void* m_desc; - - const charset m_sourceCharset; - const charset m_destCharset; - - outputStream& m_stream; - - // Buffer in which unconverted bytes are left until they can - // be converted (when more data arrives). The length should be - // large enough to contain any character in any charset. - value_type m_unconvBuffer[MAX_CHARACTER_WIDTH]; - size_type m_unconvCount; - - // Buffer used for conversion. Avoids declaring it in write(). - // Should be at least MAX_CHARACTER_WIDTH * MAX_CHARACTER_WIDTH. - value_type m_outputBuffer[32768]; + virtual ref getFilteredOutputStream(utility::outputStream& os) = 0; }; -} // utility - - } // vmime #endif // VMIME_CHARSETCONVERTER_HPP_INCLUDED - diff --git a/vmime/options.hpp b/vmime/charsetConverterOptions.hpp similarity index 53% rename from vmime/options.hpp rename to vmime/charsetConverterOptions.hpp index 1413b1a3..ee75c1cb 100644 --- a/vmime/options.hpp +++ b/vmime/charsetConverterOptions.hpp @@ -21,8 +21,8 @@ // the GNU General Public License cover the whole combination. // -#ifndef VMIME_OPTIONS_HPP_INCLUDED -#define VMIME_OPTIONS_HPP_INCLUDED +#ifndef VMIME_CHARSETCONVERTEROPTIONS_HPP_INCLUDED +#define VMIME_CHARSETCONVERTEROPTIONS_HPP_INCLUDED #include "vmime/base.hpp" @@ -32,66 +32,22 @@ namespace vmime { -/** A class to set global options for VMime. +/** Options for charset conversion. 
*/ -class options +class charsetConverterOptions : public object { -protected: - - /** Message-related options. - */ - class messageOptions - { - protected: - - friend class options; - - messageOptions() - : m_maxLineLength(lineLengthLimits::convenient) - { - } - - string::size_type m_maxLineLength; - - public: - - const string::size_type& maxLineLength() const { return (m_maxLineLength); } - string::size_type& maxLineLength() { return (m_maxLineLength); } - }; - - /** Multipart-related options. - */ - class multipartOptions - { - private: - - friend class options; - - multipartOptions(); - - string m_prologText; - string m_epilogText; - - public: - - const string& getPrologText() const; - void setPrologText(const string& prologText); - - const string& getEpilogText() const; - void setEpilogText(const string& epilogText); - }; - public: - static options* getInstance(); + charsetConverterOptions(); - multipartOptions multipart; - messageOptions message; + + /** Replace invalid sequences with this string. */ + string invalidSequence; }; } // vmime -#endif // VMIME_OPTIONS_HPP_INCLUDED +#endif // VMIME_CHARSETCONVERTEROPTIONS_HPP_INCLUDED diff --git a/vmime/charsetConverter_iconv.hpp b/vmime/charsetConverter_iconv.hpp new file mode 100644 index 00000000..77a6651c --- /dev/null +++ b/vmime/charsetConverter_iconv.hpp @@ -0,0 +1,124 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#ifndef VMIME_CHARSETCONVERTER_ICONV_HPP_INCLUDED +#define VMIME_CHARSETCONVERTER_ICONV_HPP_INCLUDED + + +#include "vmime/charsetConverter.hpp" + + +namespace vmime +{ + + +/** A generic charset converter which uses iconv library. + */ + +class charsetConverter_iconv : public charsetConverter +{ +public: + + /** Construct and initialize an iconv charset converter. + * + * @param source input charset + * @param dest output charset + * @param opts conversion options + */ + charsetConverter_iconv(const charset& source, const charset& dest, + const charsetConverterOptions& opts = charsetConverterOptions()); + + ~charsetConverter_iconv(); + + void convert(const string& in, string& out); + void convert(utility::inputStream& in, utility::outputStream& out); + + ref getFilteredOutputStream(utility::outputStream& os); + +private: + + void* m_desc; + + charset m_source; + charset m_dest; + + charsetConverterOptions m_options; +}; + + +namespace utility { + + +class charsetFilteredOutputStream_iconv : public charsetFilteredOutputStream +{ +public: + + /** Construct a new filter for the specified output stream. 
+ * + * @param source input charset + * @param dest output charset + * @param os stream into which write filtered data + */ + charsetFilteredOutputStream_iconv + (const charset& source, const charset& dest, outputStream* os); + + ~charsetFilteredOutputStream_iconv(); + + + outputStream& getNextOutputStream(); + + void write(const value_type* const data, const size_type count); + void flush(); + +private: + + // Maximum character width in any charset + enum { MAX_CHARACTER_WIDTH = 128 }; + + + void* m_desc; + + const charset m_sourceCharset; + const charset m_destCharset; + + outputStream& m_stream; + + // Buffer in which unconverted bytes are left until they can + // be converted (when more data arrives). The length should be + // large enough to contain any character in any charset. + value_type m_unconvBuffer[MAX_CHARACTER_WIDTH]; + size_type m_unconvCount; + + // Buffer used for conversion. Avoids declaring it in write(). + // Should be at least MAX_CHARACTER_WIDTH * MAX_CHARACTER_WIDTH. + value_type m_outputBuffer[32768]; +}; + + +} // utility + + +} // vmime + + +#endif // VMIME_CHARSETCONVERTER_ICONV_HPP_INCLUDED diff --git a/vmime/charsetConverter_idna.hpp b/vmime/charsetConverter_idna.hpp new file mode 100644 index 00000000..d3b8b25f --- /dev/null +++ b/vmime/charsetConverter_idna.hpp @@ -0,0 +1,70 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#ifndef VMIME_CHARSETCONVERTER_IDNA_HPP_INCLUDED +#define VMIME_CHARSETCONVERTER_IDNA_HPP_INCLUDED + + +#include "vmime/charsetConverter.hpp" + + +namespace vmime +{ + + +/** A charset converter which can convert to and from Punycode (for IDNA). + */ + +class charsetConverter_idna : public charsetConverter +{ +public: + + /** Construct and initialize an IDNA charset converter. + * + * @param source input charset + * @param dest output charset + * @param opts conversion options + */ + charsetConverter_idna(const charset& source, const charset& dest, + const charsetConverterOptions& opts = charsetConverterOptions()); + + ~charsetConverter_idna(); + + void convert(const string& in, string& out); + void convert(utility::inputStream& in, utility::outputStream& out); + + ref getFilteredOutputStream(utility::outputStream& os); + +private: + + charset m_source; + charset m_dest; + + charsetConverterOptions m_options; +}; + + +} // vmime + + +#endif // VMIME_CHARSETCONVERTER_IDNA_HPP_INCLUDED diff --git a/vmime/component.hpp b/vmime/component.hpp index f12a2b6a..3dca5f66 100644 --- a/vmime/component.hpp +++ b/vmime/component.hpp @@ -30,6 +30,8 @@ #include "vmime/utility/seekableInputStream.hpp" #include "vmime/utility/parserInputStreamAdapter.hpp" #include "vmime/utility/outputStream.hpp" +#include "vmime/generationContext.hpp" +#include "vmime/parsingContext.hpp" namespace vmime @@ -47,19 +49,31 @@ public: component(); virtual ~component(); - /** Parse RFC-822/MIME data for this component. 
+ /** Parse RFC-822/MIME data for this component, using the default + * parsing context. * * @param buffer input buffer */ void parse(const string& buffer); + /** Parse RFC-822/MIME data for this component. + * + * @param ctx parsing context + * @param buffer input buffer + */ + void parse(const parsingContext& ctx, const string& buffer); + /** Parse RFC-822/MIME data for this component. If stream is not seekable, * or if length is not specified, entire contents of the stream will * be loaded into memory before parsing. + * + * @param inputStream stream from which to read data + * @param length data length, in bytes (0 = unknown/not specified) */ void parse(ref inputStream, const utility::stream::size_type length); - /** Parse RFC-822/MIME data for this component. + /** Parse RFC-822/MIME data for this component, using the default + * parsing context. * * @param buffer input buffer * @param position current position in the input buffer @@ -72,9 +86,25 @@ public: const string::size_type end, string::size_type* newPosition = NULL); + /** Parse RFC-822/MIME data for this component. + * + * @param ctx parsing context + * @param buffer input buffer + * @param position current position in the input buffer + * @param end end position in the input buffer + * @param newPosition will receive the new position in the input buffer + */ + void parse + (const parsingContext& ctx, + const string& buffer, + const string::size_type position, + const string::size_type end, + string::size_type* newPosition = NULL); + /** Parse RFC-822/MIME data for this component. If stream is not seekable, * or if end position is not specified, entire contents of the stream will - * be loaded into memory before parsing. + * be loaded into memory before parsing. The default parsing context + * will be used. 
* * @param inputStream stream from which to read data * @param position current position in the input stream @@ -87,6 +117,23 @@ public: const utility::stream::size_type end, utility::stream::size_type* newPosition = NULL); + /** Parse RFC-822/MIME data for this component. If stream is not seekable, + * or if end position is not specified, entire contents of the stream will + * be loaded into memory before parsing. + * + * @param ctx parsing context + * @param inputStream stream from which to read data + * @param position current position in the input stream + * @param end end position in the input stream + * @param newPosition will receive the new position in the input stream + */ + void parse + (const parsingContext& ctx, + ref inputStream, + const utility::stream::size_type position, + const utility::stream::size_type end, + utility::stream::size_type* newPosition = NULL); + /** Generate RFC-2822/MIME data for this component. * * \deprecated Use the new generate() method, which takes an outputStream parameter. @@ -99,7 +146,7 @@ public: (const string::size_type maxLineLength = lineLengthLimits::infinite, const string::size_type curLinePos = 0) const; - /** Generate RFC-2822/MIME data for this component. + /** Generate RFC-2822/MIME data for this component, using the default generation context. * * @param outputStream output stream * @param maxLineLength maximum line length for output @@ -108,20 +155,20 @@ public: */ virtual void generate (utility::outputStream& outputStream, - const string::size_type maxLineLength = lineLengthLimits::infinite, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; - /** Generate RFC-2822/MIME data for this component. + /** Generate RFC-2822/MIME data for this component, using the specified generation context.
* + * @param ctx generation context * @param outputStream output stream * @param maxLineLength maximum line length for output * @param curLinePos length of the current line in the output buffer * @param newLinePos will receive the new line position (length of the last line written) */ virtual void generate - (ref outputStream, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& outputStream, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; @@ -168,20 +215,22 @@ protected: // AT LEAST ONE of these parseImpl() functions MUST be implemented in derived class virtual void parseImpl - (ref parser, + (const parsingContext& ctx, + ref parser, const utility::stream::size_type position, const utility::stream::size_type end, utility::stream::size_type* newPosition = NULL); virtual void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); virtual void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const = 0; diff --git a/vmime/constants.hpp b/vmime/constants.hpp index b5a1a0d4..59c89f0e 100644 --- a/vmime/constants.hpp +++ b/vmime/constants.hpp @@ -164,6 +164,8 @@ namespace vmime extern const string::value_type* const WINDOWS_1256; extern const string::value_type* const WINDOWS_1257; extern const string::value_type* const WINDOWS_1258; + + extern const string::value_type* const IDNA; } /** Constants for standard field names. 
*/ diff --git a/vmime/contentDisposition.hpp b/vmime/contentDisposition.hpp index e876edef..69c0f313 100644 --- a/vmime/contentDisposition.hpp +++ b/vmime/contentDisposition.hpp @@ -79,14 +79,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/context.hpp b/vmime/context.hpp new file mode 100644 index 00000000..76cc0ee4 --- /dev/null +++ b/vmime/context.hpp @@ -0,0 +1,122 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. 
+// + +#ifndef VMIME_CONTEXT_HPP_INCLUDED +#define VMIME_CONTEXT_HPP_INCLUDED + + +#include "vmime/base.hpp" +#include "vmime/charsetConverterOptions.hpp" + + +namespace vmime +{ + + +/** Holds configuration parameters used either for parsing or generating messages. + */ + +class context : public object +{ +public: + + virtual ~context(); + + /** Returns whether support for Internationalized Email Headers (RFC-6532) + * is enabled. + * + * @return true if RFC-6532 support is enabled, false otherwise + */ + bool getInternationalizedEmailSupport() const; + + /** Enables or disables support for Internationalized Email Headers (RFC-6532). + * This is disabled by default, and should be used only with servers + * which support it (eg. SMTP servers with SMTPUTF8 extension). + * + * @param support true if RFC-6532 support is enabled, false otherwise + */ + void setInternationalizedEmailSupport(const bool support); + + /** Returns options used currently for charset conversions by the parser and/or + * the generator. See charsetConverterOptions class for more information. + * + * @return current charset conversion options + */ + const charsetConverterOptions& getCharsetConversionOptions() const; + + /** Sets the options used currently for charset conversions by the parser and/or + * the generator. See charsetConverterOptions class for more information. + * + * @param opts new charset conversion options + */ + void setCharsetConversionOptions(const charsetConverterOptions& opts); + + /** Switches between contexts temporarily. + */ + template + class switcher + { + public: + + /** Switches to the specified context. + * Default context will temporarily use the data of the specified + * new context during the lifetime of this object. + * + * @param newCtx new context + */ + switcher(CTX_CLASS& newCtx) + : m_oldCtxData(CTX_CLASS::getDefaultContext()), m_newCtx(&newCtx) + { + CTX_CLASS::getDefaultContext().copyFrom(newCtx); + } + + /** Restores back saved context. 
+ */ + ~switcher() + { + CTX_CLASS::getDefaultContext().copyFrom(m_oldCtxData); + } + + private: + + CTX_CLASS m_oldCtxData; + CTX_CLASS* m_newCtx; + }; + +protected: + + context(); + context(const context& ctx); + + virtual context& operator=(const context& ctx); + virtual void copyFrom(const context& ctx); + + bool m_internationalizedEmail; + charsetConverterOptions m_charsetConvOptions; +}; + + +} // vmime + + +#endif // VMIME_CONTEXT_HPP_INCLUDED diff --git a/vmime/dateTime.hpp b/vmime/dateTime.hpp index 0c1d3971..b7f5d273 100644 --- a/vmime/dateTime.hpp +++ b/vmime/dateTime.hpp @@ -243,14 +243,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/disposition.hpp b/vmime/disposition.hpp index 66dd8933..1c14f532 100644 --- a/vmime/disposition.hpp +++ b/vmime/disposition.hpp @@ -138,14 +138,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/emailAddress.hpp b/vmime/emailAddress.hpp new file mode 100644 index 00000000..e9512d98 --- /dev/null +++ b/vmime/emailAddress.hpp @@ -0,0 +1,121 @@ +// +// VMime library 
(http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#ifndef VMIME_EMAILADDRESS_HPP_INCLUDED +#define VMIME_EMAILADDRESS_HPP_INCLUDED + + +#include "vmime/component.hpp" +#include "vmime/text.hpp" + + +namespace vmime +{ + + +/** An email address: local name and domain name (basic type). + */ + +class emailAddress : public component +{ +public: + + emailAddress(); + emailAddress(const emailAddress& eml); + emailAddress(const string& email); + emailAddress(const char* email); + emailAddress(const string& localName, const string& domainName); + emailAddress(const word& localName, const word& domainName); + + /** Return the local name of the address. + * + * @return local name of the address + */ + const word& getLocalName() const; + + /** Set the local name of the address. + * + * @param localName local name of the address + */ + void setLocalName(const word& localName); + + /** Return the domain name of the address.
+ * + * @return domain name of the address + */ + const word& getDomainName() const; + + /** Set the domain name of the address. + * + * @param domainName domain name of the address + */ + void setDomainName(const word& domainName); + + /** Returns whether this email address is empty. + * Address is considered as empty if the local part is not specified. + * + * @return true if the address is empty, false otherwise + */ + bool isEmpty() const; + + // Comparison + bool operator==(const class emailAddress& eml) const; + bool operator!=(const class emailAddress& eml) const; + + // Assignment + void copyFrom(const component& other); + ref clone() const; + emailAddress& operator=(const emailAddress& other); + + const std::vector > getChildComponents(); + +protected: + + word m_localName; + word m_domainName; + +public: + + using component::parse; + using component::generate; + + // Component parsing & assembling + void parseImpl + (const parsingContext& ctx, + const string& buffer, + const string::size_type position, + const string::size_type end, + string::size_type* newPosition = NULL); + + void generateImpl + (const generationContext& ctx, + utility::outputStream& os, + const string::size_type curLinePos = 0, + string::size_type* newLinePos = NULL) const; +}; + + +} // vmime + + +#endif // VMIME_EMAILADDRESS_HPP_INCLUDED diff --git a/vmime/encoding.hpp b/vmime/encoding.hpp index e1d91bb5..4266856f 100644 --- a/vmime/encoding.hpp +++ b/vmime/encoding.hpp @@ -152,14 +152,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL)
const; }; diff --git a/vmime/generationContext.hpp b/vmime/generationContext.hpp new file mode 100644 index 00000000..cf7e7499 --- /dev/null +++ b/vmime/generationContext.hpp @@ -0,0 +1,106 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#ifndef VMIME_GENERATIONCONTEXT_HPP_INCLUDED +#define VMIME_GENERATIONCONTEXT_HPP_INCLUDED + + +#include "vmime/context.hpp" + + +namespace vmime +{ + + +/** Holds configuration parameters used for generating messages. + */ + +class generationContext : public context +{ +public: + + generationContext(); + generationContext(const generationContext& ctx); + + /** Returns the current maximum line length used when generating messages. + * + * @return current maximum line length, in bytes + */ + string::size_type getMaxLineLength() const; + + /** Sets the maximum line length used when generating messages. + * You may use the constants lineLengthLimits::convenient, + * lineLengthLimits::max and lineLengthLimits::infinite. 
+ * + * @param maxLineLength new maximum line length, in bytes + */ + void setMaxLineLength(const string::size_type maxLineLength); + + /** Returns the current prolog text used when generating MIME body parts. + * + * @return current MIME prolog text + */ + const string getPrologText() const; + + /** Sets the prolog text used when generating MIME body parts. This text + * appears before the part, and should be displayed by MUAs which do not + * support MIME. This should be 7-bit ASCII text only. + * + * @param prologText MIME prolog text + */ + void setPrologText(const string& prologText); + + /** Returns the current epilog text used when generating MIME body parts. + * + * @return current MIME epilog text + */ + const string getEpilogText() const; + + /** Sets the epilog text used when generating MIME body parts. This text + * appears after the part, and should be displayed by MUAs which do not + * support MIME. This should be 7-bit ASCII text only. + */ + void setEpilogText(const string& epilogText); + + /** Returns the default context used for generating messages.
+ * + * @return a reference to the default generation context + */ + static generationContext& getDefaultContext(); + + generationContext& operator=(const generationContext& ctx); + void copyFrom(const generationContext& ctx); + +protected: + + string::size_type m_maxLineLength; + + string m_prologText; + string m_epilogText; +}; + + +} // vmime + + +#endif // VMIME_GENERATIONCONTEXT_HPP_INCLUDED diff --git a/vmime/header.hpp b/vmime/header.hpp index 6f5bf6d7..cb44c41d 100644 --- a/vmime/header.hpp +++ b/vmime/header.hpp @@ -263,14 +263,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/headerField.hpp b/vmime/headerField.hpp index cfc1252e..857ee051 100644 --- a/vmime/headerField.hpp +++ b/vmime/headerField.hpp @@ -121,20 +121,22 @@ public: protected: void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; static ref parseNext - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); diff --git a/vmime/mailbox.hpp b/vmime/mailbox.hpp index 06305f4e..ec071db3 100644 --- 
a/vmime/mailbox.hpp +++ b/vmime/mailbox.hpp @@ -25,6 +25,7 @@ #define VMIME_MAILBOX_HPP_INCLUDED +#include "vmime/emailAddress.hpp" #include "vmime/address.hpp" #include "vmime/text.hpp" @@ -45,8 +46,8 @@ public: mailbox(); mailbox(const mailbox& mbox); - mailbox(const string& email); - mailbox(const text& name, const string& email); + mailbox(const emailAddress& email); + mailbox(const text& name, const emailAddress& email); /** Return the full name of the mailbox (empty if not specified). * @@ -64,13 +65,13 @@ public: * * @return email of the mailbox */ - const string& getEmail() const; + const emailAddress& getEmail() const; /** Set the email of the mailbox. * * @param email email of the mailbox */ - void setEmail(const string& email); + void setEmail(const emailAddress& email); // Comparison bool operator==(const class mailbox& mailbox) const; @@ -93,7 +94,7 @@ public: protected: text m_name; - string m_email; + emailAddress m_email; public: @@ -102,14 +103,15 @@ public: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/mailboxField.hpp b/vmime/mailboxField.hpp index ca281415..55368643 100644 --- a/vmime/mailboxField.hpp +++ b/vmime/mailboxField.hpp @@ -52,7 +52,9 @@ protected: public: - void parse(const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); + void parse(const parsingContext& ctx, const string& buffer, + const string::size_type position, const string::size_type end, + string::size_type* newPosition = NULL); }; diff --git 
a/vmime/mailboxGroup.hpp b/vmime/mailboxGroup.hpp index 09567798..509e06b7 100644 --- a/vmime/mailboxGroup.hpp +++ b/vmime/mailboxGroup.hpp @@ -169,14 +169,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/mailboxList.hpp b/vmime/mailboxList.hpp index 987d6a8f..d08cf2bb 100644 --- a/vmime/mailboxList.hpp +++ b/vmime/mailboxList.hpp @@ -159,14 +159,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/mediaType.hpp b/vmime/mediaType.hpp index b929f0db..866810fa 100644 --- a/vmime/mediaType.hpp +++ b/vmime/mediaType.hpp @@ -99,14 +99,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* 
newLinePos = NULL) const; }; diff --git a/vmime/message.hpp b/vmime/message.hpp index 3c70e52d..ebb767e7 100644 --- a/vmime/message.hpp +++ b/vmime/message.hpp @@ -26,7 +26,7 @@ #include "vmime/bodyPart.hpp" -#include "vmime/options.hpp" +#include "vmime/generationContext.hpp" namespace vmime @@ -42,26 +42,16 @@ public: message(); - public: + using bodyPart::parse; + using bodyPart::generate; + // Override default generate() functions so that we can change // the default 'maxLineLength' value - void generate - (utility::outputStream& os, - const string::size_type maxLineLength = options::getInstance()->message.maxLineLength(), - const string::size_type curLinePos = 0, - string::size_type* newLinePos = NULL) const; - const string generate - (const string::size_type maxLineLength = options::getInstance()->message.maxLineLength(), + (const string::size_type maxLineLength = generationContext::getDefaultContext().getMaxLineLength(), const string::size_type curLinePos = 0) const; - - void generate - (ref os, - const string::size_type maxLineLength = lineLengthLimits::infinite, - const string::size_type curLinePos = 0, - string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/messageId.hpp b/vmime/messageId.hpp index cfca9cfb..b8c4693f 100644 --- a/vmime/messageId.hpp +++ b/vmime/messageId.hpp @@ -108,14 +108,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; @@ -128,7 +129,8 @@ protected: * @return a new message-id object, or null if no more message-id can be parsed from the input buffer */ static ref 
parseNext - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition); diff --git a/vmime/messageIdSequence.hpp b/vmime/messageIdSequence.hpp index 1d1f9509..8ec7292a 100644 --- a/vmime/messageIdSequence.hpp +++ b/vmime/messageIdSequence.hpp @@ -152,14 +152,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/parameter.hpp b/vmime/parameter.hpp index 546600d0..82314575 100644 --- a/vmime/parameter.hpp +++ b/vmime/parameter.hpp @@ -125,20 +125,21 @@ public: protected: void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; private: - void parse(const std::vector & chunks); + void parse(const parsingContext& ctx, const std::vector & chunks); string m_name; diff --git a/vmime/parameterizedHeaderField.hpp b/vmime/parameterizedHeaderField.hpp index b3f8fc43..ee54d0a2 100644 --- a/vmime/parameterizedHeaderField.hpp +++ b/vmime/parameterizedHeaderField.hpp @@ -181,14 +181,15 @@ private: protected: void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, 
const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/parsingContext.hpp b/vmime/parsingContext.hpp new file mode 100644 index 00000000..0ca194c5 --- /dev/null +++ b/vmime/parsingContext.hpp @@ -0,0 +1,59 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002-2013 Vincent Richard +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#ifndef VMIME_PARSINGCONTEXT_HPP_INCLUDED +#define VMIME_PARSINGCONTEXT_HPP_INCLUDED + + +#include "vmime/context.hpp" + + +namespace vmime +{ + + +/** Holds configuration parameters used for parsing messages. + */ + +class parsingContext : public context +{ +public: + + parsingContext(); + parsingContext(const parsingContext& ctx); + + /** Returns the default context used for parsing messages. 
+ * + * @return a reference to the default parsing context + */ + static parsingContext& getDefaultContext(); + +protected: + +}; + + +} // vmime + + +#endif // VMIME_PARSINGCONTEXT_HPP_INCLUDED diff --git a/vmime/path.hpp b/vmime/path.hpp index 1fb58384..d2077857 100644 --- a/vmime/path.hpp +++ b/vmime/path.hpp @@ -86,14 +86,15 @@ protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/relay.hpp b/vmime/relay.hpp index 96e021c6..20547d98 100644 --- a/vmime/relay.hpp +++ b/vmime/relay.hpp @@ -88,14 +88,15 @@ private: protected: void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; }; diff --git a/vmime/text.hpp b/vmime/text.hpp index 7ead7d6b..83281200 100644 --- a/vmime/text.hpp +++ b/vmime/text.hpp @@ -135,9 +135,11 @@ public: * specified destination charset. 
* * @param dest output charset + * @param opts options for charset conversion * @return text decoded in the specified charset */ - const string getConvertedText(const charset& dest) const; + const string getConvertedText(const charset& dest, + const charsetConverterOptions& opts = charsetConverterOptions()) const; /** Return the unconverted (raw) data of all words. This is the * concatenation of the results returned by getBuffer() on @@ -194,21 +196,23 @@ public: FORCE_NO_ENCODING = (1 << 0), /**< Just fold lines, don't encode them. */ FORCE_ENCODING = (1 << 1), /**< Encode lines even if they are plain ASCII text. */ NO_NEW_LINE_SEQUENCE = (1 << 2), /**< Use CRLF instead of new-line sequence (CRLF + TAB). */ - QUOTE_IF_POSSIBLE = (1 << 3) /**< Use quoting instead of encoding when possible (even if FORCE_ENCODING is specified). */ + QUOTE_IF_POSSIBLE = (1 << 3), /**< Use quoting instead of encoding when possible (even if FORCE_ENCODING is specified). */ + QUOTE_IF_NEEDED = (1 << 4) /**< Use quoting instead of encoding if needed (eg. whitespaces and/or special chars). */ }; /** Encode and fold text in respect to RFC-2047. * + * @param ctx generation context * @param os output stream * @param maxLineLength maximum line length for output * @param firstLineOffset the first line length (may be useful if the current output line is not empty) * @param lastLineLength will receive the length of the last line written * @param flags encoding flags (see EncodeAndFoldFlags) */ - void encodeAndFold(utility::outputStream& os, const string::size_type maxLineLength, + void encodeAndFold(const generationContext& ctx, utility::outputStream& os, const string::size_type firstLineOffset, string::size_type* lastLineLength, const int flags) const; - /** Decode and unfold text (RFC-2047). + /** Decode and unfold text (RFC-2047), using the default parsing context. 
* * @param in input string * @return new text object @@ -216,6 +220,14 @@ public: static ref decodeAndUnfold(const string& in); /** Decode and unfold text (RFC-2047). + * + * @param ctx parsingContext + * @param in input string + * @return new text object + */ + static ref decodeAndUnfold(const parsingContext& ctx, const string& in); + + /** Decode and unfold text (RFC-2047), using the default parsing context. * * @param in input string * @param generateInExisting if not NULL, the resulting text will be generated @@ -226,18 +238,31 @@ public: */ static text* decodeAndUnfold(const string& in, text* generateInExisting); + /** Decode and unfold text (RFC-2047). + * + * @param ctx parsing context + * @param in input string + * @param generateInExisting if not NULL, the resulting text will be generated + * in the specified object instead of a new created object (in this case, the + * function returns the same pointer). Can be used to avoid copying the + * resulting object into an existing object. 
+ * @return new text object or existing object if generateInExisting != NULL + */ + static text* decodeAndUnfold(const parsingContext& ctx, const string& in, text* generateInExisting); + protected: // Component parsing & assembling void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; diff --git a/vmime/utility/stringUtils.hpp b/vmime/utility/stringUtils.hpp index 535e6905..7fcdc622 100644 --- a/vmime/utility/stringUtils.hpp +++ b/vmime/utility/stringUtils.hpp @@ -104,6 +104,14 @@ public: */ static string::size_type countASCIIchars(const string::const_iterator begin, const string::const_iterator end); + /** Returns whether the specified string is composed exclusively + * of 7-bit ASCII characters. + * + * @param str string to test + * @return true if the string is ASCII-only, false otherwise + */ + static bool is7bit(const string& str); + /** Returns the position of the first non 7-bit US-ASCII character in a string. * * @param begin start position @@ -153,6 +161,25 @@ public: * @return unquoted string */ static const string unquote(const string& str); + + /** Determines whether the specified string needs to be quoted. + * + * @param str string to test + * @param specialChars list of characters that will cause the + * string to be quoted + * @return true if the string needs to be quoted, false otherwise + */ + static bool needQuoting(const string& str, + const string& specialChars = " \t\"(),:;<>@[\\]"); + + /** Quotes the specified string. 
+ * + * @param str string to quote + * @param escapeSpecialChars list of characters that will be escaped + * @param escapeChar character that will be used for escaping (eg. '\') + * @return quoted string + */ + static string quote(const string& str, const string& escapeSpecialChars, const string& escapeChar); }; diff --git a/vmime/vmime.hpp b/vmime/vmime.hpp index bb140354..e7dd3959 100644 --- a/vmime/vmime.hpp +++ b/vmime/vmime.hpp @@ -31,7 +31,6 @@ // Base definitions #include "vmime/base.hpp" #include "vmime/exception.hpp" -#include "vmime/options.hpp" #include "vmime/platform.hpp" // Base components @@ -42,6 +41,7 @@ #include "vmime/text.hpp" #include "vmime/encoding.hpp" #include "vmime/contentDisposition.hpp" +#include "vmime/emailAddress.hpp" #include "vmime/mailbox.hpp" #include "vmime/mailboxGroup.hpp" #include "vmime/mailboxList.hpp" @@ -57,6 +57,9 @@ #include "vmime/stringContentHandler.hpp" #include "vmime/streamContentHandler.hpp" +#include "vmime/generationContext.hpp" +#include "vmime/parsingContext.hpp" + // Message components #include "vmime/message.hpp" diff --git a/vmime/word.hpp b/vmime/word.hpp index 5d350fa4..73312f6c 100644 --- a/vmime/word.hpp +++ b/vmime/word.hpp @@ -27,6 +27,7 @@ #include "vmime/headerFieldValue.hpp" #include "vmime/charset.hpp" +#include "vmime/charsetConverterOptions.hpp" namespace vmime @@ -94,9 +95,11 @@ public: /** Return the contained text converted to the specified charset. * * @param dest output charset + * @param opts options for charset conversion * @return word converted to the specified charset */ - const string getConvertedText(const charset& dest) const; + const string getConvertedText(const charset& dest, + const charsetConverterOptions& opts = charsetConverterOptions()) const; /** Replace data in this word by data in other word. 
* @@ -131,14 +134,15 @@ public: protected: void parseImpl - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL); void generateImpl - (utility::outputStream& os, - const string::size_type maxLineLength = lineLengthLimits::infinite, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const; @@ -148,8 +152,8 @@ public: #ifndef VMIME_BUILDING_DOC void generate - (utility::outputStream& os, - const string::size_type maxLineLength, + (const generationContext& ctx, + utility::outputStream& os, const string::size_type curLinePos, string::size_type* newLinePos, const int flags, @@ -161,7 +165,8 @@ public: private: static ref parseNext - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition, @@ -170,7 +175,8 @@ private: bool isFirst); static const std::vector > parseMultiple - (const string& buffer, + (const parsingContext& ctx, + const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition); diff --git a/vmime/wordEncoder.hpp b/vmime/wordEncoder.hpp index e43b7edb..acaef791 100644 --- a/vmime/wordEncoder.hpp +++ b/vmime/wordEncoder.hpp @@ -75,11 +75,12 @@ public: /** Test whether RFC-2047 encoding is needed. * + * @param ctx generation context * @param buffer buffer to analyze * @param charset charset of the buffer * @return true if encoding is needed, false otherwise. */ - static bool isEncodingNeeded(const string& buffer, const charset& charset); + static bool isEncodingNeeded(const generationContext& ctx, const string& buffer, const charset& charset); /** Guess the best RFC-2047 encoding to use for the specified buffer. *