aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/utility/encoder/qpEncoder.cpp 271
-rw-r--r-- src/wordEncoder.cpp 45
-rw-r--r-- vmime/utility/encoder/qpEncoder.hpp 4
3 files changed, 173 insertions, 147 deletions
diff --git a/src/utility/encoder/qpEncoder.cpp b/src/utility/encoder/qpEncoder.cpp
index e20be9fe..aa95022f 100644
--- a/src/utility/encoder/qpEncoder.cpp
+++ b/src/utility/encoder/qpEncoder.cpp
@@ -51,10 +51,52 @@ const std::vector <string> qpEncoder::getAvailableProperties() const
-// Encoding table
+// Hex-encoding table
const unsigned char qpEncoder::sm_hexDigits[] = "0123456789ABCDEF";
-// Decoding table
+
+// RFC-2047 encoding table: we always encode RFC-2047 using the restricted
+// charset, that is the one used for 'phrase' in From/To/Cc/... headers.
+//
+// " The set of characters that may be used in a "Q"-encoded 'encoded-word'
+// is restricted to: <upper and lower case ASCII letters, decimal digits,
+// "!", "*", "+", "-", "/", "=", and "_" (underscore, ASCII 95.)>. "
+//
+// Three entries deviate from that list and are flagged '1' in the table:
+// - space (32) is flagged; the encoder writes it as underscore (95)
+// - underscore (95) and '=' (61) are flagged, so they are hex-encoded (=5F, =3D)
+//
+// This is a quick lookup table indexed by ASCII code (0..127):
+// '1' means "encode", '0' means "no encoding"
+//
+const unsigned char qpEncoder::sm_RFC2047EncodeTable[] =
+{
+ /* 0 NUL */ 1, /* 1 SOH */ 1, /* 2 STX */ 1, /* 3 ETX */ 1, /* 4 EOT */ 1, /* 5 ENQ */ 1,
+ /* 6 ACK */ 1, /* 7 BEL */ 1, /* 8 BS */ 1, /* 9 TAB */ 1, /* 10 LF */ 1, /* 11 VT */ 1,
+ /* 12 FF */ 1, /* 13 CR */ 1, /* 14 SO */ 1, /* 15 SI */ 1, /* 16 DLE */ 1, /* 17 DC1 */ 1,
+ /* 18 DC2 */ 1, /* 19 DC3 */ 1, /* 20 DC4 */ 1, /* 21 NAK */ 1, /* 22 SYN */ 1, /* 23 ETB */ 1,
+ /* 24 CAN */ 1, /* 25 EM */ 1, /* 26 SUB */ 1, /* 27 ESC */ 1, /* 28 FS */ 1, /* 29 GS */ 1,
+ /* 30 RS */ 1, /* 31 US */ 1, /* 32 SPACE*/ 1, /* 33 ! */ 0, /* 34 " */ 1, /* 35 # */ 1,
+ /* 36 $ */ 1, /* 37 % */ 1, /* 38 & */ 1, /* 39 ' */ 1, /* 40 ( */ 1, /* 41 ) */ 1,
+ /* 42 * */ 0, /* 43 + */ 0, /* 44 , */ 1, /* 45 - */ 0, /* 46 . */ 1, /* 47 / */ 0,
+ /* 48 0 */ 0, /* 49 1 */ 0, /* 50 2 */ 0, /* 51 3 */ 0, /* 52 4 */ 0, /* 53 5 */ 0,
+ /* 54 6 */ 0, /* 55 7 */ 0, /* 56 8 */ 0, /* 57 9 */ 0, /* 58 : */ 1, /* 59 ; */ 1,
+ /* 60 < */ 1, /* 61 = */ 1, /* 62 > */ 1, /* 63 ? */ 1, /* 64 @ */ 1, /* 65 A */ 0,
+ /* 66 B */ 0, /* 67 C */ 0, /* 68 D */ 0, /* 69 E */ 0, /* 70 F */ 0, /* 71 G */ 0,
+ /* 72 H */ 0, /* 73 I */ 0, /* 74 J */ 0, /* 75 K */ 0, /* 76 L */ 0, /* 77 M */ 0,
+ /* 78 N */ 0, /* 79 O */ 0, /* 80 P */ 0, /* 81 Q */ 0, /* 82 R */ 0, /* 83 S */ 0,
+ /* 84 T */ 0, /* 85 U */ 0, /* 86 V */ 0, /* 87 W */ 0, /* 88 X */ 0, /* 89 Y */ 0,
+ /* 90 Z */ 0, /* 91 [ */ 1, /* 92 \ */ 1, /* 93 ] */ 1, /* 94 ^ */ 1, /* 95 _ */ 1,
+ /* 96 ` */ 1, /* 97 a */ 0, /* 98 b */ 0, /* 99 c */ 0, /* 100 d */ 0, /* 101 e */ 0,
+ /* 102 f */ 0, /* 103 g */ 0, /* 104 h */ 0, /* 105 i */ 0, /* 106 j */ 0, /* 107 k */ 0,
+ /* 108 l */ 0, /* 109 m */ 0, /* 110 n */ 0, /* 111 o */ 0, /* 112 p */ 0, /* 113 q */ 0,
+ /* 114 r */ 0, /* 115 s */ 0, /* 116 t */ 0, /* 117 u */ 0, /* 118 v */ 0, /* 119 w */ 0,
+ /* 120 x */ 0, /* 121 y */ 0, /* 122 z */ 0, /* 123 { */ 1, /* 124 | */ 1, /* 125 } */ 1,
+ /* 126 ~ */ 1, /* 127 DEL */ 1
+};
+
+
+// Hex-decoding table
const unsigned char qpEncoder::sm_hexDecodeTable[256] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -76,6 +118,36 @@ const unsigned char qpEncoder::sm_hexDecodeTable[256] =
};
+// static: returns true when 'c' is >= 128 or is flagged '1' in sm_RFC2047EncodeTable
+bool qpEncoder::RFC2047_isEncodingNeededForChar(const unsigned char c)
+{
+ return (c >= 128 || sm_RFC2047EncodeTable[c] != 0); // range test runs first, so the table is only indexed with c < 128
+}
+
+
+// static: returns how many characters 'c' occupies once RFC-2047 "Q"-encoded (1 or 3)
+int qpEncoder::RFC2047_getEncodedLength(const unsigned char c)
+{
+ if (c >= 128 || sm_RFC2047EncodeTable[c] != 0)
+ {
+ if (c == 32) // space (ASCII 32) is flagged in the table but handled specially
+ {
+ // Encoded as a single "_" character
+ return 1;
+ }
+ else
+ {
+ // Hex encoding: '=' followed by two hexadecimal digits
+ return 3;
+ }
+ }
+ else
+ {
+ return 1; // no encoding: the character is kept as is
+ }
+}
+
+
#ifndef VMIME_BUILDING_DOC
#define QP_ENCODE_HEX(x) \
@@ -83,7 +155,7 @@ const unsigned char qpEncoder::sm_hexDecodeTable[256] =
outBuffer[outBufferPos + 1] = sm_hexDigits[x >> 4]; \
outBuffer[outBufferPos + 2] = sm_hexDigits[x & 0xF]; \
outBufferPos += 3; \
- curCol += 3;
+ curCol += 3
#define QP_WRITE(s, x, l) s.write(reinterpret_cast <utility::stream::value_type*>(x), l)
@@ -145,34 +217,51 @@ utility::stream::size_type qpEncoder::encode(utility::inputStream& in,
// Get the next char and encode it
const unsigned char c = static_cast <unsigned char>(buffer[bufferPos++]);
- switch (c)
- {
- case '.':
+ if (rfc2047)
{
- if (!rfc2047 && curCol == 0)
+ if (c >= 128 || sm_RFC2047EncodeTable[c] != 0)
{
- // If a '.' appears at the beginning of a line, we encode it to
- // to avoid problems with SMTP servers... ("\r\n.\r\n" means the
- // end of data transmission).
- QP_ENCODE_HEX('.')
- continue;
+ if (c == 32) // space
+ {
+ // RFC-2047, Page 5, 4.2. The "Q" encoding:
+ // << The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be
+ // represented as "_" (underscore, ASCII 95.). >>
+ outBuffer[outBufferPos++] = '_';
+ ++curCol;
+ }
+ else
+ {
+ // Other characters: '=' + hexadecimal encoding
+ QP_ENCODE_HEX(c);
+ }
+ }
+ else
+ {
+ // No encoding
+ outBuffer[outBufferPos++] = c;
+ ++curCol;
}
-
- outBuffer[outBufferPos++] = '.';
- ++curCol;
- break;
}
- case ' ':
+ else
{
- // RFC-2047, Page 5, 4.2. The "Q" encoding:
- // << The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be
- // represented as "_" (underscore, ASCII 95.). >>
- if (rfc2047)
+ switch (c)
{
- outBuffer[outBufferPos++] = '_';
+ case 46: // .
+ {
+ if (curCol == 0)
+ {
+ // If a '.' appears at the beginning of a line, we encode it
+ // to avoid problems with SMTP servers... ("\r\n.\r\n" means the
+ // end of data transmission).
+ QP_ENCODE_HEX('.');
+ continue;
+ }
+
+ outBuffer[outBufferPos++] = '.';
++curCol;
+ break;
}
- else
+ case 32: // space
{
// Need to get more data?
if (bufferPos >= bufferLength)
@@ -192,100 +281,74 @@ utility::stream::size_type qpEncoder::encode(utility::inputStream& in,
outBuffer[outBufferPos++] = ' ';
++curCol;
}
- }
- break;
- }
- case '\t':
- {
- QP_ENCODE_HEX(c)
- break;
- }
- case '\r':
- case '\n':
- {
- // Text mode (where using CRLF or LF or ... does not
- // care for a new line...)
- if (text)
- {
- outBuffer[outBufferPos++] = c;
- ++curCol;
+ break;
}
- // Binary mode (where CR and LF bytes are important!)
- else
+ case 9: // TAB
{
- QP_ENCODE_HEX(c)
- }
-
- break;
- }
- case '=':
- {
- QP_ENCODE_HEX('=')
- break;
- }
- // RFC-2047 'especials' characters
- case ',':
- case ';':
- case ':':
- case '_':
- case '@':
- case '(':
- case ')':
- case '<':
- case '>':
- case '[':
- case ']':
- case '"':
- {
- if (rfc2047)
- {
- QP_ENCODE_HEX(c)
+ QP_ENCODE_HEX(c);
+ break;
}
- else
+ case 13: // CR
+ case 10: // LF
{
- outBuffer[outBufferPos++] = c;
- ++curCol;
- }
+ // Text mode (CR and LF are written unchanged: the exact
+ // new-line convention, CRLF or LF or ..., does not matter)
+ if (text)
+ {
+ outBuffer[outBufferPos++] = c;
+ ++curCol;
+ }
+ // Binary mode (where CR and LF bytes are important!)
+ else
+ {
+ QP_ENCODE_HEX(c);
+ }
- break;
- }
- /*
- Rule #2: (Literal representation) Octets with decimal values of 33
- through 60 inclusive, and 62 through 126, inclusive, MAY be
- represented as the ASCII characters which correspond to those
- octets (EXCLAMATION POINT through LESS THAN, and GREATER THAN
- through TILDE, respectively).
- */
- default:
- {
- //if ((c >= 33 && c <= 60) || (c >= 62 && c <= 126))
- if (c >= 33 && c <= 126 && c != 61 && c != 63)
- {
- outBuffer[outBufferPos++] = c;
- ++curCol;
+ break;
}
- // Other characters: '=' + hexadecimal encoding
- else
+ case 61: // =
{
- QP_ENCODE_HEX(c)
+ QP_ENCODE_HEX('=');
+ break;
}
+ /*
+ Rule #2: (Literal representation) Octets with decimal values of 33
+ through 60 inclusive, and 62 through 126, inclusive, MAY be
+ represented as the ASCII characters which correspond to those
+ octets (EXCLAMATION POINT through LESS THAN, and GREATER THAN
+ through TILDE, respectively).
+ */
+ default:
+
+ //if ((c >= 33 && c <= 60) || (c >= 62 && c <= 126))
+ if (c >= 33 && c <= 126 && c != 61 && c != 63)
+ {
+ outBuffer[outBufferPos++] = c;
+ ++curCol;
+ }
+ // Other characters: '=' + hexadecimal encoding
+ else
+ {
+ QP_ENCODE_HEX(c);
+ }
- break;
- }
+ break;
- }
+ } // switch (c)
- // Soft line break : "=\r\n"
- if (cutLines && curCol >= maxLineLength - 1)
- {
- outBuffer[outBufferPos] = '=';
- outBuffer[outBufferPos + 1] = '\r';
- outBuffer[outBufferPos + 2] = '\n';
+ // Soft line break : "=\r\n"
+ if (cutLines && curCol >= maxLineLength - 1)
+ {
+ outBuffer[outBufferPos] = '=';
+ outBuffer[outBufferPos + 1] = '\r';
+ outBuffer[outBufferPos + 2] = '\n';
- outBufferPos += 3;
- curCol = 0;
- }
+ outBufferPos += 3;
+ curCol = 0;
+ }
+
+ } // !rfc2047
++inTotal;
diff --git a/src/wordEncoder.cpp b/src/wordEncoder.cpp
index 22994edf..67bd7a1d 100644
--- a/src/wordEncoder.cpp
+++ b/src/wordEncoder.cpp
@@ -150,29 +150,9 @@ const string wordEncoder::getNextChunk(const string::size_type maxLength)
while ((inputCount == 0 || outputCount < maxLength) && (inputCount < remaining))
{
const unsigned char c = m_buffer[m_pos + inputCount];
- bool encoded = true;
-
- switch (c)
- {
- case ',':
- case ';':
- case ':':
- case '_':
- case '=':
-
- encoded = true;
- break;
-
- default:
-
- if (c >= 33 && c <= 126 && c != 61)
- encoded = false;
-
- break;
- }
inputCount++;
- outputCount += (encoded ? 3 : 1);
+ outputCount += utility::encoder::qpEncoder::RFC2047_getEncodedLength(c);
}
// Encode chunk
@@ -217,28 +197,7 @@ const string wordEncoder::getNextChunk(const string::size_type maxLength)
for (string::size_type i = 0, n = encodeBytes.length() ; i < n ; ++i)
{
const unsigned char c = encodeBytes[i];
- bool encoded = true;
-
- switch (c)
- {
- case ',':
- case ';':
- case ':':
- case '_':
- case '=':
-
- encoded = true;
- break;
-
- default:
-
- if (c >= 33 && c <= 126 && c != 61)
- encoded = false;
-
- break;
- }
-
- outputCount += (encoded ? 3 : 1);
+ outputCount += utility::encoder::qpEncoder::RFC2047_getEncodedLength(c);
}
}
diff --git a/vmime/utility/encoder/qpEncoder.hpp b/vmime/utility/encoder/qpEncoder.hpp
index 098b4c82..a969126e 100644
--- a/vmime/utility/encoder/qpEncoder.hpp
+++ b/vmime/utility/encoder/qpEncoder.hpp
@@ -47,10 +47,14 @@ public:
const std::vector <string> getAvailableProperties() const;
+ static bool RFC2047_isEncodingNeededForChar(const unsigned char c);
+ static int RFC2047_getEncodedLength(const unsigned char c);
+
protected:
static const unsigned char sm_hexDigits[17];
static const unsigned char sm_hexDecodeTable[256];
+ static const unsigned char sm_RFC2047EncodeTable[128];
};