aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVincent Richard <[email protected]>2013-06-13 10:00:42 +0000
committerVincent Richard <[email protected]>2013-06-13 10:00:42 +0000
commit2e5574b14672f540e4ae2c06620c621eb2f79ec6 (patch)
tree2431d91aece7d85889faa98b3c66e4e45a85139c
parentAdded support for SMTPUTF8 extension (RFC-6531). (diff)
downloadvmime-2e5574b14672f540e4ae2c06620c621eb2f79ec6.tar.gz
vmime-2e5574b14672f540e4ae2c06620c621eb2f79ec6.zip
Added support for transport padding in boundary (issue #38).
-rw-r--r--src/body.cpp223
-rw-r--r--tests/parser/bodyPartTest.cpp42
-rw-r--r--vmime/body.hpp17
3 files changed, 173 insertions, 109 deletions
diff --git a/src/body.cpp b/src/body.cpp
index 8c599b99..14c14fd6 100644
--- a/src/body.cpp
+++ b/src/body.cpp
@@ -53,6 +53,79 @@ body::~body()
}
+// static
+utility::stream::size_type body::findNextBoundaryPosition
+ (ref <utility::parserInputStreamAdapter> parser, const string& boundary,
+ const utility::stream::size_type position, const utility::stream::size_type end,
+ utility::stream::size_type* boundaryStart, utility::stream::size_type* boundaryEnd)
+{
+ utility::stream::size_type pos = position;
+
+ while (pos != utility::stream::npos && pos < end)
+ {
+ pos = parser->findNext(boundary, pos);
+
+ if (pos == utility::stream::npos)
+ break; // not found
+
+ if (pos != 0)
+ {
+ // Skip transport padding bytes (SPACE or HTAB), if any
+ utility::stream::size_type advance = 0;
+
+ while (pos != 0)
+ {
+ parser->seek(pos - advance - 1);
+
+ const utility::stream::value_type c = parser->peekByte();
+
+ if (c == ' ' || c == '\t')
+ ++advance;
+ else
+ break;
+ }
+
+ // Ensure the bytes before boundary are "[LF]--": boundary should be
+ // at the beginning of a line, and should start with "--"
+ if (pos - advance >= 3)
+ {
+ parser->seek(pos - advance - 3);
+
+ if (parser->matchBytes("\n--", 3))
+ {
+ parser->seek(pos + boundary.length());
+
+ const utility::stream::value_type next = parser->peekByte();
+
+ // Boundary should be followed by a new line or a dash
+ if (next == '\r' || next == '\n' || next == '-')
+ {
+ // Get rid of the "[CR]" just before "[LF]--", if any
+ if (pos - advance >= 4)
+ {
+ parser->seek(pos - advance - 4);
+
+ if (parser->peekByte() == '\r')
+ advance++;
+ }
+
+ *boundaryStart = pos - advance - 3;
+ *boundaryEnd = pos + boundary.length();
+
+ return pos;
+ }
+ }
+ }
+ }
+
+ // Boundary is a prefix of another, continue the search
+ pos++;
+ }
+
+ return pos;
+}
+
+
void body::parseImpl
(const parsingContext& /* ctx */,
ref <utility::parserInputStreamAdapter> parser,
@@ -126,17 +199,23 @@ void body::parseImpl
buffer[sizeof(buffer) / sizeof(buffer[0]) - 1] = '\0';
+ // Skip transport padding bytes (SPACE or HTAB), if any
+ utility::stream::size_type boundarySkip = 0;
+
+ while (boundarySkip < bufferLen && parserHelpers::isSpace(buffer[boundarySkip]))
+ ++boundarySkip;
+
// Extract boundary from buffer (stop at first CR or LF).
// We have to stop after a reasonnably long boundary length (100)
// not to take the whole body contents for a boundary...
string::value_type boundaryBytes[100];
string::size_type boundaryLen = 0;
- for (string::value_type c = buffer[0] ;
+ for (string::value_type c = buffer[boundarySkip] ;
boundaryLen < bufferLen && boundaryLen < 100 && !(c == '\r' || c == '\n') ;
- c = buffer[++boundaryLen])
+ ++boundaryLen, c = buffer[boundarySkip + boundaryLen])
{
- boundaryBytes[boundaryLen] = buffer[boundaryLen];
+ boundaryBytes[boundaryLen] = c;
}
if (boundaryLen >= 1 && boundaryLen < 100)
@@ -167,104 +246,60 @@ void body::parseImpl
// This is a multi-part body
if (isMultipart && !boundary.empty())
{
- const string boundarySep("--" + boundary);
-
utility::stream::size_type partStart = position;
utility::stream::size_type pos = position;
bool lastPart = false;
- while (pos != utility::stream::npos && pos < end)
- {
- pos = parser->findNext(boundarySep, pos);
-
- if (pos == utility::stream::npos)
- break; // not found
-
- if (pos != 0)
- {
- parser->seek(pos - 1);
-
- if (parser->peekByte() != '\n')
- {
- // Boundary is not at a beginning of a line
- pos++;
- continue;
- }
-
- parser->skip(1 + boundarySep.length());
- }
- else
- {
- parser->seek(pos + boundarySep.length());
- }
-
- const utility::stream::value_type next = parser->peekByte();
-
- if (next == '\r' || next == '\n' || next == '-')
- break;
-
- // Boundary is a prefix of another, continue the search
- pos++;
- }
-
- if (pos != utility::stream::npos && pos < end)
- {
- vmime::text text;
- text.parse(parser, position, pos);
-
- m_prologText = text.getWholeBuffer();
- }
+ // Find the first boundary
+ utility::stream::size_type boundaryStart, boundaryEnd;
+ pos = findNextBoundaryPosition(parser, boundary, pos, end, &boundaryStart, &boundaryEnd);
for (int index = 0 ; !lastPart && (pos != utility::stream::npos) && (pos < end) ; ++index)
{
- utility::stream::size_type partEnd = pos;
-
- // Get rid of the [CR]LF just before the boundary string
- if (pos >= (position + 1))
- {
- parser->seek(pos - 1);
-
- if (parser->peekByte() == '\n')
- --partEnd;
- }
-
- if (pos >= (position + 2))
- {
- parser->seek(pos - 2);
-
- if (parser->peekByte() == '\r')
- --partEnd;
- }
+ utility::stream::size_type partEnd = boundaryStart;
// Check whether it is the last part (boundary terminated by "--")
- pos += boundarySep.length();
- parser->seek(pos);
+ parser->seek(boundaryEnd);
- if (pos + 1 < end && parser->matchBytes("--", 2))
+ if (boundaryEnd + 1 < end && parser->matchBytes("--", 2))
{
lastPart = true;
- pos += 2;
+ boundaryEnd += 2;
}
// RFC #1521, Page 31:
// "...(If a boundary appears to end with white space, the
// white space must be presumed to have been added by a
// gateway, and must be deleted.)..."
- parser->seek(pos);
- pos += parser->skipIf(parserHelpers::isSpaceOrTab, end);
+ parser->seek(boundaryEnd);
+ boundaryEnd += parser->skipIf(parserHelpers::isSpaceOrTab, end);
// End of boundary line
- if (pos + 1 < end && parser->matchBytes("\r\n", 2))
+ if (boundaryEnd + 1 < end && parser->matchBytes("\r\n", 2))
{
- pos += 2;
+ boundaryEnd += 2;
}
- else if (pos < end && parser->peekByte() == '\n')
+ else if (boundaryEnd < end && parser->peekByte() == '\n')
{
- ++pos;
+ ++boundaryEnd;
}
- if (index > 0)
+ if (index == 0)
+ {
+ if (partEnd > partStart)
+ {
+ vmime::text text;
+ text.parse(parser, partStart, partEnd);
+
+ m_prologText = text.getWholeBuffer();
+ }
+ else
+ {
+ m_prologText = "";
+ }
+ }
+ else // index > 0
{
ref <bodyPart> part = vmime::create <bodyPart>();
@@ -279,41 +314,11 @@ void body::parseImpl
m_parts.push_back(part);
}
- partStart = pos;
-
- while (pos != utility::stream::npos && pos < end)
- {
- pos = parser->findNext(boundarySep, pos);
-
- if (pos == utility::stream::npos)
- break; // not found
-
- if (pos != 0)
- {
- parser->seek(pos - 1);
-
- if (parser->peekByte() != '\n')
- {
- // Boundary is not at a beginning of a line
- pos++;
- continue;
- }
+ partStart = boundaryEnd;
- parser->skip(1 + boundarySep.length());
- }
- else
- {
- parser->seek(pos + boundarySep.length());
- }
-
- const utility::stream::value_type next = parser->peekByte();
-
- if (next == '\r' || next == '\n' || next == '-')
- break;
-
- // Boundary is a prefix of another, continue the search
- pos++;
- }
+ // Find the next boundary
+ pos = findNextBoundaryPosition
+ (parser, boundary, boundaryEnd, end, &boundaryStart, &boundaryEnd);
}
m_contents = vmime::create <emptyContentHandler>();
diff --git a/tests/parser/bodyPartTest.cpp b/tests/parser/bodyPartTest.cpp
index 487b3160..453982e0 100644
--- a/tests/parser/bodyPartTest.cpp
+++ b/tests/parser/bodyPartTest.cpp
@@ -30,10 +30,12 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
VMIME_TEST(testParse)
VMIME_TEST(testGenerate)
VMIME_TEST(testParseGuessBoundary)
+ VMIME_TEST(testParseGuessBoundaryWithTransportPadding)
VMIME_TEST(testParseMissingLastBoundary)
VMIME_TEST(testPrologEpilog)
VMIME_TEST(testPrologEncoding)
VMIME_TEST(testSuccessiveBoundaries)
+ VMIME_TEST(testTransportPaddingInBoundary)
VMIME_TEST(testGenerate7bit)
VMIME_TEST(testTextUsageForQPEncoding)
VMIME_TEST(testParseVeryBigMessage)
@@ -200,6 +202,24 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
VASSERT_EQ("part2-body", "", extractContents(p.getBody()->getPartAt(1)->getBody()->getContents()));
}
+ void testTransportPaddingInBoundary()
+ {
+ vmime::string str =
+ "Content-Type: multipart/mixed; boundary=\"MY-BOUNDARY\""
+ "\r\n\r\n"
+ "-- \t MY-BOUNDARY\r\nHEADER1\r\n\r\nBODY1\r\n"
+ "--MY-BOUNDARY\r\n"
+ "-- MY-BOUNDARY--\r\n";
+
+ vmime::bodyPart p;
+ p.parse(str);
+
+ VASSERT_EQ("count", 2, p.getBody()->getPartCount());
+
+ VASSERT_EQ("part1-body", "BODY1", extractContents(p.getBody()->getPartAt(0)->getBody()->getContents()));
+ VASSERT_EQ("part2-body", "", extractContents(p.getBody()->getPartAt(1)->getBody()->getContents()));
+ }
+
/** Ensure '7bit' encoding is used when body is 7-bit only. */
void testGenerate7bit()
{
@@ -256,6 +276,28 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
VASSERT_EQ("part2-body", "BODY2", extractContents(p.getBody()->getPartAt(1)->getBody()->getContents()));
}
+ void testParseGuessBoundaryWithTransportPadding()
+ {
+ // Boundary is not specified in "Content-Type" field
+ // Parser will try to guess it from message contents.
+ // Transport padding white spaces should be ignored.
+
+ vmime::string str =
+ "Content-Type: multipart/mixed"
+ "\r\n\r\n"
+ "-- \t UNKNOWN-BOUNDARY\r\nHEADER1\r\n\r\nBODY1\r\n"
+ "--UNKNOWN-BOUNDARY\r\nHEADER2\r\n\r\nBODY2\r\n"
+ "--UNKNOWN-BOUNDARY--";
+
+ vmime::bodyPart p;
+ p.parse(str);
+
+ VASSERT_EQ("count", 2, p.getBody()->getPartCount());
+
+ VASSERT_EQ("part1-body", "BODY1", extractContents(p.getBody()->getPartAt(0)->getBody()->getContents()));
+ VASSERT_EQ("part2-body", "BODY2", extractContents(p.getBody()->getPartAt(1)->getBody()->getContents()));
+ }
+
void testParseVeryBigMessage()
{
// When parsing from a seekable input stream, body contents should not
diff --git a/vmime/body.hpp b/vmime/body.hpp
index f9781eeb..50d67807 100644
--- a/vmime/body.hpp
+++ b/vmime/body.hpp
@@ -301,6 +301,23 @@ private:
protected:
+ /** Finds the next boundary position in the parsing buffer.
+ *
+ * @param parser parser object
+ * @param boundary boundary string (without "--" nor CR/LF)
+ * @param position start position
+ * @param end end position
+ * @param boundaryStart will hold the start position of the boundary (including any
+ * CR/LF and "--" before the boundary)
+ * @param boundaryEnd will hold the end position of the boundary (position just
+ * before the CRLF or "--" which follows)
+ * @return the position of the boundary string, or stream::npos if not found
+ */
+ utility::stream::size_type findNextBoundaryPosition
+ (ref <utility::parserInputStreamAdapter> parser, const string& boundary,
+ const utility::stream::size_type position, const utility::stream::size_type end,
+ utility::stream::size_type* boundaryStart, utility::stream::size_type* boundaryEnd);
+
// Component parsing & assembling
void parseImpl
(const parsingContext& ctx,