Added support for transport padding in boundary (issue #38).

2013-06-13 12:00:42 +02:00 · 2013-06-13 12:00:42 +02:00 · 2e5574b146
commit 2e5574b146
parent 93c8d3a071
3 changed files with 173 additions and 109 deletions
--- a/src/body.cpp
+++ b/src/body.cpp
@ -53,6 +53,79 @@ body::~body()
 }


+/** Searches the parsing buffer for the next boundary delimiter line.
+  *
+  * A candidate occurrence of 'boundary' is accepted only if:
+  *   - it is preceded by "[LF]--", optionally followed by transport
+  *     padding (SPACE or HTAB bytes) between the "--" and the boundary;
+  *   - it is immediately followed by CR, LF or '-'.
+  * Any other occurrence is skipped and the search goes on one byte further.
+  *
+  * @param parser parser object (its current position is modified)
+  * @param boundary boundary string (without "--" nor CR/LF)
+  * @param position position at which the search starts
+  * @param end the search loop stops once the current position reaches it
+  * @param boundaryStart on success, receives the position of the "[CR]LF--"
+  * sequence which precedes the boundary (not written on failure)
+  * @param boundaryEnd on success, receives the position just after the
+  * boundary string (not written on failure)
+  * @return the position of the boundary string on success; otherwise
+  * stream::npos or a position which is not lower than 'end'
+  */
+// static
+utility::stream::size_type body::findNextBoundaryPosition
+	(ref <utility::parserInputStreamAdapter> parser, const string& boundary,
+	 const utility::stream::size_type position, const utility::stream::size_type end,
+	 utility::stream::size_type* boundaryStart, utility::stream::size_type* boundaryEnd)
+{
+	utility::stream::size_type pos = position;
+
+	while (pos != utility::stream::npos && pos < end)
+	{
+		pos = parser->findNext(boundary, pos);
+
+		if (pos == utility::stream::npos)
+			break;  // not found
+
+		if (pos != 0)
+		{
+			// Skip transport padding bytes (SPACE or HTAB), if any.
+			// The number of skipped bytes is bounded by 'pos': when every
+			// byte located before the boundary is a padding byte, the
+			// expression 'pos - advance - 1' must not go below zero.
+			utility::stream::size_type advance = 0;
+
+			while (advance < pos)
+			{
+				parser->seek(pos - advance - 1);
+
+				const utility::stream::value_type c = parser->peekByte();
+
+				if (c == ' ' || c == '\t')
+					++advance;
+				else
+					break;
+			}
+
+			// Ensure the bytes before boundary are "[LF]--": boundary should be
+			// at the beginning of a line, and should start with "--"
+			if (pos - advance >= 3)
+			{
+				parser->seek(pos - advance - 3);
+
+				if (parser->matchBytes("\n--", 3))
+				{
+					parser->seek(pos + boundary.length());
+
+					const utility::stream::value_type next = parser->peekByte();
+
+					// Boundary should be followed by a new line or a dash
+					if (next == '\r' || next == '\n' || next == '-')
+					{
+						// Get rid of the "[CR]" just before "[LF]--", if any
+						if (pos - advance >= 4)
+						{
+							parser->seek(pos - advance - 4);
+
+							if (parser->peekByte() == '\r')
+								advance++;
+						}
+
+						// 'advance' now counts the padding bytes plus the
+						// optional CR, so this points to the "[CR]LF--" sequence
+						*boundaryStart = pos - advance - 3;
+						*boundaryEnd = pos + boundary.length();
+
+						return pos;
+					}
+				}
+			}
+		}
+
+		// Boundary is a prefix of another, continue the search
+		pos++;
+	}
+
+	return pos;
+}
+
+
 void body::parseImpl
 	(const parsingContext& /* ctx */,
 	 ref <utility::parserInputStreamAdapter> parser,
@ -126,17 +199,23 @@ void body::parseImpl

 					buffer[sizeof(buffer) / sizeof(buffer[0]) - 1] = '\0';

+					// Skip transport padding bytes (SPACE or HTAB), if any
+					utility::stream::size_type boundarySkip = 0;
+
+					while (boundarySkip < bufferLen && parserHelpers::isSpace(buffer[boundarySkip]))
+						++boundarySkip;
+
 					// Extract boundary from buffer (stop at first CR or LF).
 					// We have to stop after a reasonnably long boundary length (100)
 					// not to take the whole body contents for a boundary...
 					string::value_type boundaryBytes[100];
 					string::size_type boundaryLen = 0;

-					for (string::value_type c = buffer[0] ;
+					for (string::value_type c = buffer[boundarySkip] ;
 					     boundaryLen < bufferLen && boundaryLen < 100 && !(c == '\r' || c == '\n') ;
-					     c = buffer[++boundaryLen])
+					     ++boundaryLen, c = buffer[boundarySkip + boundaryLen])
 					{
-						boundaryBytes[boundaryLen] = buffer[boundaryLen];
+						boundaryBytes[boundaryLen] = c;
 					}

 					if (boundaryLen >= 1 && boundaryLen < 100)
@ -167,104 +246,60 @@ void body::parseImpl
 	// This is a multi-part body
 	if (isMultipart && !boundary.empty())
 	{
-		const string boundarySep("--" + boundary);
-
 		utility::stream::size_type partStart = position;
 		utility::stream::size_type pos = position;

 		bool lastPart = false;

-		while (pos != utility::stream::npos && pos < end)
-		{
-			pos = parser->findNext(boundarySep, pos);
-
-			if (pos == utility::stream::npos)
-				break;  // not found
-
-			if (pos != 0)
-			{
-				parser->seek(pos - 1);
-
-				if (parser->peekByte() != '\n')
-				{
-					// Boundary is not at a beginning of a line
-					pos++;
-					continue;
-				}
-
-				parser->skip(1 + boundarySep.length());
-			}
-			else
-			{
-				parser->seek(pos + boundarySep.length());
-			}
-
-			const utility::stream::value_type next = parser->peekByte();
-
-			if (next == '\r' || next == '\n' || next == '-')
-				break;
-
-			// Boundary is a prefix of another, continue the search
-			pos++;
-		}
-
-		if (pos != utility::stream::npos && pos < end)
-		{
-			vmime::text text;
-			text.parse(parser, position, pos);
-
-			m_prologText = text.getWholeBuffer();
-		}
+		// Find the first boundary
+		utility::stream::size_type boundaryStart, boundaryEnd;
+		pos = findNextBoundaryPosition(parser, boundary, pos, end, &boundaryStart, &boundaryEnd);

 		for (int index = 0 ; !lastPart && (pos != utility::stream::npos) && (pos < end) ; ++index)
 		{
-			utility::stream::size_type partEnd = pos;
-
-			// Get rid of the [CR]LF just before the boundary string
-			if (pos >= (position + 1))
-			{
-				parser->seek(pos - 1);
-
-				if (parser->peekByte() == '\n')
-					--partEnd;
-			}
-
-			if (pos >= (position + 2))
-			{
-				parser->seek(pos - 2);
-
-				if (parser->peekByte() == '\r')
-					--partEnd;
-			}
+			utility::stream::size_type partEnd = boundaryStart;

 			// Check whether it is the last part (boundary terminated by "--")
-			pos += boundarySep.length();
-			parser->seek(pos);
+			parser->seek(boundaryEnd);

-			if (pos + 1 < end && parser->matchBytes("--", 2))
+			if (boundaryEnd + 1 < end && parser->matchBytes("--", 2))
 			{
 				lastPart = true;
-				pos += 2;
+				boundaryEnd += 2;
 			}

 			// RFC #1521, Page 31:
 			// "...(If a boundary appears to end with white space, the
 			//  white space must be presumed to have been added by a
 			//  gateway, and must be deleted.)..."
-			parser->seek(pos);
-			pos += parser->skipIf(parserHelpers::isSpaceOrTab, end);
+			parser->seek(boundaryEnd);
+			boundaryEnd += parser->skipIf(parserHelpers::isSpaceOrTab, end);

 			// End of boundary line
-			if (pos + 1 < end && parser->matchBytes("\r\n", 2))
+			if (boundaryEnd + 1 < end && parser->matchBytes("\r\n", 2))
 			{
-				pos += 2;
+				boundaryEnd += 2;
 			}
-			else if (pos < end && parser->peekByte() == '\n')
+			else if (boundaryEnd < end && parser->peekByte() == '\n')
 			{
-				++pos;
+				++boundaryEnd;
 			}

-			if (index > 0)
+			if (index == 0)
+			{
+				if (partEnd > partStart)
+				{
+					vmime::text text;
+					text.parse(parser, partStart, partEnd);
+
+					m_prologText = text.getWholeBuffer();
+				}
+				else
+				{
+					m_prologText = "";
+				}
+			}
+			else // index > 0
 			{
 				ref <bodyPart> part = vmime::create <bodyPart>();

@ -279,41 +314,11 @@ void body::parseImpl
 				m_parts.push_back(part);
 			}

-			partStart = pos;
+			partStart = boundaryEnd;

-			while (pos != utility::stream::npos && pos < end)
-			{
-				pos = parser->findNext(boundarySep, pos);
-
-				if (pos == utility::stream::npos)
-					break;  // not found
-
-				if (pos != 0)
-				{
-					parser->seek(pos - 1);
-
-					if (parser->peekByte() != '\n')
-					{
-						// Boundary is not at a beginning of a line
-						pos++;
-						continue;
-					}
-
-					parser->skip(1 + boundarySep.length());
-				}
-				else
-				{
-					parser->seek(pos + boundarySep.length());
-				}
-
-				const utility::stream::value_type next = parser->peekByte();
-
-				if (next == '\r' || next == '\n' || next == '-')
-					break;
-
-				// Boundary is a prefix of another, continue the search
-				pos++;
-			}
+			// Find the next boundary
+			pos = findNextBoundaryPosition
+				(parser, boundary, boundaryEnd, end, &boundaryStart, &boundaryEnd);
 		}

 		m_contents = vmime::create <emptyContentHandler>();
--- a/tests/parser/bodyPartTest.cpp
+++ b/tests/parser/bodyPartTest.cpp
@ -30,10 +30,12 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
 		VMIME_TEST(testParse)
 		VMIME_TEST(testGenerate)
 		VMIME_TEST(testParseGuessBoundary)
+		VMIME_TEST(testParseGuessBoundaryWithTransportPadding)
 		VMIME_TEST(testParseMissingLastBoundary)
 		VMIME_TEST(testPrologEpilog)
 		VMIME_TEST(testPrologEncoding)
 		VMIME_TEST(testSuccessiveBoundaries)
+		VMIME_TEST(testTransportPaddingInBoundary)
 		VMIME_TEST(testGenerate7bit)
 		VMIME_TEST(testTextUsageForQPEncoding)
 		VMIME_TEST(testParseVeryBigMessage)
@ -200,6 +202,24 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
 		VASSERT_EQ("part2-body", "", extractContents(p.getBody()->getPartAt(1)->getBody()->getContents()));
 	}

+	/** Boundary lines may contain SPACE/HTAB padding between the leading
+	  * "--" and the boundary string; such lines must still be recognized. */
+	void testTransportPaddingInBoundary()
+	{
+		// One string literal per line of the message
+		vmime::string message =
+			"Content-Type: multipart/mixed; boundary=\"MY-BOUNDARY\"\r\n"
+			"\r\n"
+			"--  \t MY-BOUNDARY\r\n"
+			"HEADER1\r\n"
+			"\r\n"
+			"BODY1\r\n"
+			"--MY-BOUNDARY\r\n"
+			"-- MY-BOUNDARY--\r\n";
+
+		vmime::bodyPart part;
+		part.parse(message);
+
+		VASSERT_EQ("count", 2, part.getBody()->getPartCount());
+
+		VASSERT_EQ("part1-body", "BODY1", extractContents(part.getBody()->getPartAt(0)->getBody()->getContents()));
+		VASSERT_EQ("part2-body", "", extractContents(part.getBody()->getPartAt(1)->getBody()->getContents()));
+	}
+
 	/** Ensure '7bit' encoding is used when body is 7-bit only. */
 	void testGenerate7bit()
 	{
@ -256,6 +276,28 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
 		VASSERT_EQ("part2-body", "BODY2", extractContents(p.getBody()->getPartAt(1)->getBody()->getContents()));
 	}

+	/** The "Content-Type" field carries no boundary parameter here, so the
+	  * parser has to guess the boundary from the message contents. The
+	  * white spaces found between "--" and the boundary string (transport
+	  * padding) should be ignored when doing so. */
+	void testParseGuessBoundaryWithTransportPadding()
+	{
+		// One string literal per line of the message
+		vmime::string message =
+			"Content-Type: multipart/mixed\r\n"
+			"\r\n"
+			"--  \t UNKNOWN-BOUNDARY\r\n"
+			"HEADER1\r\n"
+			"\r\n"
+			"BODY1\r\n"
+			"--UNKNOWN-BOUNDARY\r\n"
+			"HEADER2\r\n"
+			"\r\n"
+			"BODY2\r\n"
+			"--UNKNOWN-BOUNDARY--";
+
+		vmime::bodyPart part;
+		part.parse(message);
+
+		VASSERT_EQ("count", 2, part.getBody()->getPartCount());
+
+		VASSERT_EQ("part1-body", "BODY1", extractContents(part.getBody()->getPartAt(0)->getBody()->getContents()));
+		VASSERT_EQ("part2-body", "BODY2", extractContents(part.getBody()->getPartAt(1)->getBody()->getContents()));
+	}
+
 	void testParseVeryBigMessage()
 	{
 		// When parsing from a seekable input stream, body contents should not
--- a/vmime/body.hpp
+++ b/vmime/body.hpp
@ -301,6 +301,23 @@ private:

 protected:

+	/** Finds the next boundary position in the parsing buffer.
+	  *
+	  * This function only works on its arguments and does not access
+	  * any member of the object, so it is declared static.
+	  *
+	  * @param parser parser object
+	  * @param boundary boundary string (without "--" nor CR/LF)
+	  * @param position start position
+	  * @param end end position
+	  * @param boundaryStart will hold the start position of the boundary (including any
+	  * CR/LF and "--" before the boundary); only written when a boundary is found
+	  * @param boundaryEnd will hold the end position of the boundary (position just
+	  * before the CRLF or "--" which follows); only written when a boundary is found
+	  * @return the position of the boundary string if found; otherwise stream::npos
+	  * or a position which is not lower than 'end' (callers should check both)
+	  */
+	static utility::stream::size_type findNextBoundaryPosition
+		(ref <utility::parserInputStreamAdapter> parser, const string& boundary,
+		 const utility::stream::size_type position, const utility::stream::size_type end,
+		 utility::stream::size_type* boundaryStart, utility::stream::size_type* boundaryEnd);
+
 	// Component parsing & assembling
 	void parseImpl
 		(const parsingContext& ctx,