Merge pull request #227 from Kopano-dev/boundaryprefix

Boundary marker parsing: WS rules and EQ check
author: Vincent Richard <[email protected]> 2019-10-07 08:50:30 +0000
committer: GitHub <[email protected]> 2019-10-07 08:50:30 +0000
commit: ab340b561d8012c2837c92abdf273a678921c3b7 (patch)
tree: 8bf4749284a3d6cece5aa00bb84ce5744afb1e0e
parent: Merge pull request #225 from 0xd34df00d/patch-1 (diff)
parent: Skip delimiter lines that are not exactly equal to the boundary (diff)
download: vmime-ab340b561d8012c2837c92abdf273a678921c3b7.tar.gz
vmime-ab340b561d8012c2837c92abdf273a678921c3b7.zip
2 files changed, 93 insertions, 54 deletions
diff --git a/src/vmime/body.cpp b/src/vmime/body.cpp
index 3757026d..a3875b9d 100644
--- a/src/vmime/body.cpp
+++ b/src/vmime/body.cpp
@@ -53,6 +53,11 @@ body::~body() {
 }
 
 
+/*
+ * boundaryStart: will become the index for "\r\n--marker"
+ * boundaryEnd:   will become the index after "marker", i.e. index for potential trailing "--", "\r\n", etc.
+ * return value:  index for "marker"
+ */
 // static
 size_t body::findNextBoundaryPosition(
 	const shared_ptr <utility::parserInputStreamAdapter>& parser,
@@ -65,7 +70,7 @@ size_t body::findNextBoundaryPosition(
 
 	size_t pos = position;
 
-	while (pos != npos && pos < end) {
+	for (; pos != npos && pos < end; ++pos) {
 
 		pos = parser->findNext(boundary, pos);
 
@@ -73,60 +78,43 @@ size_t body::findNextBoundaryPosition(
 			break;  // not found
 		}
 
-		if (pos != 0) {
-
-			// Skip transport padding bytes (SPACE or HTAB), if any
-			size_t advance = 0;
-
-			while (pos != 0) {
-
-				parser->seek(pos - advance - 1);
-
-				const byte_t c = parser->peekByte();
-
-				if (c == ' ' || c == '\t') {
-					++advance;
-				} else {
-					break;
-				}
-			}
-
-			// Ensure the bytes before boundary are "[LF]--": boundary should be
-			// at the beginning of a line, and should start with "--"
-			if (pos - advance >= 3) {
-
-				parser->seek(pos - advance - 3);
-
-				if (parser->matchBytes("\n--", 3)) {
-
-					parser->seek(pos + boundary.length());
-
-					const byte_t next = parser->peekByte();
-
-					// Boundary should be followed by a new line or a dash
-					if (next == '\r' || next == '\n' || next == '-') {
+		if (pos == 0) {
+			// Boundary is a prefix of another, continue the search (same for the other "continue"s)
+			continue;
+		}
 
-						// Get rid of the "[CR]" just before "[LF]--", if any
-						if (pos - advance >= 4) {
+		// Ensure the bytes before boundary are "[LF]--": boundary should be
+		// at the beginning of a line, and should start with "--"
+		if (pos < 3) {
+			continue;
+		}
+		parser->seek(pos - 3);
+		if (!parser->matchBytes("\n--", 3)) {
+			continue;
+		}
 
-							parser->seek(pos - advance - 4);
+		parser->seek(pos + boundary.length());
 
-							if (parser->peekByte() == '\r') {
-								advance++;
-							}
-						}
+		const byte_t next = parser->peekByte();
 
-						*boundaryStart = pos - advance - 3;
-						*boundaryEnd = pos + boundary.length();
+		// Boundary should be followed by a new line or a dash
+		if (!isspace(next) && next != '-') {
+			continue;
+		}
 
-						return pos;
-					}
-				}
+		// Get rid of the "[CR]" just before "[LF]--", if any
+		size_t backwards = 0;
+		if (pos >= 4) {
+			parser->seek(pos - 4);
+			if (parser->peekByte() == '\r') {
+				++backwards;
 			}
 		}
 
-		// Boundary is a prefix of another, continue the search
-		pos++;
+		*boundaryStart = pos - backwards - 3;
+		*boundaryEnd = pos + boundary.length();
+
+		return pos;
 	}
 
 	return pos;
@@ -276,10 +264,10 @@ void body::parseImpl(
 				boundaryEnd += 2;
 			}
 
-			// RFC #1521, Page 31:
-			// "...(If a boundary appears to end with white space, the
-			//  white space must be presumed to have been added by a
-			//  gateway, and must be deleted.)..."
+			// RFC 2046 §5.1.1 page 22: """If a boundary delimiter
+			// line appears to end with white space, the white
+			// space must be presumed to have been added by a
+			// gateway, and must be deleted."""
 			parser->seek(boundaryEnd);
 			boundaryEnd += parser->skipIf(parserHelpers::isSpaceOrTab, end);
 
@@ -288,6 +276,19 @@ void body::parseImpl(
 				boundaryEnd += 2;
 			} else if (boundaryEnd < end && parser->peekByte() == '\n') {
 				++boundaryEnd;
+			} else if (boundaryEnd == end) {
+			} else {
+				/*
+				 * RFC 2046 §5.1.1 page 19: """[...] optional
+				 * linear whitespace, and a terminating
+				 * CRLF.""" — junk handling is left
+				 * unspecified, so we might as well skip it to
+				 * facilitate broken mails.
+				 */
+				boundaryEnd += parser->skipIf([](char_t c) { return c != '\n'; }, end);
+				pos = findNextBoundaryPosition(parser, boundary, boundaryEnd, end, &boundaryStart, &boundaryEnd);
+				--index;
+				continue;
 			}
 
 			if (index == 0) {
diff --git a/tests/parser/bodyPartTest.cpp b/tests/parser/bodyPartTest.cpp
index 91742553..3aaadd03 100644
--- a/tests/parser/bodyPartTest.cpp
+++ b/tests/parser/bodyPartTest.cpp
@@ -39,6 +39,7 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
 		VMIME_TEST(testGenerate7bit)
 		VMIME_TEST(testTextUsageForQPEncoding)
 		VMIME_TEST(testParseVeryBigMessage)
+		VMIME_TEST(testParseBoundaryPrefix)
 	VMIME_TEST_LIST_END
 
 
@@ -213,9 +214,9 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
 		vmime::string str =
 			"Content-Type: multipart/mixed; boundary=\"MY-BOUNDARY\""
 			"\r\n\r\n"
-			"--  \t MY-BOUNDARY\r\nHEADER1\r\n\r\nBODY1\r\n"
+			"--MY-BOUNDARY  \t \r\nHEADER1\r\n\r\nBODY1\r\n"
 			"--MY-BOUNDARY\r\n"
-			"-- MY-BOUNDARY--\r\n";
+			"--MY-BOUNDARY-- \r\n";
 
 		vmime::bodyPart p;
 		p.parse(str);
@@ -291,7 +292,7 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
 		vmime::string str =
 			"Content-Type: multipart/mixed"
 			"\r\n\r\n"
-			"--  \t UNKNOWN-BOUNDARY\r\nHEADER1\r\n\r\nBODY1\r\n"
+			"--UNKNOWN-BOUNDARY  \t \r\nHEADER1\r\n\r\nBODY1\r\n"
 			"--UNKNOWN-BOUNDARY\r\nHEADER2\r\n\r\nBODY2\r\n"
 			"--UNKNOWN-BOUNDARY--";
 
@@ -373,4 +374,41 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
 		VASSERT("2.2", vmime::dynamicCast <const vmime::streamContentHandler>(body2Cts) != NULL);
 	}
 
+	void testParseBoundaryPrefix() {
+		/*
+		 * Clients are not supposed to create boundary identifiers that
+		 * contain a prefix of another (RFC 2046 section 5.1), but alas
+		 * CANCOM FortiMail produces this garbage.
+		 */
+		vmime::string str =
+			"Content-Type: multipart/related; boundary=\"--b12\"\r\n"
+			"\r\n"
+			"----b12\r\n"
+			"Content-Type: multipart/alternative; boundary=\"--b12-1\"\r\n"
+			"\r\n"
+			"----b12-1\r\n"
+			"Content-Type: text/plain; charset=utf-8\r\n"
+			"\r\n"
+			"P11\r\n"
+			"----b12-1\r\n"
+			"Content-Type: text/html; charset=utf-8\r\n"
+			"\r\n"
+			"P12\r\n"
+			"----b12-1--\r\n"
+			"----b12\r\n"
+			"\r\n"
+			"P2\r\n"
+			"----b12--\r\n";
+
+		vmime::bodyPart relco;
+		relco.parse(str);
+		auto relbd = relco.getBody();
+		VASSERT_EQ("global-partcount", 2, relbd->getPartCount());
+		auto altbd = relbd->getPartAt(0)->getBody();
+		VASSERT_EQ("part1-partcount", 2, altbd->getPartCount());
+		VASSERT_EQ("part1.1-body", "P11", extractContents(altbd->getPartAt(0)->getBody()->getContents()));
+		VASSERT_EQ("part1.2-body", "P12", extractContents(altbd->getPartAt(1)->getBody()->getContents()));
+		VASSERT_EQ("part2-body", "P2", extractContents(relbd->getPartAt(1)->getBody()->getContents()));
+	}
+
 VMIME_TEST_SUITE_END
author	Vincent Richard <[email protected]>	2019-10-07 08:50:30 +0000
committer	GitHub <[email protected]>	2019-10-07 08:50:30 +0000
commit	ab340b561d8012c2837c92abdf273a678921c3b7 (patch)
tree	8bf4749284a3d6cece5aa00bb84ce5744afb1e0e
parent	Merge pull request #225 from 0xd34df00d/patch-1 (diff)
parent	Skip delimiter lines that are not exactly equal to the boundary (diff)
download	vmime-ab340b561d8012c2837c92abdf273a678921c3b7.tar.gz vmime-ab340b561d8012c2837c92abdf273a678921c3b7.zip