aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVincent Richard <[email protected]>2019-10-07 08:50:30 +0000
committerGitHub <[email protected]>2019-10-07 08:50:30 +0000
commitab340b561d8012c2837c92abdf273a678921c3b7 (patch)
tree8bf4749284a3d6cece5aa00bb84ce5744afb1e0e
parentMerge pull request #225 from 0xd34df00d/patch-1 (diff)
parentSkip delimiter lines that are not exactly equal to the boundary (diff)
downloadvmime-ab340b561d8012c2837c92abdf273a678921c3b7.tar.gz
vmime-ab340b561d8012c2837c92abdf273a678921c3b7.zip
Merge pull request #227 from Kopano-dev/boundaryprefix
Boundary marker parsing: WS rules and EQ check
-rw-r--r--src/vmime/body.cpp103
-rw-r--r--tests/parser/bodyPartTest.cpp44
2 files changed, 93 insertions, 54 deletions
diff --git a/src/vmime/body.cpp b/src/vmime/body.cpp
index 3757026d..a3875b9d 100644
--- a/src/vmime/body.cpp
+++ b/src/vmime/body.cpp
@@ -53,6 +53,11 @@ body::~body() {
}
+/*
+ * boundaryStart: will become the index for "\r\n--marker"
+ * boundaryEnd: will become the index after "marker", i.e. index for potential trailing "--", "\r\n", etc.
+ * return value: index for "marker"
+ */
// static
size_t body::findNextBoundaryPosition(
const shared_ptr <utility::parserInputStreamAdapter>& parser,
@@ -65,7 +70,7 @@ size_t body::findNextBoundaryPosition(
size_t pos = position;
- while (pos != npos && pos < end) {
+ for (; pos != npos && pos < end; ++pos) {
pos = parser->findNext(boundary, pos);
@@ -73,60 +78,43 @@ size_t body::findNextBoundaryPosition(
break; // not found
}
- if (pos != 0) {
-
- // Skip transport padding bytes (SPACE or HTAB), if any
- size_t advance = 0;
-
- while (pos != 0) {
-
- parser->seek(pos - advance - 1);
-
- const byte_t c = parser->peekByte();
-
- if (c == ' ' || c == '\t') {
- ++advance;
- } else {
- break;
- }
- }
-
- // Ensure the bytes before boundary are "[LF]--": boundary should be
- // at the beginning of a line, and should start with "--"
- if (pos - advance >= 3) {
-
- parser->seek(pos - advance - 3);
-
- if (parser->matchBytes("\n--", 3)) {
-
- parser->seek(pos + boundary.length());
-
- const byte_t next = parser->peekByte();
-
- // Boundary should be followed by a new line or a dash
- if (next == '\r' || next == '\n' || next == '-') {
+ if (pos == 0) {
+ // Boundary is a prefix of another, continue the search (same for the other "continue"s)
+ continue;
+ }
- // Get rid of the "[CR]" just before "[LF]--", if any
- if (pos - advance >= 4) {
+ // Ensure the bytes before boundary are "[LF]--": boundary should be
+ // at the beginning of a line, and should start with "--"
+ if (pos < 3) {
+ continue;
+ }
+ parser->seek(pos - 3);
+ if (!parser->matchBytes("\n--", 3)) {
+ continue;
+ }
- parser->seek(pos - advance - 4);
+ parser->seek(pos + boundary.length());
- if (parser->peekByte() == '\r') {
- advance++;
- }
- }
+ const byte_t next = parser->peekByte();
- *boundaryStart = pos - advance - 3;
- *boundaryEnd = pos + boundary.length();
+ // Boundary should be followed by a new line or a dash
+ if (!isspace(next) && next != '-') {
+ continue;
+ }
- return pos;
- }
- }
+ // Get rid of the "[CR]" just before "[LF]--", if any
+ size_t backwards = 0;
+ if (pos >= 4) {
+ parser->seek(pos - 4);
+ if (parser->peekByte() == '\r') {
+ ++backwards;
}
}
- // Boundary is a prefix of another, continue the search
- pos++;
+ *boundaryStart = pos - backwards - 3;
+ *boundaryEnd = pos + boundary.length();
+
+ return pos;
}
return pos;
@@ -276,10 +264,10 @@ void body::parseImpl(
boundaryEnd += 2;
}
- // RFC #1521, Page 31:
- // "...(If a boundary appears to end with white space, the
- // white space must be presumed to have been added by a
- // gateway, and must be deleted.)..."
+ // RFC 2046 §5.1.1 page 22: """If a boundary delimiter
+ // line appears to end with white space, the white
+ // space must be presumed to have been added by a
+ // gateway, and must be deleted."""
parser->seek(boundaryEnd);
boundaryEnd += parser->skipIf(parserHelpers::isSpaceOrTab, end);
@@ -288,6 +276,19 @@ void body::parseImpl(
boundaryEnd += 2;
} else if (boundaryEnd < end && parser->peekByte() == '\n') {
++boundaryEnd;
+ } else if (boundaryEnd == end) {
+ } else {
+ /*
+ * RFC 2046 §5.1.1 page 19: """[...] optional
+ * linear whitespace, and a terminating
+ * CRLF.""" — junk handling is left
+ * unspecified, so we might as well skip it to
+ * facilitate broken mails.
+ */
+ boundaryEnd += parser->skipIf([](char_t c) { return c != '\n'; }, end);
+ pos = findNextBoundaryPosition(parser, boundary, boundaryEnd, end, &boundaryStart, &boundaryEnd);
+ --index;
+ continue;
}
if (index == 0) {
diff --git a/tests/parser/bodyPartTest.cpp b/tests/parser/bodyPartTest.cpp
index 91742553..3aaadd03 100644
--- a/tests/parser/bodyPartTest.cpp
+++ b/tests/parser/bodyPartTest.cpp
@@ -39,6 +39,7 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
VMIME_TEST(testGenerate7bit)
VMIME_TEST(testTextUsageForQPEncoding)
VMIME_TEST(testParseVeryBigMessage)
+ VMIME_TEST(testParseBoundaryPrefix)
VMIME_TEST_LIST_END
@@ -213,9 +214,9 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
vmime::string str =
"Content-Type: multipart/mixed; boundary=\"MY-BOUNDARY\""
"\r\n\r\n"
- "-- \t MY-BOUNDARY\r\nHEADER1\r\n\r\nBODY1\r\n"
+ "--MY-BOUNDARY \t \r\nHEADER1\r\n\r\nBODY1\r\n"
"--MY-BOUNDARY\r\n"
- "-- MY-BOUNDARY--\r\n";
+ "--MY-BOUNDARY-- \r\n";
vmime::bodyPart p;
p.parse(str);
@@ -291,7 +292,7 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
vmime::string str =
"Content-Type: multipart/mixed"
"\r\n\r\n"
- "-- \t UNKNOWN-BOUNDARY\r\nHEADER1\r\n\r\nBODY1\r\n"
+ "--UNKNOWN-BOUNDARY \t \r\nHEADER1\r\n\r\nBODY1\r\n"
"--UNKNOWN-BOUNDARY\r\nHEADER2\r\n\r\nBODY2\r\n"
"--UNKNOWN-BOUNDARY--";
@@ -373,4 +374,41 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
VASSERT("2.2", vmime::dynamicCast <const vmime::streamContentHandler>(body2Cts) != NULL);
}
+ void testParseBoundaryPrefix() {
+ /*
+ * Clients are not supposed to create boundary identifiers that
+ * contain a prefix of another (RFC 2046 section 5.1), but alas
+ * CANCOM FortiMail produces this garbage.
+ */
+ vmime::string str =
+ "Content-Type: multipart/related; boundary=\"--b12\"\r\n"
+ "\r\n"
+ "----b12\r\n"
+ "Content-Type: multipart/alternative; boundary=\"--b12-1\"\r\n"
+ "\r\n"
+ "----b12-1\r\n"
+ "Content-Type: text/plain; charset=utf-8\r\n"
+ "\r\n"
+ "P11\r\n"
+ "----b12-1\r\n"
+ "Content-Type: text/html; charset=utf-8\r\n"
+ "\r\n"
+ "P12\r\n"
+ "----b12-1--\r\n"
+ "----b12\r\n"
+ "\r\n"
+ "P2\r\n"
+ "----b12--\r\n";
+
+ vmime::bodyPart relco;
+ relco.parse(str);
+ auto relbd = relco.getBody();
+ VASSERT_EQ("global-partcount", 2, relbd->getPartCount());
+ auto altbd = relbd->getPartAt(0)->getBody();
+ VASSERT_EQ("part1-partcount", 2, altbd->getPartCount());
+ VASSERT_EQ("part1.1-body", "P11", extractContents(altbd->getPartAt(0)->getBody()->getContents()));
+ VASSERT_EQ("part1.2-body", "P12", extractContents(altbd->getPartAt(1)->getBody()->getContents()));
+ VASSERT_EQ("part2-body", "P2", extractContents(relbd->getPartAt(1)->getBody()->getContents()));
+ }
+
VMIME_TEST_SUITE_END