From 6de75f0c9599b9ce66931ff9bf47655e35d8bffa Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 5 Oct 2019 10:26:33 +0200 Subject: [PATCH 1/4] Modernize RFC reference for boundary line characteristics --- src/vmime/body.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/vmime/body.cpp b/src/vmime/body.cpp index 3757026d..44679390 100644 --- a/src/vmime/body.cpp +++ b/src/vmime/body.cpp @@ -276,10 +276,10 @@ void body::parseImpl( boundaryEnd += 2; } - // RFC #1521, Page 31: - // "...(If a boundary appears to end with white space, the - // white space must be presumed to have been added by a - // gateway, and must be deleted.)..." + // RFC 2046 §5.1.1 page 22: """If a boundary delimiter + // line appears to end with white space, the white + // space must be presumed to have been added by a + // gateway, and must be deleted.""" parser->seek(boundaryEnd); boundaryEnd += parser->skipIf(parserHelpers::isSpaceOrTab, end); From c9119effd784055d27b71a567c3fbd0d6e7b03b0 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 4 Oct 2019 21:53:04 +0200 Subject: [PATCH 2/4] Reduce indent by 3 levels in findNextBoundary --- src/vmime/body.cpp | 96 +++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 47 deletions(-) diff --git a/src/vmime/body.cpp b/src/vmime/body.cpp index 44679390..b7f491a1 100644 --- a/src/vmime/body.cpp +++ b/src/vmime/body.cpp @@ -65,7 +65,7 @@ size_t body::findNextBoundaryPosition( size_t pos = position; - while (pos != npos && pos < end) { + for (; pos != npos && pos < end; ++pos) { pos = parser->findNext(boundary, pos); @@ -73,60 +73,62 @@ size_t body::findNextBoundaryPosition( break; // not found } - if (pos != 0) { + if (pos == 0) { + // Boundary is a prefix of another, continue the search (same for the other "continue"s) + continue; + } - // Skip transport padding bytes (SPACE or HTAB), if any - size_t advance = 0; + // Skip transport padding bytes (SPACE or HTAB), if any + size_t advance = 0; - while (pos != 0) { + while (pos != 0) { - parser->seek(pos - advance - 1); + parser->seek(pos - advance - 1); - const byte_t c = parser->peekByte(); + const byte_t c = parser->peekByte(); - if (c == ' ' || c == '\t') { - ++advance; - } else { - break; - } - } - - // Ensure the bytes before boundary are "[LF]--": boundary should be - // at the beginning of a line, and should start with "--" - if (pos - advance >= 3) { - - parser->seek(pos - advance - 3); - - if (parser->matchBytes("\n--", 3)) { - - parser->seek(pos + boundary.length()); - - const byte_t next = parser->peekByte(); - - // Boundary should be followed by a new line or a dash - if (next == '\r' || next == '\n' || next == '-') { - - // Get rid of the "[CR]" just before "[LF]--", if any - if (pos - advance >= 4) { - - parser->seek(pos - advance - 4); - - if (parser->peekByte() == '\r') { - advance++; - } - } - - *boundaryStart = pos - advance - 3; - *boundaryEnd = pos + boundary.length(); - - return pos; - } - } + if (c == ' ' || c == '\t') { + ++advance; + } else { + break; } } - // Boundary is a prefix of another, continue the search - pos++; + // Ensure the bytes before boundary are "[LF]--": boundary should be + // at the beginning of a line, and should start with "--" + if (pos - advance < 3) { + continue; + } + + parser->seek(pos - advance - 3); + + if (!parser->matchBytes("\n--", 3)) { + continue; + } + + parser->seek(pos + boundary.length()); + + const byte_t next = parser->peekByte(); + + // Boundary should be followed by a new line or a dash + if (next != '\r' && next != '\n' && next != '-') { + continue; + } + + // Get rid of the "[CR]" just before "[LF]--", if any + if (pos - advance >= 4) { + + parser->seek(pos - advance - 4); + + if (parser->peekByte() == '\r') { + advance++; + } + } + + *boundaryStart = pos - advance - 3; + *boundaryEnd = pos + boundary.length(); + + return pos; } return pos; From df32418df5af976566e6eecb2777ccab7eadf5b3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 4 Oct 2019 22:51:05 +0200 Subject: [PATCH 3/4] Disregard whitespace between leading boundary hyphens and marker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The way I read the RFC is that whitespace is not allowed before the boundary marker, only afterwards, so the checks for leading WS are removed, and the missing check for trailing WS is added. See RFC 2046 §5.1.1: """The boundary delimiter line is then defined as a line consisting entirely of two hyphen characters ("-", decimal value 45) followed by the boundary parameter value from the Content-Type header field, optional linear whitespace, and a terminating CRLF.""" --- src/vmime/body.cpp | 35 ++++++++--------------------------- tests/parser/bodyPartTest.cpp | 6 +++--- 2 files changed, 11 insertions(+), 30 deletions(-) diff --git a/src/vmime/body.cpp b/src/vmime/body.cpp index b7f491a1..4103f328 100644 --- a/src/vmime/body.cpp +++ b/src/vmime/body.cpp @@ -78,30 +78,12 @@ size_t body::findNextBoundaryPosition( continue; } - // Skip transport padding bytes (SPACE or HTAB), if any - size_t advance = 0; - - while (pos != 0) { - - parser->seek(pos - advance - 1); - - const byte_t c = parser->peekByte(); - - if (c == ' ' || c == '\t') { - ++advance; - } else { - break; - } - } - // Ensure the bytes before boundary are "[LF]--": boundary should be // at the beginning of a line, and should start with "--" - if (pos - advance < 3) { + if (pos < 3) { continue; } - - parser->seek(pos - advance - 3); - + parser->seek(pos - 3); if (!parser->matchBytes("\n--", 3)) { continue; } @@ -111,21 +93,20 @@ size_t body::findNextBoundaryPosition( const byte_t next = parser->peekByte(); // Boundary should be followed by a new line or a dash - if (next != '\r' && next != '\n' && next != '-') { + if (!isspace(next) && next != '-') { continue; } // Get rid of the "[CR]" just before "[LF]--", if any - if (pos - advance >= 4) { - - parser->seek(pos - advance - 4); - + size_t backwards = 0; + if (pos >= 4) { + parser->seek(pos - 4); if (parser->peekByte() == '\r') { - advance++; + ++backwards; } } - *boundaryStart = pos - advance - 3; + *boundaryStart = pos - backwards - 3; *boundaryEnd = pos + boundary.length(); return pos; diff --git a/tests/parser/bodyPartTest.cpp b/tests/parser/bodyPartTest.cpp index 91742553..062007d9 100644 --- a/tests/parser/bodyPartTest.cpp +++ b/tests/parser/bodyPartTest.cpp @@ -213,9 +213,9 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest) vmime::string str = "Content-Type: multipart/mixed; boundary=\"MY-BOUNDARY\"" "\r\n\r\n" - "-- \t MY-BOUNDARY\r\nHEADER1\r\n\r\nBODY1\r\n" + "--MY-BOUNDARY \t \r\nHEADER1\r\n\r\nBODY1\r\n" "--MY-BOUNDARY\r\n" - "-- MY-BOUNDARY--\r\n"; + "--MY-BOUNDARY-- \r\n"; vmime::bodyPart p; p.parse(str); @@ -291,7 +291,7 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest) vmime::string str = "Content-Type: multipart/mixed" "\r\n\r\n" - "-- \t UNKNOWN-BOUNDARY\r\nHEADER1\r\n\r\nBODY1\r\n" + "--UNKNOWN-BOUNDARY \t \r\nHEADER1\r\n\r\nBODY1\r\n" "--UNKNOWN-BOUNDARY\r\nHEADER2\r\n\r\nBODY2\r\n" "--UNKNOWN-BOUNDARY--"; From b06e9e6f86389864854ece1207cd30cfe6a874a2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 5 Oct 2019 11:24:48 +0200 Subject: [PATCH 4/4] Skip delimiter lines that are not exactly equal to the boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is crap software out there that generates mails violating the prefix ban clause from RFC 2046 §5.1 ¶2. Switch vmime from a prefix match to an equality match, similar to what Alpine and Thunderbird do too. --- src/vmime/body.cpp | 18 +++++++++++++++++ tests/parser/bodyPartTest.cpp | 38 +++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/src/vmime/body.cpp b/src/vmime/body.cpp index 4103f328..a3875b9d 100644 --- a/src/vmime/body.cpp +++ b/src/vmime/body.cpp @@ -53,6 +53,11 @@ body::~body() { } +/* + * boundaryStart: will become the index for "\r\n--marker" + * boundaryEnd: will become the index after "marker", i.e. index for potential trailing "--", "\r\n", etc. + * return value: index for "marker" + */ // static size_t body::findNextBoundaryPosition( const shared_ptr & parser, @@ -271,6 +276,19 @@ void body::parseImpl( boundaryEnd += 2; } else if (boundaryEnd < end && parser->peekByte() == '\n') { ++boundaryEnd; + } else if (boundaryEnd == end) { + } else { + /* + * RFC 2046 §5.1.1 page 19: """[...] optional + * linear whitespace, and a terminating + * CRLF.""" — junk handling is left + * unspecified, so we might as well skip it to + * facilitate broken mails. + */ + boundaryEnd += parser->skipIf([](char_t c) { return c != '\n'; }, end); + pos = findNextBoundaryPosition(parser, boundary, boundaryEnd, end, &boundaryStart, &boundaryEnd); + --index; + continue; } if (index == 0) { diff --git a/tests/parser/bodyPartTest.cpp b/tests/parser/bodyPartTest.cpp index 062007d9..3aaadd03 100644 --- a/tests/parser/bodyPartTest.cpp +++ b/tests/parser/bodyPartTest.cpp @@ -39,6 +39,7 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest) VMIME_TEST(testGenerate7bit) VMIME_TEST(testTextUsageForQPEncoding) VMIME_TEST(testParseVeryBigMessage) + VMIME_TEST(testParseBoundaryPrefix) VMIME_TEST_LIST_END @@ -373,4 +374,41 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest) VASSERT("2.2", vmime::dynamicCast (body2Cts) != NULL); } + void testParseBoundaryPrefix() { + /* + * Clients are not supposed to create boundary identifiers that + * contain a prefix of another (RFC 2046 section 5.1), but alas + * CANCOM FortiMail produces this garbage. + */ + vmime::string str = + "Content-Type: multipart/related; boundary=\"--b12\"\r\n" + "\r\n" + "----b12\r\n" + "Content-Type: multipart/alternative; boundary=\"--b12-1\"\r\n" + "\r\n" + "----b12-1\r\n" + "Content-Type: text/plain; charset=utf-8\r\n" + "\r\n" + "P11\r\n" + "----b12-1\r\n" + "Content-Type: text/html; charset=utf-8\r\n" + "\r\n" + "P12\r\n" + "----b12-1--\r\n" + "----b12\r\n" + "\r\n" + "P2\r\n" + "----b12--\r\n"; + + vmime::bodyPart relco; + relco.parse(str); + auto relbd = relco.getBody(); + VASSERT_EQ("global-partcount", 2, relbd->getPartCount()); + auto altbd = relbd->getPartAt(0)->getBody(); + VASSERT_EQ("part1-partcount", 2, altbd->getPartCount()); + VASSERT_EQ("part1.1-body", "P11", extractContents(altbd->getPartAt(0)->getBody()->getContents())); + VASSERT_EQ("part1.2-body", "P12", extractContents(altbd->getPartAt(1)->getBody()->getContents())); + VASSERT_EQ("part2-body", "P2", extractContents(relbd->getPartAt(1)->getBody()->getContents())); + } + VMIME_TEST_SUITE_END