diff --git a/src/body.cpp b/src/body.cpp index 8c599b99..14c14fd6 100644 --- a/src/body.cpp +++ b/src/body.cpp @@ -53,6 +53,79 @@ body::~body() } +// static (NOTE(review): the declaration in vmime/body.hpp is not marked 'static'; confirm). Finds the next boundary at or after 'position'; *boundaryStart and *boundaryEnd are written only when a boundary is accepted. +utility::stream::size_type body::findNextBoundaryPosition + (ref parser, const string& boundary, + const utility::stream::size_type position, const utility::stream::size_type end, + utility::stream::size_type* boundaryStart, utility::stream::size_type* boundaryEnd) +{ + utility::stream::size_type pos = position; + + while (pos != utility::stream::npos && pos < end) + { + pos = parser->findNext(boundary, pos); + + if (pos == utility::stream::npos) + break; // not found + + if (pos != 0) + { + // Skip transport padding bytes (SPACE or HTAB), if any + utility::stream::size_type advance = 0; + + while (advance < pos) // stop at byte 0 so that 'pos - advance - 1' can never go below zero + { + parser->seek(pos - advance - 1); + + const utility::stream::value_type c = parser->peekByte(); + + if (c == ' ' || c == '\t') + ++advance; + else + break; + } + + // Ensure the bytes before boundary are "[LF]--": boundary should be + // at the beginning of a line, and should start with "--" + if (pos - advance >= 3) + { + parser->seek(pos - advance - 3); + + if (parser->matchBytes("\n--", 3)) + { + parser->seek(pos + boundary.length()); + + const utility::stream::value_type next = parser->peekByte(); + + // Boundary should be followed by a new line or a dash + if (next == '\r' || next == '\n' || next == '-') + { + // Get rid of the "[CR]" just before "[LF]--", if any + if (pos - advance >= 4) + { + parser->seek(pos - advance - 4); + + if (parser->peekByte() == '\r') + advance++; + } + + *boundaryStart = pos - advance - 3; + *boundaryEnd = pos + boundary.length(); + + return pos; + } + } + } + } + + // Boundary is a prefix of another, continue the search + pos++; + } + + return pos; // no boundary accepted: pos is npos or is not below 'end' +} + + void body::parseImpl (const parsingContext& /* ctx */, ref parser, @@ -126,17 +199,23 @@ void body::parseImpl buffer[sizeof(buffer) / sizeof(buffer[0]) - 1] = '\0'; + // Skip transport padding bytes (SPACE or HTAB), 
if any + utility::stream::size_type boundarySkip = 0; + + while (boundarySkip < bufferLen && parserHelpers::isSpace(buffer[boundarySkip])) + ++boundarySkip; // NOTE(review): the comment says SPACE or HTAB but isSpace() is used, while isSpaceOrTab() is used further down in parseImpl; confirm isSpace() cannot skip CR/LF + // Extract boundary from buffer (stop at first CR or LF). // We have to stop after a reasonnably long boundary length (100) // not to take the whole body contents for a boundary... string::value_type boundaryBytes[100]; string::size_type boundaryLen = 0; - for (string::value_type c = buffer[0] ; - boundaryLen < bufferLen && boundaryLen < 100 && !(c == '\r' || c == '\n') ; - c = buffer[++boundaryLen]) + for (string::value_type c = buffer[boundarySkip] ; + boundarySkip + boundaryLen < bufferLen && boundaryLen < 100 && !(c == '\r' || c == '\n') ; // bound the index actually read, not just the copied length + ++boundaryLen, c = buffer[boundarySkip + boundaryLen]) { - boundaryBytes[boundaryLen] = buffer[boundaryLen]; + boundaryBytes[boundaryLen] = c; } if (boundaryLen >= 1 && boundaryLen < 100) @@ -167,104 +246,60 @@ void body::parseImpl // This is a multi-part body if (isMultipart && !boundary.empty()) { - const string boundarySep("--" + boundary); - utility::stream::size_type partStart = position; utility::stream::size_type pos = position; bool lastPart = false; - while (pos != utility::stream::npos && pos < end) - { - pos = parser->findNext(boundarySep, pos); - - if (pos == utility::stream::npos) - break; // not found - - if (pos != 0) - { - parser->seek(pos - 1); - - if (parser->peekByte() != '\n') - { - // Boundary is not at a beginning of a line - pos++; - continue; - } - - parser->skip(1 + boundarySep.length()); - } - else - { - parser->seek(pos + boundarySep.length()); - } - - const utility::stream::value_type next = parser->peekByte(); - - if (next == '\r' || next == '\n' || next == '-') - break; - - // Boundary is a prefix of another, continue the search - pos++; - } - - if (pos != utility::stream::npos && pos < end) - { - vmime::text text; - text.parse(parser, position, pos); - - m_prologText = text.getWholeBuffer(); - } + // Find the first boundary + utility::stream::size_type boundaryStart, boundaryEnd; // set by findNextBoundaryPosition() only when it accepts a boundary + pos = findNextBoundaryPosition(parser, boundary, 
pos, end, &boundaryStart, &boundaryEnd); for (int index = 0 ; !lastPart && (pos != utility::stream::npos) && (pos < end) ; ++index) { - utility::stream::size_type partEnd = pos; - - // Get rid of the [CR]LF just before the boundary string - if (pos >= (position + 1)) - { - parser->seek(pos - 1); - - if (parser->peekByte() == '\n') - --partEnd; - } - - if (pos >= (position + 2)) - { - parser->seek(pos - 2); - - if (parser->peekByte() == '\r') - --partEnd; - } + utility::stream::size_type partEnd = boundaryStart; // the part stops where the boundary line begins, so the [CR]LF and "--" before the boundary are excluded // Check whether it is the last part (boundary terminated by "--") - pos += boundarySep.length(); - parser->seek(pos); + parser->seek(boundaryEnd); - if (pos + 1 < end && parser->matchBytes("--", 2)) + if (boundaryEnd + 1 < end && parser->matchBytes("--", 2)) { lastPart = true; - pos += 2; + boundaryEnd += 2; } // RFC #1521, Page 31: // "...(If a boundary appears to end with white space, the // white space must be presumed to have been added by a // gateway, and must be deleted.)..." 
- parser->seek(pos); - pos += parser->skipIf(parserHelpers::isSpaceOrTab, end); + parser->seek(boundaryEnd); + boundaryEnd += parser->skipIf(parserHelpers::isSpaceOrTab, end); // End of boundary line - if (pos + 1 < end && parser->matchBytes("\r\n", 2)) + if (boundaryEnd + 1 < end && parser->matchBytes("\r\n", 2)) { - pos += 2; + boundaryEnd += 2; } - else if (pos < end && parser->peekByte() == '\n') + else if (boundaryEnd < end && parser->peekByte() == '\n') { - ++pos; + ++boundaryEnd; } - if (index > 0) + if (index == 0) // the text before the first boundary is the prolog + { + if (partEnd > partStart) // boundaryStart is not clamped to 'position', so it can be at or before partStart when the first boundary opens the body + { + vmime::text text; + text.parse(parser, partStart, partEnd); + + m_prologText = text.getWholeBuffer(); + } + else + { + m_prologText = ""; + } + } + else // index > 0 { ref part = vmime::create (); @@ -279,41 +314,11 @@ void body::parseImpl m_parts.push_back(part); } - partStart = pos; + partStart = boundaryEnd; - while (pos != utility::stream::npos && pos < end) - { - pos = parser->findNext(boundarySep, pos); - - if (pos == utility::stream::npos) - break; // not found - - if (pos != 0) - { - parser->seek(pos - 1); - - if (parser->peekByte() != '\n') - { - // Boundary is not at a beginning of a line - pos++; - continue; - } - - parser->skip(1 + boundarySep.length()); - } - else - { - parser->seek(pos + boundarySep.length()); - } - - const utility::stream::value_type next = parser->peekByte(); - - if (next == '\r' || next == '\n' || next == '-') - break; - - // Boundary is a prefix of another, continue the search - pos++; - } + // Find the next boundary (the search resumes after the boundary line just consumed) + pos = findNextBoundaryPosition + (parser, boundary, boundaryEnd, end, &boundaryStart, &boundaryEnd); } m_contents = vmime::create (); diff --git a/tests/parser/bodyPartTest.cpp b/tests/parser/bodyPartTest.cpp index 487b3160..453982e0 100644 --- a/tests/parser/bodyPartTest.cpp +++ b/tests/parser/bodyPartTest.cpp @@ -30,10 +30,12 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest) VMIME_TEST(testParse) VMIME_TEST(testGenerate) VMIME_TEST(testParseGuessBoundary) + 
VMIME_TEST(testParseGuessBoundaryWithTransportPadding) VMIME_TEST(testParseMissingLastBoundary) VMIME_TEST(testPrologEpilog) VMIME_TEST(testPrologEncoding) VMIME_TEST(testSuccessiveBoundaries) + VMIME_TEST(testTransportPaddingInBoundary) VMIME_TEST(testGenerate7bit) VMIME_TEST(testTextUsageForQPEncoding) VMIME_TEST(testParseVeryBigMessage) @@ -200,6 +202,24 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest) VASSERT_EQ("part2-body", "", extractContents(p.getBody()->getPartAt(1)->getBody()->getContents())); } + void testTransportPaddingInBoundary() + { + vmime::string str = + "Content-Type: multipart/mixed; boundary=\"MY-BOUNDARY\"" + "\r\n\r\n" + "-- \t MY-BOUNDARY\r\nHEADER1\r\n\r\nBODY1\r\n" + "--MY-BOUNDARY\r\n" + "-- MY-BOUNDARY--\r\n"; + + vmime::bodyPart p; + p.parse(str); + + VASSERT_EQ("count", 2, p.getBody()->getPartCount()); + + VASSERT_EQ("part1-body", "BODY1", extractContents(p.getBody()->getPartAt(0)->getBody()->getContents())); + VASSERT_EQ("part2-body", "", extractContents(p.getBody()->getPartAt(1)->getBody()->getContents())); + } + /** Ensure '7bit' encoding is used when body is 7-bit only. */ void testGenerate7bit() { @@ -256,6 +276,28 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest) VASSERT_EQ("part2-body", "BODY2", extractContents(p.getBody()->getPartAt(1)->getBody()->getContents())); } + void testParseGuessBoundaryWithTransportPadding() + { + // Boundary is not specified in "Content-Type" field, so the + // parser will try to guess it from message contents. + // Transport padding (SPACE/HTAB between "--" and the boundary string) on the first delimiter should be ignored. 
+ + vmime::string str = + "Content-Type: multipart/mixed" + "\r\n\r\n" + "-- \t UNKNOWN-BOUNDARY\r\nHEADER1\r\n\r\nBODY1\r\n" + "--UNKNOWN-BOUNDARY\r\nHEADER2\r\n\r\nBODY2\r\n" + "--UNKNOWN-BOUNDARY--"; + + vmime::bodyPart p; + p.parse(str); + + VASSERT_EQ("count", 2, p.getBody()->getPartCount()); + + VASSERT_EQ("part1-body", "BODY1", extractContents(p.getBody()->getPartAt(0)->getBody()->getContents())); + VASSERT_EQ("part2-body", "BODY2", extractContents(p.getBody()->getPartAt(1)->getBody()->getContents())); + } + void testParseVeryBigMessage() { // When parsing from a seekable input stream, body contents should not diff --git a/vmime/body.hpp b/vmime/body.hpp index f9781eeb..50d67807 100644 --- a/vmime/body.hpp +++ b/vmime/body.hpp @@ -301,6 +301,23 @@ private: protected: + /** Finds the next boundary position in the parsing buffer. + * + * @param parser parser object + * @param boundary boundary string (without "--" nor CR/LF) + * @param position start position + * @param end end position + * @param boundaryStart will hold the start position of the boundary (including any + * CR/LF, "--" and padding before the boundary); only written when a boundary is found + * @param boundaryEnd will hold the end position of the boundary (position just + * before the CRLF or "--" which follows); only written when a boundary is found + * @return the position of the boundary string; when none is found, stream::npos or a position that is not below 'end' (callers must check both) + */ + utility::stream::size_type findNextBoundaryPosition + (ref parser, const string& boundary, + const utility::stream::size_type position, const utility::stream::size_type end, + utility::stream::size_type* boundaryStart, utility::stream::size_type* boundaryEnd); + // Component parsing & assembling void parseImpl (const parsingContext& ctx,