Merge pull request #227 from Kopano-dev/boundaryprefix
Boundary marker parsing: WS rules and EQ check
This commit is contained in:
commit
ab340b561d
@ -53,6 +53,11 @@ body::~body() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* boundaryStart: will become the index for "\r\n--marker"
|
||||||
|
* boundaryEnd: will become the index after "marker", i.e. index for potential trailing "--", "\r\n", etc.
|
||||||
|
* return value: index for "marker"
|
||||||
|
*/
|
||||||
// static
|
// static
|
||||||
size_t body::findNextBoundaryPosition(
|
size_t body::findNextBoundaryPosition(
|
||||||
const shared_ptr <utility::parserInputStreamAdapter>& parser,
|
const shared_ptr <utility::parserInputStreamAdapter>& parser,
|
||||||
@ -65,7 +70,7 @@ size_t body::findNextBoundaryPosition(
|
|||||||
|
|
||||||
size_t pos = position;
|
size_t pos = position;
|
||||||
|
|
||||||
while (pos != npos && pos < end) {
|
for (; pos != npos && pos < end; ++pos) {
|
||||||
|
|
||||||
pos = parser->findNext(boundary, pos);
|
pos = parser->findNext(boundary, pos);
|
||||||
|
|
||||||
@ -73,61 +78,44 @@ size_t body::findNextBoundaryPosition(
|
|||||||
break; // not found
|
break; // not found
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pos != 0) {
|
if (pos == 0) {
|
||||||
|
// Boundary is a prefix of another, continue the search (same for the other "continue"s)
|
||||||
// Skip transport padding bytes (SPACE or HTAB), if any
|
continue;
|
||||||
size_t advance = 0;
|
|
||||||
|
|
||||||
while (pos != 0) {
|
|
||||||
|
|
||||||
parser->seek(pos - advance - 1);
|
|
||||||
|
|
||||||
const byte_t c = parser->peekByte();
|
|
||||||
|
|
||||||
if (c == ' ' || c == '\t') {
|
|
||||||
++advance;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure the bytes before boundary are "[LF]--": boundary should be
|
// Ensure the bytes before boundary are "[LF]--": boundary should be
|
||||||
// at the beginning of a line, and should start with "--"
|
// at the beginning of a line, and should start with "--"
|
||||||
if (pos - advance >= 3) {
|
if (pos < 3) {
|
||||||
|
continue;
|
||||||
parser->seek(pos - advance - 3);
|
}
|
||||||
|
parser->seek(pos - 3);
|
||||||
if (parser->matchBytes("\n--", 3)) {
|
if (!parser->matchBytes("\n--", 3)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
parser->seek(pos + boundary.length());
|
parser->seek(pos + boundary.length());
|
||||||
|
|
||||||
const byte_t next = parser->peekByte();
|
const byte_t next = parser->peekByte();
|
||||||
|
|
||||||
// Boundary should be followed by a new line or a dash
|
// Boundary should be followed by a new line or a dash
|
||||||
if (next == '\r' || next == '\n' || next == '-') {
|
if (!isspace(next) && next != '-') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Get rid of the "[CR]" just before "[LF]--", if any
|
// Get rid of the "[CR]" just before "[LF]--", if any
|
||||||
if (pos - advance >= 4) {
|
size_t backwards = 0;
|
||||||
|
if (pos >= 4) {
|
||||||
parser->seek(pos - advance - 4);
|
parser->seek(pos - 4);
|
||||||
|
|
||||||
if (parser->peekByte() == '\r') {
|
if (parser->peekByte() == '\r') {
|
||||||
advance++;
|
++backwards;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
*boundaryStart = pos - advance - 3;
|
*boundaryStart = pos - backwards - 3;
|
||||||
*boundaryEnd = pos + boundary.length();
|
*boundaryEnd = pos + boundary.length();
|
||||||
|
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Boundary is a prefix of another, continue the search
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
|
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
@ -276,10 +264,10 @@ void body::parseImpl(
|
|||||||
boundaryEnd += 2;
|
boundaryEnd += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
// RFC #1521, Page 31:
|
// RFC 2046 §5.1.1 page 22: """If a boundary delimiter
|
||||||
// "...(If a boundary appears to end with white space, the
|
// line appears to end with white space, the white
|
||||||
// white space must be presumed to have been added by a
|
// space must be presumed to have been added by a
|
||||||
// gateway, and must be deleted.)..."
|
// gateway, and must be deleted."""
|
||||||
parser->seek(boundaryEnd);
|
parser->seek(boundaryEnd);
|
||||||
boundaryEnd += parser->skipIf(parserHelpers::isSpaceOrTab, end);
|
boundaryEnd += parser->skipIf(parserHelpers::isSpaceOrTab, end);
|
||||||
|
|
||||||
@ -288,6 +276,19 @@ void body::parseImpl(
|
|||||||
boundaryEnd += 2;
|
boundaryEnd += 2;
|
||||||
} else if (boundaryEnd < end && parser->peekByte() == '\n') {
|
} else if (boundaryEnd < end && parser->peekByte() == '\n') {
|
||||||
++boundaryEnd;
|
++boundaryEnd;
|
||||||
|
} else if (boundaryEnd == end) {
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* RFC 2046 §5.1.1 page 19: """[...] optional
|
||||||
|
* linear whitespace, and a terminating
|
||||||
|
* CRLF.""" — junk handling is left
|
||||||
|
* unspecified, so we might as well skip it to
|
||||||
|
* facilitate broken mails.
|
||||||
|
*/
|
||||||
|
boundaryEnd += parser->skipIf([](char_t c) { return c != '\n'; }, end);
|
||||||
|
pos = findNextBoundaryPosition(parser, boundary, boundaryEnd, end, &boundaryStart, &boundaryEnd);
|
||||||
|
--index;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (index == 0) {
|
if (index == 0) {
|
||||||
|
@ -39,6 +39,7 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
|
|||||||
VMIME_TEST(testGenerate7bit)
|
VMIME_TEST(testGenerate7bit)
|
||||||
VMIME_TEST(testTextUsageForQPEncoding)
|
VMIME_TEST(testTextUsageForQPEncoding)
|
||||||
VMIME_TEST(testParseVeryBigMessage)
|
VMIME_TEST(testParseVeryBigMessage)
|
||||||
|
VMIME_TEST(testParseBoundaryPrefix)
|
||||||
VMIME_TEST_LIST_END
|
VMIME_TEST_LIST_END
|
||||||
|
|
||||||
|
|
||||||
@ -213,9 +214,9 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
|
|||||||
vmime::string str =
|
vmime::string str =
|
||||||
"Content-Type: multipart/mixed; boundary=\"MY-BOUNDARY\""
|
"Content-Type: multipart/mixed; boundary=\"MY-BOUNDARY\""
|
||||||
"\r\n\r\n"
|
"\r\n\r\n"
|
||||||
"-- \t MY-BOUNDARY\r\nHEADER1\r\n\r\nBODY1\r\n"
|
"--MY-BOUNDARY \t \r\nHEADER1\r\n\r\nBODY1\r\n"
|
||||||
"--MY-BOUNDARY\r\n"
|
"--MY-BOUNDARY\r\n"
|
||||||
"-- MY-BOUNDARY--\r\n";
|
"--MY-BOUNDARY-- \r\n";
|
||||||
|
|
||||||
vmime::bodyPart p;
|
vmime::bodyPart p;
|
||||||
p.parse(str);
|
p.parse(str);
|
||||||
@ -291,7 +292,7 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
|
|||||||
vmime::string str =
|
vmime::string str =
|
||||||
"Content-Type: multipart/mixed"
|
"Content-Type: multipart/mixed"
|
||||||
"\r\n\r\n"
|
"\r\n\r\n"
|
||||||
"-- \t UNKNOWN-BOUNDARY\r\nHEADER1\r\n\r\nBODY1\r\n"
|
"--UNKNOWN-BOUNDARY \t \r\nHEADER1\r\n\r\nBODY1\r\n"
|
||||||
"--UNKNOWN-BOUNDARY\r\nHEADER2\r\n\r\nBODY2\r\n"
|
"--UNKNOWN-BOUNDARY\r\nHEADER2\r\n\r\nBODY2\r\n"
|
||||||
"--UNKNOWN-BOUNDARY--";
|
"--UNKNOWN-BOUNDARY--";
|
||||||
|
|
||||||
@ -373,4 +374,41 @@ VMIME_TEST_SUITE_BEGIN(bodyPartTest)
|
|||||||
VASSERT("2.2", vmime::dynamicCast <const vmime::streamContentHandler>(body2Cts) != NULL);
|
VASSERT("2.2", vmime::dynamicCast <const vmime::streamContentHandler>(body2Cts) != NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void testParseBoundaryPrefix() {
|
||||||
|
/*
|
||||||
|
* Clients are not supposed to create boundary identifiers that
|
||||||
|
* contain a prefix of another (RFC 2046 section 5.1), but alas
|
||||||
|
* CANCOM FortiMail produces this garbage.
|
||||||
|
*/
|
||||||
|
vmime::string str =
|
||||||
|
"Content-Type: multipart/related; boundary=\"--b12\"\r\n"
|
||||||
|
"\r\n"
|
||||||
|
"----b12\r\n"
|
||||||
|
"Content-Type: multipart/alternative; boundary=\"--b12-1\"\r\n"
|
||||||
|
"\r\n"
|
||||||
|
"----b12-1\r\n"
|
||||||
|
"Content-Type: text/plain; charset=utf-8\r\n"
|
||||||
|
"\r\n"
|
||||||
|
"P11\r\n"
|
||||||
|
"----b12-1\r\n"
|
||||||
|
"Content-Type: text/html; charset=utf-8\r\n"
|
||||||
|
"\r\n"
|
||||||
|
"P12\r\n"
|
||||||
|
"----b12-1--\r\n"
|
||||||
|
"----b12\r\n"
|
||||||
|
"\r\n"
|
||||||
|
"P2\r\n"
|
||||||
|
"----b12--\r\n";
|
||||||
|
|
||||||
|
vmime::bodyPart relco;
|
||||||
|
relco.parse(str);
|
||||||
|
auto relbd = relco.getBody();
|
||||||
|
VASSERT_EQ("global-partcount", 2, relbd->getPartCount());
|
||||||
|
auto altbd = relbd->getPartAt(0)->getBody();
|
||||||
|
VASSERT_EQ("part1-partcount", 2, altbd->getPartCount());
|
||||||
|
VASSERT_EQ("part1.1-body", "P11", extractContents(altbd->getPartAt(0)->getBody()->getContents()));
|
||||||
|
VASSERT_EQ("part1.2-body", "P12", extractContents(altbd->getPartAt(1)->getBody()->getContents()));
|
||||||
|
VASSERT_EQ("part2-body", "P2", extractContents(relbd->getPartAt(1)->getBody()->getContents()));
|
||||||
|
}
|
||||||
|
|
||||||
VMIME_TEST_SUITE_END
|
VMIME_TEST_SUITE_END
|
||||||
|
Loading…
Reference in New Issue
Block a user