From cc18aa39c15ffffdd622cb61c3c147c2bc1903aa Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 24 Jan 2019 13:14:50 +0100 Subject: [PATCH 1/2] tests: add more malformation tests to mailboxTest --- tests/parser/mailboxTest.cpp | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/parser/mailboxTest.cpp b/tests/parser/mailboxTest.cpp index 127bb422..a0d1b694 100644 --- a/tests/parser/mailboxTest.cpp +++ b/tests/parser/mailboxTest.cpp @@ -30,7 +30,7 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest) VMIME_TEST(testParse) VMIME_TEST(testEmptyEmailAddress) VMIME_TEST(testSeparatorInComment) - VMIME_TEST(testAddressInName) + VMIME_TEST(testMalformations) VMIME_TEST_LIST_END @@ -146,13 +146,28 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest) VASSERT_EQ("email2", "bbb@vmime.org", mbox2->getEmail()); } - void testAddressInName() { - + void testMalformations() { vmime::mailbox mbox; - mbox.parse("a@b.c "); + mbox.parse("a@b.c "); VASSERT_EQ("name", vmime::text("a@b.c"), mbox.getName()); VASSERT_EQ("email", "e@f.g", mbox.getEmail()); + + mbox.parse("a@b.c e@f.g "); + VASSERT_EQ("name", vmime::text("e@f.g"), mbox.getName()); + VASSERT_EQ("email", "h@i.j", mbox.getEmail()); + + mbox.parse("Foo "); + VASSERT_EQ("name", vmime::text("Foobar"), mbox.getName()); + VASSERT_EQ("email", "baz@quux.com", mbox.getEmail()); + + mbox.parse("Foo "); + VASSERT_EQ("name", vmime::text("Foo"), mbox.getName()); + VASSERT_EQ("email", "foo@x.com", mbox.getEmail()); + + mbox.parse("Foo Bar "); + VASSERT_EQ("name", vmime::text("Foo"), mbox.getName()); + VASSERT_EQ("email", "foo@x.com", mbox.getEmail()); } VMIME_TEST_SUITE_END From d1190b496faa754a757aba79f88dcd7e31d6d500 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 29 Nov 2018 21:25:47 +0100 Subject: [PATCH 2/2] Improve address parser for malformed mailbox specifications Spammers use "Name " to trick some parsers. My expectations as to what the outcome should be is presented in the updated mailboxTest.cpp. The DFA in mailbox::parseImpl is hereby redone so as to pick the rightmost address-looking portion as the address, rather than something in between. While doing so, it will also no longer mangle the name part anymore (it does this by keeping a "as_if_name" variable around until the end). --- src/vmime/mailbox.cpp | 357 +++++++++++++++++------------------ tests/parser/mailboxTest.cpp | 12 +- 2 files changed, 178 insertions(+), 191 deletions(-) diff --git a/src/vmime/mailbox.cpp b/src/vmime/mailbox.cpp index b2a577db..db5cc74e 100644 --- a/src/vmime/mailbox.cpp +++ b/src/vmime/mailbox.cpp @@ -80,189 +80,73 @@ void mailbox::parseImpl( const char* const pstart = buffer.data() + position; const char* p = pstart; - // Ignore blank spaces at the beginning - while (p < pend && parserHelpers::isSpace(*p)) ++p; - // Current state for parsing machine enum States { State_None, + State_String, State_Name, - State_Address + State_Bracket, + State_Address_Bracketed, + State_Address_Unbracketed, + State_NameAt, }; - States state = State_Name; // let's start with name, we will see later (*) + States state = State_None; // Temporary buffers for extracted name and address - string name; - string address; + string name, address; + unsigned int hold = 0; bool hadBrackets = false; while (p < pend) { - - if (state == State_Name) { - + if (state == State_None) { + while (p < pend && parserHelpers::isSpace(*p)) + ++p; + if (*p == ',' || *p == ';') + /* Completely new mailbox -- stop parsing this one. */ + break; if (*p == '<') { - - state = State_Address; + state = State_Bracket; continue; } - if (*p == '"') { // Quoted string - + state = State_String; ++p; - - bool escaped = false; - - while (p < pend) { - - if (escaped) { - - name += *p; - escaped = false; - - } else if (*p == '\\') { - - escaped = true; - - } else { - - if (*p == '"') { - - ++p; - break; - - } else { - - name += *p; - } - } - - ++p; - } - - } else { - - bool escaped = false; - int comment = 0; - - while (p < pend) { - - if (escaped) { - - if (!comment) name += *p; - escaped = false; - - } else if (comment) { - - if (*p == '\\') { - escaped = true; - } else if (*p == '(') { - ++comment; - } else if (*p == ')') { - --comment; - } - - } else if (*p == '\\') { - - escaped = true; - - } else if (*p == '(') { - - ++comment; - - } else if (*p == '<') { - - // Erase any space between display name and
- string::iterator q = name.end(); - - while (q != name.begin() && parserHelpers::isSpace(*(q - 1))) { - --q; - } - - name.erase(q, name.end()); - - break; - - } else if (/* parserHelpers::isSpace(*p) || */ *p == '@') { - - break; - - } else { - - name += *p; - } - - ++p; - } + continue; } + state = State_Name; + } else if (state == State_String) { + bool escaped = false; - if (p < pend && *p == '@') { - - // (*) Actually, we were parsing the local-part of an address - // and not a display name... - address = name; - name.clear(); - - bool escaped = false; - int comment = 0; - - while (p < pend) { - - if (escaped) { - - if (!comment) address += *p; - escaped = false; - - } else if (comment) { - - if (*p == '\\') { - escaped = true; - } else if (*p == '(') { - ++comment; - } else if (*p == ')') { - --comment; - } - - } else if (*p == '\\') { - - escaped = true; - - } else if (*p == '(') { - - ++comment; - - } else if (parserHelpers::isSpace(*p)) { - - break; - - } else { - - address += *p; - } - - ++p; + for (; p < pend; ++p) { + if (escaped) { + name += *p; + escaped = false; + continue; + } else if (*p == '\\') { + escaped = true; + continue; } - - } else { - - while (p < pend && parserHelpers::isSpace(*p)) ++p; - state = State_None; + if (*p == '"') { + ++p; + state = State_None; + break; + } + name += *p; } - - } else if (state == State_Address) { - + } else if (state == State_Address_Bracketed) { bool escaped = false; int comment = 0; - while (p < pend) { - + for (; p < pend; ++p) { if (escaped) { - - if (!comment) address += *p; + if (!comment) { + address += *p; + name += *p; + ++hold; + } escaped = false; - } else if (comment) { - if (*p == '\\') { escaped = true; } else if (*p == '(') { @@ -270,48 +154,151 @@ void mailbox::parseImpl( } else if (*p == ')') { --comment; } - } else if (*p == '(') { - ++comment; - } else if (*p == '\\') { - escaped = true; - } else if (*p == '<') { - - // If we found a '<' here, it means that the address - // starts _only_ here...and the stuff we have parsed - // before belongs actually to the display name! - name += address; - address.clear(); - - } else if (*p == '>') { - - hadBrackets = true; + state = State_Bracket; break; - - } else if (!parserHelpers::isSpace(*p)) { - + } else if (*p == '>') { + hadBrackets = true; + name += *p++; + ++hold; + state = State_Name; + break; + } else if (parserHelpers::isSpace(*p)) { + name += *p; + ++hold; + } else { address += *p; + name += *p; + ++hold; } - - ++p; } + } else if (state == State_Address_Unbracketed) { + bool escaped = false; + int comment = 0; - break; - - } else { - - while (p < pend && parserHelpers::isSpace(*p)) ++p; - - if (p < pend) { - state = State_Address; + for (; p < pend; ++p) { + if (escaped) { + if (!comment) { + address += *p; + name += *p; + ++hold; + } + escaped = false; + } else if (comment) { + if (*p == '\\') + escaped = true; + else if (*p == '(') + ++comment; + else if (*p == ')') + --comment; + } else if (*p == '(') { + ++comment; + } else if (*p == '\\') { + escaped = true; + } else if (*p == '<') { + state = State_Bracket; + break; + } else if (*p == '>') { + name += *p++; + ++hold; + state = State_None; + break; + } else if (*p == ',' || *p == ';') { + state = State_None; + break; + } else if (parserHelpers::isSpace(*p)) { + state = State_Name; + break; + } else { + address += *p; + name += *p; + ++hold; + } } + } else if (state == State_Name) { + bool escaped = false; + unsigned int comment = 0, at_hold = 0; + + for (; p < pend; ++p) { + if (escaped) { + if (!comment) { + name += *p; + ++at_hold; + } + escaped = false; + } else if (comment) { + if (*p == '\\') + escaped = true; + else if (*p == '(') + ++comment; + else if (*p == ')') + --comment; + } else if (*p == '\\') { + escaped = true; + } else if (*p == '(') { + ++comment; + } else if (*p == '<') { + state = State_Bracket; + break; + } else if (*p == '@') { + hold = at_hold; + state = State_NameAt; + break; + } else if (parserHelpers::isSpace(*p)) { + name += *p; + ++hold; + at_hold = 1; + } else { + name += *p; + hold = 0; + ++at_hold; + } + } + } else if (state == State_Bracket) { + string::const_iterator q = name.cend(); + unsigned int nh = 0; + while (nh < hold && q != name.cbegin() && parserHelpers::isSpace(*(q - 1))) { + --q; + ++nh; + } + hold = nh; + name += *p++; + ++hold; + if (!address.empty()) + // If we found a '<' here, it means that the address + // starts _only_ here...and the stuff we have parsed + // before belongs actually to the display name! + address.clear(); + state = State_Address_Bracketed; + } else if (state == State_NameAt) { + name += *p++; + ++hold; + // (*) Actually, we were parsing the local-part of an address + // and not a display name... + // Back up to the last whitespace and treat as address + string::const_iterator q = name.cend(); + unsigned int nh = 0; + while (nh < hold && q != name.cbegin() && !parserHelpers::isSpace(*(q - 1))) { + --q; + ++nh; + } + address.assign(q, name.cend()); + while (nh < hold && q != name.cbegin() && parserHelpers::isSpace(*(q - 1))) { + --q; + ++nh; + } + hold = nh; + state = State_Address_Unbracketed; } } + if (hold <= name.size()) + name.erase(name.size() - hold); + // Swap name and address when no address was found // (email address is mandatory, whereas name is optional). if (address.empty() && !name.empty() && !hadBrackets) { diff --git a/tests/parser/mailboxTest.cpp b/tests/parser/mailboxTest.cpp index a0d1b694..23d1b4ac 100644 --- a/tests/parser/mailboxTest.cpp +++ b/tests/parser/mailboxTest.cpp @@ -154,20 +154,20 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest) VASSERT_EQ("email", "e@f.g", mbox.getEmail()); mbox.parse("a@b.c e@f.g "); - VASSERT_EQ("name", vmime::text("e@f.g"), mbox.getName()); + VASSERT_EQ("name", vmime::text("a@b.c e@f.g"), mbox.getName()); VASSERT_EQ("email", "h@i.j", mbox.getEmail()); mbox.parse("Foo "); - VASSERT_EQ("name", vmime::text("Foobar"), mbox.getName()); + VASSERT_EQ("name", vmime::text("Foo "); - VASSERT_EQ("name", vmime::text("Foo"), mbox.getName()); - VASSERT_EQ("email", "foo@x.com", mbox.getEmail()); + VASSERT_EQ("name", vmime::text("Foo "), mbox.getName()); + VASSERT_EQ("email", "bar@x.com", mbox.getEmail()); mbox.parse("Foo Bar "); - VASSERT_EQ("name", vmime::text("Foo"), mbox.getName()); - VASSERT_EQ("email", "foo@x.com", mbox.getEmail()); + VASSERT_EQ("name", vmime::text("Foo Bar"), mbox.getName()); + VASSERT_EQ("email", "bar@y.com", mbox.getEmail()); } VMIME_TEST_SUITE_END