diff options
author | Jan Engelhardt <[email protected]> | 2018-11-29 20:25:47 +0000 |
---|---|---|
committer | Jan Engelhardt <[email protected]> | 2019-01-25 07:11:07 +0000 |
commit | d1190b496faa754a757aba79f88dcd7e31d6d500 (patch) | |
tree | 4a8a599c0e64d58818bebc761f94acdee189b847 | |
parent | tests: add more malformation tests to mailboxTest (diff) | |
download | vmime-d1190b496faa754a757aba79f88dcd7e31d6d500.tar.gz vmime-d1190b496faa754a757aba79f88dcd7e31d6d500.zip |
Improve address parser for malformed mailbox specifications
Spammers use "Name <addr> <addr>" to trick some parsers.
My expectations as to what the outcome should be is presented
in the updated mailboxTest.cpp.
The DFA in mailbox::parseImpl is hereby redone so as to pick the
rightmost address-looking portion as the address, rather than
something in between. While doing so, it will also no longer mangle
the name part anymore (it does this by keeping a "as_if_name"
variable around until the end).
-rw-r--r-- | src/vmime/mailbox.cpp | 347 | ||||
-rw-r--r-- | tests/parser/mailboxTest.cpp | 12 |
2 files changed, 173 insertions, 186 deletions
diff --git a/src/vmime/mailbox.cpp b/src/vmime/mailbox.cpp index b2a577db..db5cc74e 100644 --- a/src/vmime/mailbox.cpp +++ b/src/vmime/mailbox.cpp @@ -80,238 +80,225 @@ void mailbox::parseImpl( const char* const pstart = buffer.data() + position; const char* p = pstart; - // Ignore blank spaces at the beginning - while (p < pend && parserHelpers::isSpace(*p)) ++p; - // Current state for parsing machine enum States { State_None, + State_String, State_Name, - State_Address + State_Bracket, + State_Address_Bracketed, + State_Address_Unbracketed, + State_NameAt, }; - States state = State_Name; // let's start with name, we will see later (*) + States state = State_None; // Temporary buffers for extracted name and address - string name; - string address; + string name, address; + unsigned int hold = 0; bool hadBrackets = false; while (p < pend) { - - if (state == State_Name) { - + if (state == State_None) { + while (p < pend && parserHelpers::isSpace(*p)) + ++p; + if (*p == ',' || *p == ';') + /* Completely new mailbox -- stop parsing this one. */ + break; if (*p == '<') { - - state = State_Address; + state = State_Bracket; continue; } - if (*p == '"') { // Quoted string - + state = State_String; ++p; + continue; + } + state = State_Name; + } else if (state == State_String) { + bool escaped = false; - bool escaped = false; - - while (p < pend) { - - if (escaped) { - - name += *p; - escaped = false; - - } else if (*p == '\\') { - - escaped = true; - - } else { - - if (*p == '"') { - - ++p; - break; - - } else { - - name += *p; - } - } - - ++p; + for (; p < pend; ++p) { + if (escaped) { + name += *p; + escaped = false; + continue; + } else if (*p == '\\') { + escaped = true; + continue; } - - } else { - - bool escaped = false; - int comment = 0; - - while (p < pend) { - - if (escaped) { - - if (!comment) name += *p; - escaped = false; - - } else if (comment) { - - if (*p == '\\') { - escaped = true; - } else if (*p == '(') { - ++comment; - } else if (*p == ')') { - --comment; - } - - } else if (*p == '\\') { - - escaped = true; - - } else if (*p == '(') { - - ++comment; - - } else if (*p == '<') { - - // Erase any space between display name and <address> - string::iterator q = name.end(); - - while (q != name.begin() && parserHelpers::isSpace(*(q - 1))) { - --q; - } - - name.erase(q, name.end()); - - break; - - } else if (/* parserHelpers::isSpace(*p) || */ *p == '@') { - - break; - - } else { - - name += *p; - } - + if (*p == '"') { ++p; + state = State_None; + break; } + name += *p; } + } else if (state == State_Address_Bracketed) { + bool escaped = false; + int comment = 0; - if (p < pend && *p == '@') { - - // (*) Actually, we were parsing the local-part of an address - // and not a display name... - address = name; - name.clear(); - - bool escaped = false; - int comment = 0; - - while (p < pend) { - - if (escaped) { - - if (!comment) address += *p; - escaped = false; - - } else if (comment) { - - if (*p == '\\') { - escaped = true; - } else if (*p == '(') { - ++comment; - } else if (*p == ')') { - --comment; - } - - } else if (*p == '\\') { - + for (; p < pend; ++p) { + if (escaped) { + if (!comment) { + address += *p; + name += *p; + ++hold; + } + escaped = false; + } else if (comment) { + if (*p == '\\') { escaped = true; - } else if (*p == '(') { - ++comment; - - } else if (parserHelpers::isSpace(*p)) { - - break; - - } else { - - address += *p; + } else if (*p == ')') { + --comment; } - - ++p; + } else if (*p == '(') { + ++comment; + } else if (*p == '\\') { + escaped = true; + } else if (*p == '<') { + state = State_Bracket; + break; + } else if (*p == '>') { + hadBrackets = true; + name += *p++; + ++hold; + state = State_Name; + break; + } else if (parserHelpers::isSpace(*p)) { + name += *p; + ++hold; + } else { + address += *p; + name += *p; + ++hold; } - - } else { - - while (p < pend && parserHelpers::isSpace(*p)) ++p; - state = State_None; } - - } else if (state == State_Address) { - + } else if (state == State_Address_Unbracketed) { bool escaped = false; int comment = 0; - while (p < pend) { - + for (; p < pend; ++p) { if (escaped) { - - if (!comment) address += *p; + if (!comment) { + address += *p; + name += *p; + ++hold; + } escaped = false; - } else if (comment) { - - if (*p == '\\') { + if (*p == '\\') escaped = true; - } else if (*p == '(') { + else if (*p == '(') ++comment; - } else if (*p == ')') { + else if (*p == ')') --comment; - } - } else if (*p == '(') { - ++comment; - } else if (*p == '\\') { - escaped = true; - } else if (*p == '<') { - - // If we found a '<' here, it means that the address - // starts _only_ here...and the stuff we have parsed - // before belongs actually to the display name! - name += address; - address.clear(); - + state = State_Bracket; + break; } else if (*p == '>') { - - hadBrackets = true; + name += *p++; + ++hold; + state = State_None; break; - - } else if (!parserHelpers::isSpace(*p)) { - + } else if (*p == ',' || *p == ';') { + state = State_None; + break; + } else if (parserHelpers::isSpace(*p)) { + state = State_Name; + break; + } else { address += *p; + name += *p; + ++hold; } - - ++p; } + } else if (state == State_Name) { + bool escaped = false; + unsigned int comment = 0, at_hold = 0; - break; - - } else { - - while (p < pend && parserHelpers::isSpace(*p)) ++p; - - if (p < pend) { - state = State_Address; + for (; p < pend; ++p) { + if (escaped) { + if (!comment) { + name += *p; + ++at_hold; + } + escaped = false; + } else if (comment) { + if (*p == '\\') + escaped = true; + else if (*p == '(') + ++comment; + else if (*p == ')') + --comment; + } else if (*p == '\\') { + escaped = true; + } else if (*p == '(') { + ++comment; + } else if (*p == '<') { + state = State_Bracket; + break; + } else if (*p == '@') { + hold = at_hold; + state = State_NameAt; + break; + } else if (parserHelpers::isSpace(*p)) { + name += *p; + ++hold; + at_hold = 1; + } else { + name += *p; + hold = 0; + ++at_hold; + } } + } else if (state == State_Bracket) { + string::const_iterator q = name.cend(); + unsigned int nh = 0; + while (nh < hold && q != name.cbegin() && parserHelpers::isSpace(*(q - 1))) { + --q; + ++nh; + } + hold = nh; + name += *p++; + ++hold; + if (!address.empty()) + // If we found a '<' here, it means that the address + // starts _only_ here...and the stuff we have parsed + // before belongs actually to the display name! + address.clear(); + state = State_Address_Bracketed; + } else if (state == State_NameAt) { + name += *p++; + ++hold; + // (*) Actually, we were parsing the local-part of an address + // and not a display name... + // Back up to the last whitespace and treat as address + string::const_iterator q = name.cend(); + unsigned int nh = 0; + while (nh < hold && q != name.cbegin() && !parserHelpers::isSpace(*(q - 1))) { + --q; + ++nh; + } + address.assign(q, name.cend()); + while (nh < hold && q != name.cbegin() && parserHelpers::isSpace(*(q - 1))) { + --q; + ++nh; + } + hold = nh; + state = State_Address_Unbracketed; } } + if (hold <= name.size()) + name.erase(name.size() - hold); + // Swap name and address when no address was found // (email address is mandatory, whereas name is optional). if (address.empty() && !name.empty() && !hadBrackets) { diff --git a/tests/parser/mailboxTest.cpp b/tests/parser/mailboxTest.cpp index a0d1b694..23d1b4ac 100644 --- a/tests/parser/mailboxTest.cpp +++ b/tests/parser/mailboxTest.cpp @@ -154,20 +154,20 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest) VASSERT_EQ("email", "[email protected]", mbox.getEmail()); - VASSERT_EQ("name", vmime::text("[email protected]"), mbox.getName()); + VASSERT_EQ("name", vmime::text("[email protected] [email protected]"), mbox.getName()); VASSERT_EQ("email", "[email protected]", mbox.getEmail()); mbox.parse("Foo <bar<[email protected]>"); - VASSERT_EQ("name", vmime::text("Foobar"), mbox.getName()); + VASSERT_EQ("name", vmime::text("Foo <bar"), mbox.getName()); VASSERT_EQ("email", "[email protected]", mbox.getEmail()); mbox.parse("Foo <[email protected]> <[email protected]>"); - VASSERT_EQ("name", vmime::text("Foo"), mbox.getName()); - VASSERT_EQ("email", "[email protected]", mbox.getEmail()); + VASSERT_EQ("name", vmime::text("Foo <[email protected]>"), mbox.getName()); + VASSERT_EQ("email", "[email protected]", mbox.getEmail()); mbox.parse("Foo <[email protected]> Bar <[email protected]>"); - VASSERT_EQ("name", vmime::text("Foo"), mbox.getName()); - VASSERT_EQ("email", "[email protected]", mbox.getEmail()); + VASSERT_EQ("name", vmime::text("Foo <[email protected]> Bar"), mbox.getName()); + VASSERT_EQ("email", "[email protected]", mbox.getEmail()); } VMIME_TEST_SUITE_END |