aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Engelhardt <[email protected]>2018-11-29 20:25:47 +0000
committerJan Engelhardt <[email protected]>2019-01-25 07:11:07 +0000
commitd1190b496faa754a757aba79f88dcd7e31d6d500 (patch)
tree4a8a599c0e64d58818bebc761f94acdee189b847
parenttests: add more malformation tests to mailboxTest (diff)
downloadvmime-d1190b496faa754a757aba79f88dcd7e31d6d500.tar.gz
vmime-d1190b496faa754a757aba79f88dcd7e31d6d500.zip
Improve address parser for malformed mailbox specifications
Spammers use "Name <addr> <addr>" to trick some parsers. My expectations as to what the outcome should be is presented in the updated mailboxTest.cpp. The DFA in mailbox::parseImpl is hereby redone so as to pick the rightmost address-looking portion as the address, rather than something in between. While doing so, it will also no longer mangle the name part anymore (it does this by keeping a "as_if_name" variable around until the end).
-rw-r--r--src/vmime/mailbox.cpp347
-rw-r--r--tests/parser/mailboxTest.cpp12
2 files changed, 173 insertions, 186 deletions
diff --git a/src/vmime/mailbox.cpp b/src/vmime/mailbox.cpp
index b2a577db..db5cc74e 100644
--- a/src/vmime/mailbox.cpp
+++ b/src/vmime/mailbox.cpp
@@ -80,238 +80,225 @@ void mailbox::parseImpl(
const char* const pstart = buffer.data() + position;
const char* p = pstart;
- // Ignore blank spaces at the beginning
- while (p < pend && parserHelpers::isSpace(*p)) ++p;
-
// Current state for parsing machine
enum States {
State_None,
+ State_String,
State_Name,
- State_Address
+ State_Bracket,
+ State_Address_Bracketed,
+ State_Address_Unbracketed,
+ State_NameAt,
};
- States state = State_Name; // let's start with name, we will see later (*)
+ States state = State_None;
// Temporary buffers for extracted name and address
- string name;
- string address;
+ string name, address;
+ unsigned int hold = 0;
bool hadBrackets = false;
while (p < pend) {
-
- if (state == State_Name) {
-
+ if (state == State_None) {
+ while (p < pend && parserHelpers::isSpace(*p))
+ ++p;
+ if (*p == ',' || *p == ';')
+ /* Completely new mailbox -- stop parsing this one. */
+ break;
if (*p == '<') {
-
- state = State_Address;
+ state = State_Bracket;
continue;
}
-
if (*p == '"') { // Quoted string
-
+ state = State_String;
++p;
+ continue;
+ }
+ state = State_Name;
+ } else if (state == State_String) {
+ bool escaped = false;
- bool escaped = false;
-
- while (p < pend) {
-
- if (escaped) {
-
- name += *p;
- escaped = false;
-
- } else if (*p == '\\') {
-
- escaped = true;
-
- } else {
-
- if (*p == '"') {
-
- ++p;
- break;
-
- } else {
-
- name += *p;
- }
- }
-
- ++p;
+ for (; p < pend; ++p) {
+ if (escaped) {
+ name += *p;
+ escaped = false;
+ continue;
+ } else if (*p == '\\') {
+ escaped = true;
+ continue;
}
-
- } else {
-
- bool escaped = false;
- int comment = 0;
-
- while (p < pend) {
-
- if (escaped) {
-
- if (!comment) name += *p;
- escaped = false;
-
- } else if (comment) {
-
- if (*p == '\\') {
- escaped = true;
- } else if (*p == '(') {
- ++comment;
- } else if (*p == ')') {
- --comment;
- }
-
- } else if (*p == '\\') {
-
- escaped = true;
-
- } else if (*p == '(') {
-
- ++comment;
-
- } else if (*p == '<') {
-
- // Erase any space between display name and <address>
- string::iterator q = name.end();
-
- while (q != name.begin() && parserHelpers::isSpace(*(q - 1))) {
- --q;
- }
-
- name.erase(q, name.end());
-
- break;
-
- } else if (/* parserHelpers::isSpace(*p) || */ *p == '@') {
-
- break;
-
- } else {
-
- name += *p;
- }
-
+ if (*p == '"') {
++p;
+ state = State_None;
+ break;
}
+ name += *p;
}
+ } else if (state == State_Address_Bracketed) {
+ bool escaped = false;
+ int comment = 0;
- if (p < pend && *p == '@') {
-
- // (*) Actually, we were parsing the local-part of an address
- // and not a display name...
- address = name;
- name.clear();
-
- bool escaped = false;
- int comment = 0;
-
- while (p < pend) {
-
- if (escaped) {
-
- if (!comment) address += *p;
- escaped = false;
-
- } else if (comment) {
-
- if (*p == '\\') {
- escaped = true;
- } else if (*p == '(') {
- ++comment;
- } else if (*p == ')') {
- --comment;
- }
-
- } else if (*p == '\\') {
-
+ for (; p < pend; ++p) {
+ if (escaped) {
+ if (!comment) {
+ address += *p;
+ name += *p;
+ ++hold;
+ }
+ escaped = false;
+ } else if (comment) {
+ if (*p == '\\') {
escaped = true;
-
} else if (*p == '(') {
-
++comment;
-
- } else if (parserHelpers::isSpace(*p)) {
-
- break;
-
- } else {
-
- address += *p;
+ } else if (*p == ')') {
+ --comment;
}
-
- ++p;
+ } else if (*p == '(') {
+ ++comment;
+ } else if (*p == '\\') {
+ escaped = true;
+ } else if (*p == '<') {
+ state = State_Bracket;
+ break;
+ } else if (*p == '>') {
+ hadBrackets = true;
+ name += *p++;
+ ++hold;
+ state = State_Name;
+ break;
+ } else if (parserHelpers::isSpace(*p)) {
+ name += *p;
+ ++hold;
+ } else {
+ address += *p;
+ name += *p;
+ ++hold;
}
-
- } else {
-
- while (p < pend && parserHelpers::isSpace(*p)) ++p;
- state = State_None;
}
-
- } else if (state == State_Address) {
-
+ } else if (state == State_Address_Unbracketed) {
bool escaped = false;
int comment = 0;
- while (p < pend) {
-
+ for (; p < pend; ++p) {
if (escaped) {
-
- if (!comment) address += *p;
+ if (!comment) {
+ address += *p;
+ name += *p;
+ ++hold;
+ }
escaped = false;
-
} else if (comment) {
-
- if (*p == '\\') {
+ if (*p == '\\')
escaped = true;
- } else if (*p == '(') {
+ else if (*p == '(')
++comment;
- } else if (*p == ')') {
+ else if (*p == ')')
--comment;
- }
-
} else if (*p == '(') {
-
++comment;
-
} else if (*p == '\\') {
-
escaped = true;
-
} else if (*p == '<') {
-
- // If we found a '<' here, it means that the address
- // starts _only_ here...and the stuff we have parsed
- // before belongs actually to the display name!
- name += address;
- address.clear();
-
+ state = State_Bracket;
+ break;
} else if (*p == '>') {
-
- hadBrackets = true;
+ name += *p++;
+ ++hold;
+ state = State_None;
break;
-
- } else if (!parserHelpers::isSpace(*p)) {
-
+ } else if (*p == ',' || *p == ';') {
+ state = State_None;
+ break;
+ } else if (parserHelpers::isSpace(*p)) {
+ state = State_Name;
+ break;
+ } else {
address += *p;
+ name += *p;
+ ++hold;
}
-
- ++p;
}
+ } else if (state == State_Name) {
+ bool escaped = false;
+ unsigned int comment = 0, at_hold = 0;
- break;
-
- } else {
-
- while (p < pend && parserHelpers::isSpace(*p)) ++p;
-
- if (p < pend) {
- state = State_Address;
+ for (; p < pend; ++p) {
+ if (escaped) {
+ if (!comment) {
+ name += *p;
+ ++at_hold;
+ }
+ escaped = false;
+ } else if (comment) {
+ if (*p == '\\')
+ escaped = true;
+ else if (*p == '(')
+ ++comment;
+ else if (*p == ')')
+ --comment;
+ } else if (*p == '\\') {
+ escaped = true;
+ } else if (*p == '(') {
+ ++comment;
+ } else if (*p == '<') {
+ state = State_Bracket;
+ break;
+ } else if (*p == '@') {
+ hold = at_hold;
+ state = State_NameAt;
+ break;
+ } else if (parserHelpers::isSpace(*p)) {
+ name += *p;
+ ++hold;
+ at_hold = 1;
+ } else {
+ name += *p;
+ hold = 0;
+ ++at_hold;
+ }
}
+ } else if (state == State_Bracket) {
+ string::const_iterator q = name.cend();
+ unsigned int nh = 0;
+ while (nh < hold && q != name.cbegin() && parserHelpers::isSpace(*(q - 1))) {
+ --q;
+ ++nh;
+ }
+ hold = nh;
+ name += *p++;
+ ++hold;
+ if (!address.empty())
+ // If we found a '<' here, it means that the address
+ // starts _only_ here...and the stuff we have parsed
+ // before belongs actually to the display name!
+ address.clear();
+ state = State_Address_Bracketed;
+ } else if (state == State_NameAt) {
+ name += *p++;
+ ++hold;
+ // (*) Actually, we were parsing the local-part of an address
+ // and not a display name...
+ // Back up to the last whitespace and treat as address
+ string::const_iterator q = name.cend();
+ unsigned int nh = 0;
+ while (nh < hold && q != name.cbegin() && !parserHelpers::isSpace(*(q - 1))) {
+ --q;
+ ++nh;
+ }
+ address.assign(q, name.cend());
+ while (nh < hold && q != name.cbegin() && parserHelpers::isSpace(*(q - 1))) {
+ --q;
+ ++nh;
+ }
+ hold = nh;
+ state = State_Address_Unbracketed;
}
}
+ if (hold <= name.size())
+ name.erase(name.size() - hold);
+
// Swap name and address when no address was found
// (email address is mandatory, whereas name is optional).
if (address.empty() && !name.empty() && !hadBrackets) {
diff --git a/tests/parser/mailboxTest.cpp b/tests/parser/mailboxTest.cpp
index a0d1b694..23d1b4ac 100644
--- a/tests/parser/mailboxTest.cpp
+++ b/tests/parser/mailboxTest.cpp
@@ -154,20 +154,20 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest)
VASSERT_EQ("email", "[email protected]", mbox.getEmail());
- VASSERT_EQ("name", vmime::text("[email protected]"), mbox.getName());
+ VASSERT_EQ("name", vmime::text("[email protected] [email protected]"), mbox.getName());
VASSERT_EQ("email", "[email protected]", mbox.getEmail());
mbox.parse("Foo <bar<[email protected]>");
- VASSERT_EQ("name", vmime::text("Foobar"), mbox.getName());
+ VASSERT_EQ("name", vmime::text("Foo <bar"), mbox.getName());
VASSERT_EQ("email", "[email protected]", mbox.getEmail());
mbox.parse("Foo <[email protected]> <[email protected]>");
- VASSERT_EQ("name", vmime::text("Foo"), mbox.getName());
- VASSERT_EQ("email", "[email protected]", mbox.getEmail());
+ VASSERT_EQ("name", vmime::text("Foo <[email protected]>"), mbox.getName());
+ VASSERT_EQ("email", "[email protected]", mbox.getEmail());
mbox.parse("Foo <[email protected]> Bar <[email protected]>");
- VASSERT_EQ("name", vmime::text("Foo"), mbox.getName());
- VASSERT_EQ("email", "[email protected]", mbox.getEmail());
+ VASSERT_EQ("name", vmime::text("Foo <[email protected]> Bar"), mbox.getName());
+ VASSERT_EQ("email", "[email protected]", mbox.getEmail());
}
VMIME_TEST_SUITE_END