From cc18aa39c15ffffdd622cb61c3c147c2bc1903aa Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 24 Jan 2019 13:14:50 +0100 Subject: tests: add more malformation tests to mailboxTest --- tests/parser/mailboxTest.cpp | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/parser/mailboxTest.cpp b/tests/parser/mailboxTest.cpp index 127bb422..a0d1b694 100644 --- a/tests/parser/mailboxTest.cpp +++ b/tests/parser/mailboxTest.cpp @@ -30,7 +30,7 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest) VMIME_TEST(testParse) VMIME_TEST(testEmptyEmailAddress) VMIME_TEST(testSeparatorInComment) - VMIME_TEST(testAddressInName) + VMIME_TEST(testMalformations) VMIME_TEST_LIST_END @@ -146,13 +146,28 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest) VASSERT_EQ("email2", "bbb@vmime.org", mbox2->getEmail()); } - void testAddressInName() { - + void testMalformations() { vmime::mailbox mbox; - mbox.parse("a@b.c "); + mbox.parse("a@b.c "); VASSERT_EQ("name", vmime::text("a@b.c"), mbox.getName()); VASSERT_EQ("email", "e@f.g", mbox.getEmail()); + + mbox.parse("a@b.c e@f.g "); + VASSERT_EQ("name", vmime::text("e@f.g"), mbox.getName()); + VASSERT_EQ("email", "h@i.j", mbox.getEmail()); + + mbox.parse("Foo "); + VASSERT_EQ("name", vmime::text("Foobar"), mbox.getName()); + VASSERT_EQ("email", "baz@quux.com", mbox.getEmail()); + + mbox.parse("Foo "); + VASSERT_EQ("name", vmime::text("Foo"), mbox.getName()); + VASSERT_EQ("email", "foo@x.com", mbox.getEmail()); + + mbox.parse("Foo Bar "); + VASSERT_EQ("name", vmime::text("Foo"), mbox.getName()); + VASSERT_EQ("email", "foo@x.com", mbox.getEmail()); } VMIME_TEST_SUITE_END -- cgit v1.2.3 From d1190b496faa754a757aba79f88dcd7e31d6d500 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 29 Nov 2018 21:25:47 +0100 Subject: Improve address parser for malformed mailbox specifications Spammers use "Name " to trick some parsers. My expectations as to what the outcome should be is presented in the updated mailboxTest.cpp. The DFA in mailbox::parseImpl is hereby redone so as to pick the rightmost address-looking portion as the address, rather than something in between. While doing so, it will also no longer mangle the name part anymore (it does this by keeping a "as_if_name" variable around until the end). --- src/vmime/mailbox.cpp | 347 +++++++++++++++++++++---------------------- tests/parser/mailboxTest.cpp | 12 +- 2 files changed, 173 insertions(+), 186 deletions(-) diff --git a/src/vmime/mailbox.cpp b/src/vmime/mailbox.cpp index b2a577db..db5cc74e 100644 --- a/src/vmime/mailbox.cpp +++ b/src/vmime/mailbox.cpp @@ -80,238 +80,225 @@ void mailbox::parseImpl( const char* const pstart = buffer.data() + position; const char* p = pstart; - // Ignore blank spaces at the beginning - while (p < pend && parserHelpers::isSpace(*p)) ++p; - // Current state for parsing machine enum States { State_None, + State_String, State_Name, - State_Address + State_Bracket, + State_Address_Bracketed, + State_Address_Unbracketed, + State_NameAt, }; - States state = State_Name; // let's start with name, we will see later (*) + States state = State_None; // Temporary buffers for extracted name and address - string name; - string address; + string name, address; + unsigned int hold = 0; bool hadBrackets = false; while (p < pend) { - - if (state == State_Name) { - + if (state == State_None) { + while (p < pend && parserHelpers::isSpace(*p)) + ++p; + if (*p == ',' || *p == ';') + /* Completely new mailbox -- stop parsing this one. */ + break; if (*p == '<') { - - state = State_Address; + state = State_Bracket; continue; } - if (*p == '"') { // Quoted string - + state = State_String; ++p; + continue; + } + state = State_Name; + } else if (state == State_String) { + bool escaped = false; - bool escaped = false; - - while (p < pend) { - - if (escaped) { - - name += *p; - escaped = false; - - } else if (*p == '\\') { - - escaped = true; - - } else { - - if (*p == '"') { - - ++p; - break; - - } else { - - name += *p; - } - } - - ++p; + for (; p < pend; ++p) { + if (escaped) { + name += *p; + escaped = false; + continue; + } else if (*p == '\\') { + escaped = true; + continue; } - - } else { - - bool escaped = false; - int comment = 0; - - while (p < pend) { - - if (escaped) { - - if (!comment) name += *p; - escaped = false; - - } else if (comment) { - - if (*p == '\\') { - escaped = true; - } else if (*p == '(') { - ++comment; - } else if (*p == ')') { - --comment; - } - - } else if (*p == '\\') { - - escaped = true; - - } else if (*p == '(') { - - ++comment; - - } else if (*p == '<') { - - // Erase any space between display name and
- string::iterator q = name.end(); - - while (q != name.begin() && parserHelpers::isSpace(*(q - 1))) { - --q; - } - - name.erase(q, name.end()); - - break; - - } else if (/* parserHelpers::isSpace(*p) || */ *p == '@') { - - break; - - } else { - - name += *p; - } - + if (*p == '"') { ++p; + state = State_None; + break; } + name += *p; } + } else if (state == State_Address_Bracketed) { + bool escaped = false; + int comment = 0; - if (p < pend && *p == '@') { - - // (*) Actually, we were parsing the local-part of an address - // and not a display name... - address = name; - name.clear(); - - bool escaped = false; - int comment = 0; - - while (p < pend) { - - if (escaped) { - - if (!comment) address += *p; - escaped = false; - - } else if (comment) { - - if (*p == '\\') { - escaped = true; - } else if (*p == '(') { - ++comment; - } else if (*p == ')') { - --comment; - } - - } else if (*p == '\\') { - + for (; p < pend; ++p) { + if (escaped) { + if (!comment) { + address += *p; + name += *p; + ++hold; + } + escaped = false; + } else if (comment) { + if (*p == '\\') { escaped = true; - } else if (*p == '(') { - ++comment; - - } else if (parserHelpers::isSpace(*p)) { - - break; - - } else { - - address += *p; + } else if (*p == ')') { + --comment; } - - ++p; + } else if (*p == '(') { + ++comment; + } else if (*p == '\\') { + escaped = true; + } else if (*p == '<') { + state = State_Bracket; + break; + } else if (*p == '>') { + hadBrackets = true; + name += *p++; + ++hold; + state = State_Name; + break; + } else if (parserHelpers::isSpace(*p)) { + name += *p; + ++hold; + } else { + address += *p; + name += *p; + ++hold; } - - } else { - - while (p < pend && parserHelpers::isSpace(*p)) ++p; - state = State_None; } - - } else if (state == State_Address) { - + } else if (state == State_Address_Unbracketed) { bool escaped = false; int comment = 0; - while (p < pend) { - + for (; p < pend; ++p) { if (escaped) { - - if (!comment) address += *p; + if (!comment) { + address += *p; + name += *p; + ++hold; + } escaped = false; - } else if (comment) { - - if (*p == '\\') { + if (*p == '\\') escaped = true; - } else if (*p == '(') { + else if (*p == '(') ++comment; - } else if (*p == ')') { + else if (*p == ')') --comment; - } - } else if (*p == '(') { - ++comment; - } else if (*p == '\\') { - escaped = true; - } else if (*p == '<') { - - // If we found a '<' here, it means that the address - // starts _only_ here...and the stuff we have parsed - // before belongs actually to the display name! - name += address; - address.clear(); - + state = State_Bracket; + break; } else if (*p == '>') { - - hadBrackets = true; + name += *p++; + ++hold; + state = State_None; break; - - } else if (!parserHelpers::isSpace(*p)) { - + } else if (*p == ',' || *p == ';') { + state = State_None; + break; + } else if (parserHelpers::isSpace(*p)) { + state = State_Name; + break; + } else { address += *p; + name += *p; + ++hold; } - - ++p; } + } else if (state == State_Name) { + bool escaped = false; + unsigned int comment = 0, at_hold = 0; - break; - - } else { - - while (p < pend && parserHelpers::isSpace(*p)) ++p; - - if (p < pend) { - state = State_Address; + for (; p < pend; ++p) { + if (escaped) { + if (!comment) { + name += *p; + ++at_hold; + } + escaped = false; + } else if (comment) { + if (*p == '\\') + escaped = true; + else if (*p == '(') + ++comment; + else if (*p == ')') + --comment; + } else if (*p == '\\') { + escaped = true; + } else if (*p == '(') { + ++comment; + } else if (*p == '<') { + state = State_Bracket; + break; + } else if (*p == '@') { + hold = at_hold; + state = State_NameAt; + break; + } else if (parserHelpers::isSpace(*p)) { + name += *p; + ++hold; + at_hold = 1; + } else { + name += *p; + hold = 0; + ++at_hold; + } } + } else if (state == State_Bracket) { + string::const_iterator q = name.cend(); + unsigned int nh = 0; + while (nh < hold && q != name.cbegin() && parserHelpers::isSpace(*(q - 1))) { + --q; + ++nh; + } + hold = nh; + name += *p++; + ++hold; + if (!address.empty()) + // If we found a '<' here, it means that the address + // starts _only_ here...and the stuff we have parsed + // before belongs actually to the display name! + address.clear(); + state = State_Address_Bracketed; + } else if (state == State_NameAt) { + name += *p++; + ++hold; + // (*) Actually, we were parsing the local-part of an address + // and not a display name... + // Back up to the last whitespace and treat as address + string::const_iterator q = name.cend(); + unsigned int nh = 0; + while (nh < hold && q != name.cbegin() && !parserHelpers::isSpace(*(q - 1))) { + --q; + ++nh; + } + address.assign(q, name.cend()); + while (nh < hold && q != name.cbegin() && parserHelpers::isSpace(*(q - 1))) { + --q; + ++nh; + } + hold = nh; + state = State_Address_Unbracketed; } } + if (hold <= name.size()) + name.erase(name.size() - hold); + // Swap name and address when no address was found // (email address is mandatory, whereas name is optional). if (address.empty() && !name.empty() && !hadBrackets) { diff --git a/tests/parser/mailboxTest.cpp b/tests/parser/mailboxTest.cpp index a0d1b694..23d1b4ac 100644 --- a/tests/parser/mailboxTest.cpp +++ b/tests/parser/mailboxTest.cpp @@ -154,20 +154,20 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest) VASSERT_EQ("email", "e@f.g", mbox.getEmail()); mbox.parse("a@b.c e@f.g "); - VASSERT_EQ("name", vmime::text("e@f.g"), mbox.getName()); + VASSERT_EQ("name", vmime::text("a@b.c e@f.g"), mbox.getName()); VASSERT_EQ("email", "h@i.j", mbox.getEmail()); mbox.parse("Foo "); - VASSERT_EQ("name", vmime::text("Foobar"), mbox.getName()); + VASSERT_EQ("name", vmime::text("Foo "); - VASSERT_EQ("name", vmime::text("Foo"), mbox.getName()); - VASSERT_EQ("email", "foo@x.com", mbox.getEmail()); + VASSERT_EQ("name", vmime::text("Foo "), mbox.getName()); + VASSERT_EQ("email", "bar@x.com", mbox.getEmail()); mbox.parse("Foo Bar "); - VASSERT_EQ("name", vmime::text("Foo"), mbox.getName()); - VASSERT_EQ("email", "foo@x.com", mbox.getEmail()); + VASSERT_EQ("name", vmime::text("Foo Bar"), mbox.getName()); + VASSERT_EQ("email", "bar@y.com", mbox.getEmail()); } VMIME_TEST_SUITE_END -- cgit v1.2.3 From e1faa925936b919ef164aea7791950826b59fd8b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 6 Feb 2019 23:43:18 +0100 Subject: Unbreak own hostname qualification on POSIX systems Partial revert commit v0.9.2-6-g9a3d6880 (issue #160), because invoking getaddrinfo(NULL, ... AI_CANONNAME) is illegal and never succeeds. --- src/vmime/platforms/posix/posixHandler.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/vmime/platforms/posix/posixHandler.cpp b/src/vmime/platforms/posix/posixHandler.cpp index f8861315..eecdbea9 100644 --- a/src/vmime/platforms/posix/posixHandler.cpp +++ b/src/vmime/platforms/posix/posixHandler.cpp @@ -159,6 +159,10 @@ static inline bool isAcceptableHostname(const vmime::string& str) { const vmime::string posixHandler::getHostName() const { + char hostname[256]; + + ::gethostname(hostname, sizeof(hostname)); + hostname[sizeof(hostname)-1] = '\0'; // Try to get official canonical name of this host struct addrinfo hints; @@ -169,8 +173,7 @@ const vmime::string posixHandler::getHostName() const { struct addrinfo* info; - if (getaddrinfo(NULL, "http", &hints, &info) == 0) { - + if (getaddrinfo(hostname, "http", &hints, &info) == 0) { // First, try to get a Fully-Qualified Domain Name (FQDN) for (struct addrinfo* p = info ; p != NULL ; p = p->ai_next) { @@ -202,11 +205,6 @@ const vmime::string posixHandler::getHostName() const { freeaddrinfo(info); } - // Get host name - char hostname[256]; - ::gethostname(hostname, sizeof(hostname)); - hostname[sizeof(hostname) - 1] = '\0'; - if (::strlen(hostname) == 0 || !isAcceptableHostname(hostname)) { ::strcpy(hostname, "localhost.localdomain"); } -- cgit v1.2.3