From cc18aa39c15ffffdd622cb61c3c147c2bc1903aa Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@inai.de>
Date: Thu, 24 Jan 2019 13:14:50 +0100
Subject: tests: add more malformation tests to mailboxTest

---
 tests/parser/mailboxTest.cpp | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/tests/parser/mailboxTest.cpp b/tests/parser/mailboxTest.cpp
index 127bb422..a0d1b694 100644
--- a/tests/parser/mailboxTest.cpp
+++ b/tests/parser/mailboxTest.cpp
@@ -30,7 +30,7 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest)
 		VMIME_TEST(testParse)
 		VMIME_TEST(testEmptyEmailAddress)
 		VMIME_TEST(testSeparatorInComment)
-		VMIME_TEST(testAddressInName)
+		VMIME_TEST(testMalformations)
 	VMIME_TEST_LIST_END
 
 
@@ -146,13 +146,28 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest)
 		VASSERT_EQ("email2", "bbb@vmime.org", mbox2->getEmail());
 	}
 
-	void testAddressInName() {
-
+	void testMalformations() {
 		vmime::mailbox mbox;
-		mbox.parse("a@b.c <e@f.g>");
 
+		mbox.parse("a@b.c <e@f.g>");
 		VASSERT_EQ("name", vmime::text("a@b.c"), mbox.getName());
 		VASSERT_EQ("email", "e@f.g", mbox.getEmail());
+
+		mbox.parse("a@b.c e@f.g <h@i.j>");
+		VASSERT_EQ("name", vmime::text("e@f.g"), mbox.getName());
+		VASSERT_EQ("email", "h@i.j", mbox.getEmail());
+
+		mbox.parse("Foo <bar<baz@quux.com>");
+		VASSERT_EQ("name", vmime::text("Foobar"), mbox.getName());
+		VASSERT_EQ("email", "baz@quux.com", mbox.getEmail());
+
+		mbox.parse("Foo <foo@x.com> <bar@x.com>");
+		VASSERT_EQ("name", vmime::text("Foo"), mbox.getName());
+		VASSERT_EQ("email", "foo@x.com", mbox.getEmail());
+
+		mbox.parse("Foo <foo@x.com> Bar <bar@y.com>");
+		VASSERT_EQ("name", vmime::text("Foo"), mbox.getName());
+		VASSERT_EQ("email", "foo@x.com", mbox.getEmail());
 	}
 
 VMIME_TEST_SUITE_END
-- 
cgit v1.2.3


From d1190b496faa754a757aba79f88dcd7e31d6d500 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@inai.de>
Date: Thu, 29 Nov 2018 21:25:47 +0100
Subject: Improve address parser for malformed mailbox specifications

Spammers use "Name <addr> <addr>" to trick some parsers.
My expectations as to what the outcome should be is presented
in the updated mailboxTest.cpp.

The DFA in mailbox::parseImpl is hereby redone so as to pick the
rightmost address-looking portion as the address, rather than
something in between. While doing so, it will also no longer mangle
the name part anymore (it does this by keeping a "as_if_name"
variable around until the end).
---
 src/vmime/mailbox.cpp        | 347 +++++++++++++++++++++----------------------
 tests/parser/mailboxTest.cpp |  12 +-
 2 files changed, 173 insertions(+), 186 deletions(-)

diff --git a/src/vmime/mailbox.cpp b/src/vmime/mailbox.cpp
index b2a577db..db5cc74e 100644
--- a/src/vmime/mailbox.cpp
+++ b/src/vmime/mailbox.cpp
@@ -80,238 +80,225 @@ void mailbox::parseImpl(
 	const char* const pstart = buffer.data() + position;
 	const char* p = pstart;
 
-	// Ignore blank spaces at the beginning
-	while (p < pend && parserHelpers::isSpace(*p)) ++p;
-
 	// Current state for parsing machine
 	enum States {
 		State_None,
+		State_String,
 		State_Name,
-		State_Address
+		State_Bracket,
+		State_Address_Bracketed,
+		State_Address_Unbracketed,
+		State_NameAt,
 	};
 
-	States state = State_Name;   // let's start with name, we will see later (*)
+	States state = State_None;
 
 	// Temporary buffers for extracted name and address
-	string name;
-	string address;
+	string name, address;
+	unsigned int hold = 0;
 	bool hadBrackets = false;
 
 	while (p < pend) {
-
-		if (state == State_Name) {
-
+		if (state == State_None) {
+			while (p < pend && parserHelpers::isSpace(*p))
+				++p;
+			if (*p == ',' || *p == ';')
+				/* Completely new mailbox -- stop parsing this one. */
+				break;
 			if (*p == '<') {
-
-				state = State_Address;
+				state = State_Bracket;
 				continue;
 			}
-
 			if (*p == '"') {  // Quoted string
-
+				state = State_String;
 				++p;
+				continue;
+			}
+			state = State_Name;
+		} else if (state == State_String) {
+			bool escaped = false;
 
-				bool escaped = false;
-
-				while (p < pend) {
-
-					if (escaped) {
-
-						name += *p;
-						escaped = false;
-
-					} else if (*p == '\\') {
-
-						escaped = true;
-
-					} else {
-
-						if (*p == '"') {
-
-							++p;
-							break;
-
-						} else {
-
-							name += *p;
-						}
-					}
-
-					++p;
+			for (; p < pend; ++p) {
+				if (escaped) {
+					name += *p;
+					escaped = false;
+					continue;
+				} else if (*p == '\\') {
+					escaped = true;
+					continue;
 				}
-
-			} else {
-
-				bool escaped = false;
-				int comment = 0;
-
-				while (p < pend) {
-
-					if (escaped) {
-
-						if (!comment) name += *p;
-						escaped = false;
-
-					} else if (comment) {
-
-						if (*p == '\\') {
-							escaped = true;
-						} else if (*p == '(') {
-							++comment;
-						} else if (*p == ')') {
-							--comment;
-						}
-
-					} else if (*p == '\\') {
-
-						escaped = true;
-
-					} else if (*p == '(') {
-
-						++comment;
-
-					} else if (*p == '<') {
-
-						// Erase any space between display name and <address>
-						string::iterator q = name.end();
-
-						while (q != name.begin() && parserHelpers::isSpace(*(q - 1))) {
-							--q;
-						}
-
-						name.erase(q, name.end());
-
-						break;
-
-					} else if (/* parserHelpers::isSpace(*p) || */ *p == '@') {
-
-						break;
-
-					} else {
-
-						name += *p;
-					}
-
+				if (*p == '"') {
 					++p;
+					state = State_None;
+					break;
 				}
+				name += *p;
 			}
+		} else if (state == State_Address_Bracketed) {
+			bool escaped = false;
+			int comment = 0;
 
-			if (p < pend && *p == '@') {
-
-				// (*) Actually, we were parsing the local-part of an address
-				// and not a display name...
-				address = name;
-				name.clear();
-
-				bool escaped = false;
-				int comment = 0;
-
-				while (p < pend) {
-
-					if (escaped) {
-
-						if (!comment) address += *p;
-						escaped = false;
-
-					} else if (comment) {
-
-						if (*p == '\\') {
-							escaped = true;
-						} else if (*p == '(') {
-							++comment;
-						} else if (*p == ')') {
-							--comment;
-						}
-
-					} else if (*p == '\\') {
-
+			for (; p < pend; ++p) {
+				if (escaped) {
+					if (!comment) {
+						address += *p;
+						name += *p;
+						++hold;
+					}
+					escaped = false;
+				} else if (comment) {
+					if (*p == '\\') {
 						escaped = true;
-
 					} else if (*p == '(') {
-
 						++comment;
-
-					} else if (parserHelpers::isSpace(*p)) {
-
-						break;
-
-					} else {
-
-						address += *p;
+					} else if (*p == ')') {
+						--comment;
 					}
-
-					++p;
+				} else if (*p == '(') {
+					++comment;
+				} else if (*p == '\\') {
+					escaped = true;
+				} else if (*p == '<') {
+					state = State_Bracket;
+					break;
+				} else if (*p == '>') {
+					hadBrackets = true;
+					name += *p++;
+					++hold;
+					state = State_Name;
+					break;
+				} else if (parserHelpers::isSpace(*p)) {
+					name += *p;
+					++hold;
+				} else {
+					address += *p;
+					name += *p;
+					++hold;
 				}
-
-			} else {
-
-				while (p < pend && parserHelpers::isSpace(*p)) ++p;
-				state = State_None;
 			}
-
-		} else if (state == State_Address) {
-
+		} else if (state == State_Address_Unbracketed) {
 			bool escaped = false;
 			int comment = 0;
 
-			while (p < pend) {
-
+			for (; p < pend; ++p) {
 				if (escaped) {
-
-					if (!comment) address += *p;
+					if (!comment) {
+						address += *p;
+						name += *p;
+						++hold;
+					}
 					escaped = false;
-
 				} else if (comment) {
-
-					if (*p == '\\') {
+					if (*p == '\\')
 						escaped = true;
-					} else if (*p == '(') {
+					else if (*p == '(')
 						++comment;
-					} else if (*p == ')') {
+					else if (*p == ')')
 						--comment;
-					}
-
 				} else if (*p == '(') {
-
 					++comment;
-
 				} else if (*p == '\\') {
-
 					escaped = true;
-
 				} else if (*p == '<') {
-
-					// If we found a '<' here, it means that the address
-					// starts _only_ here...and the stuff we have parsed
-					// before belongs actually to the display name!
-					name += address;
-					address.clear();
-
+					state = State_Bracket;
+					break;
 				} else if (*p == '>') {
-
-					hadBrackets = true;
+					name += *p++;
+					++hold;
+					state = State_None;
 					break;
-
-				} else if (!parserHelpers::isSpace(*p)) {
-
+				} else if (*p == ',' || *p == ';') {
+					state = State_None;
+					break;
+				} else if (parserHelpers::isSpace(*p)) {
+					state = State_Name;
+					break;
+				} else {
 					address += *p;
+					name += *p;
+					++hold;
 				}
-
-				++p;
 			}
+		} else if (state == State_Name) {
+			bool escaped = false;
+			unsigned int comment = 0, at_hold = 0;
 
-			break;
-
-		} else {
-
-			while (p < pend && parserHelpers::isSpace(*p)) ++p;
-
-			if (p < pend) {
-				state = State_Address;
+			for (; p < pend; ++p) {
+				if (escaped) {
+					if (!comment) {
+						name += *p;
+						++at_hold;
+					}
+					escaped = false;
+				} else if (comment) {
+					if (*p == '\\')
+						escaped = true;
+					else if (*p == '(')
+						++comment;
+					else if (*p == ')')
+						--comment;
+				} else if (*p == '\\') {
+					escaped = true;
+				} else if (*p == '(') {
+					++comment;
+				} else if (*p == '<') {
+					state = State_Bracket;
+					break;
+				} else if (*p == '@') {
+					hold = at_hold;
+					state = State_NameAt;
+					break;
+				} else if (parserHelpers::isSpace(*p)) {
+					name += *p;
+					++hold;
+					at_hold = 1;
+				} else {
+					name += *p;
+					hold = 0;
+					++at_hold;
+				}
 			}
+		} else if (state == State_Bracket) {
+			string::const_iterator q = name.cend();
+			unsigned int nh = 0;
+			while (nh < hold && q != name.cbegin() && parserHelpers::isSpace(*(q - 1))) {
+				--q;
+				++nh;
+			}
+			hold = nh;
+			name += *p++;
+			++hold;
+			if (!address.empty())
+				// If we found a '<' here, it means that the address
+				// starts _only_ here...and the stuff we have parsed
+				// before belongs actually to the display name!
+				address.clear();
+			state = State_Address_Bracketed;
+		} else if (state == State_NameAt) {
+			name += *p++;
+			++hold;
+			// (*) Actually, we were parsing the local-part of an address
+			// and not a display name...
+			// Back up to the last whitespace and treat as address
+			string::const_iterator q = name.cend();
+			unsigned int nh = 0;
+			while (nh < hold && q != name.cbegin() && !parserHelpers::isSpace(*(q - 1))) {
+				--q;
+				++nh;
+			}
+			address.assign(q, name.cend());
+			while (nh < hold && q != name.cbegin() && parserHelpers::isSpace(*(q - 1))) {
+				--q;
+				++nh;
+			}
+			hold = nh;
+			state = State_Address_Unbracketed;
 		}
 	}
 
+	if (hold <= name.size())
+		name.erase(name.size() - hold);
+
 	// Swap name and address when no address was found
 	// (email address is mandatory, whereas name is optional).
 	if (address.empty() && !name.empty() && !hadBrackets) {
diff --git a/tests/parser/mailboxTest.cpp b/tests/parser/mailboxTest.cpp
index a0d1b694..23d1b4ac 100644
--- a/tests/parser/mailboxTest.cpp
+++ b/tests/parser/mailboxTest.cpp
@@ -154,20 +154,20 @@ VMIME_TEST_SUITE_BEGIN(mailboxTest)
 		VASSERT_EQ("email", "e@f.g", mbox.getEmail());
 
 		mbox.parse("a@b.c e@f.g <h@i.j>");
-		VASSERT_EQ("name", vmime::text("e@f.g"), mbox.getName());
+		VASSERT_EQ("name", vmime::text("a@b.c e@f.g"), mbox.getName());
 		VASSERT_EQ("email", "h@i.j", mbox.getEmail());
 
 		mbox.parse("Foo <bar<baz@quux.com>");
-		VASSERT_EQ("name", vmime::text("Foobar"), mbox.getName());
+		VASSERT_EQ("name", vmime::text("Foo <bar"), mbox.getName());
 		VASSERT_EQ("email", "baz@quux.com", mbox.getEmail());
 
 		mbox.parse("Foo <foo@x.com> <bar@x.com>");
-		VASSERT_EQ("name", vmime::text("Foo"), mbox.getName());
-		VASSERT_EQ("email", "foo@x.com", mbox.getEmail());
+		VASSERT_EQ("name", vmime::text("Foo <foo@x.com>"), mbox.getName());
+		VASSERT_EQ("email", "bar@x.com", mbox.getEmail());
 
 		mbox.parse("Foo <foo@x.com> Bar <bar@y.com>");
-		VASSERT_EQ("name", vmime::text("Foo"), mbox.getName());
-		VASSERT_EQ("email", "foo@x.com", mbox.getEmail());
+		VASSERT_EQ("name", vmime::text("Foo <foo@x.com> Bar"), mbox.getName());
+		VASSERT_EQ("email", "bar@y.com", mbox.getEmail());
 	}
 
 VMIME_TEST_SUITE_END
-- 
cgit v1.2.3


From e1faa925936b919ef164aea7791950826b59fd8b Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@inai.de>
Date: Wed, 6 Feb 2019 23:43:18 +0100
Subject: Unbreak own hostname qualification on POSIX systems

Partial revert commit v0.9.2-6-g9a3d6880 (issue #160), because
invoking getaddrinfo(NULL, ... AI_CANONNAME) is illegal and never
succeeds.
---
 src/vmime/platforms/posix/posixHandler.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/vmime/platforms/posix/posixHandler.cpp b/src/vmime/platforms/posix/posixHandler.cpp
index f8861315..eecdbea9 100644
--- a/src/vmime/platforms/posix/posixHandler.cpp
+++ b/src/vmime/platforms/posix/posixHandler.cpp
@@ -159,6 +159,10 @@ static inline bool isAcceptableHostname(const vmime::string& str) {
 
 
 const vmime::string posixHandler::getHostName() const {
+	char hostname[256];
+
+	::gethostname(hostname, sizeof(hostname));
+	hostname[sizeof(hostname)-1] = '\0';
 
 	// Try to get official canonical name of this host
 	struct addrinfo hints;
@@ -169,8 +173,7 @@ const vmime::string posixHandler::getHostName() const {
 
 	struct addrinfo* info;
 
-	if (getaddrinfo(NULL, "http", &hints, &info) == 0) {
-
+	if (getaddrinfo(hostname, "http", &hints, &info) == 0) {
 		// First, try to get a Fully-Qualified Domain Name (FQDN)
 		for (struct addrinfo* p = info ; p != NULL ; p = p->ai_next) {
 
@@ -202,11 +205,6 @@ const vmime::string posixHandler::getHostName() const {
 		freeaddrinfo(info);
 	}
 
-	// Get host name
-	char hostname[256];
-	::gethostname(hostname, sizeof(hostname));
-	hostname[sizeof(hostname) - 1] = '\0';
-
 	if (::strlen(hostname) == 0 || !isAcceptableHostname(hostname)) {
 		::strcpy(hostname, "localhost.localdomain");
 	}
-- 
cgit v1.2.3