Add an option to control how the header parser handles invalid lines

2017-02-27 11:04:33 -05:00 · 2017-02-27 11:04:33 -05:00 · 68fd4e1e42
commit 68fd4e1e42
parent 05a65a3bfa
3 changed files with 136 additions and 90 deletions
--- a/src/vmime/headerField.cpp
+++ b/src/vmime/headerField.cpp
@ -103,117 +103,129 @@ shared_ptr <headerField> headerField::parseNext
 			return null;
 		}
-		// This line may be a field description
+    // This line may be a field description
-		if (!parserHelpers::isSpace(c))
+    if (!parserHelpers::isSpace(c))
-		{
+    {
-			const size_t nameStart = pos;  // remember the start position of the line
+      const size_t nameStart = pos;  // remember the start position of the line
-			while (pos < end && (buffer[pos] != ':' && !parserHelpers::isSpace(buffer[pos])))
+      while (pos < end && (buffer[pos] != ':' && !parserHelpers::isSpace(buffer[pos])))
-				++pos;
+        ++pos;
-			const size_t nameEnd = pos;
+      const size_t nameEnd = pos;
-			while (pos < end && (buffer[pos] == ' ' || buffer[pos] == '\t'))
+      while (pos < end && (buffer[pos] == ' ' || buffer[pos] == '\t'))
-				++pos;
+        ++pos;
-			if (buffer[pos] != ':')
+      if (buffer[pos] != ':')
-			{
+      {
-				// Humm...does not seem to be a valid header line.
+        switch (ctx.getHeaderParseErrorRecoveryMethod()) {
-				// Skip this error and advance to the next line
+          case vmime::headerParseRecoveryMethod::SKIP_LINE:
-				pos = nameStart;
+            // Humm...does not seem to be a valid header line.
            // Skip this error and advance to the next line
            pos = nameStart;
-				while (pos < end && buffer[pos] != '\n')
+            while (pos < end && buffer[pos] != '\n')
-					++pos;
+              ++pos;
-				if (pos < end && buffer[pos] == '\n')
+            if (pos < end && buffer[pos] == '\n')
-					++pos;
+              ++pos;
-			}
+            break;
 			else
 			{
 				// Extract the field name
 				const string name(buffer.begin() + nameStart,
 				                  buffer.begin() + nameEnd);
-				// Skip ':' character
+//          case vmime::headerParseRecoveryMethod::APPEND_TO_PREVIOUS_LINE:
-				while (pos < end && buffer[pos] == ':')
+//            // TODO Implement this...
-					++pos;
+//            break;
-				// Skip spaces between ':' and the field contents
+          case vmime::headerParseRecoveryMethod::ASSUME_END_OF_HEADERS:
-				while (pos < end && (buffer[pos] == ' ' || buffer[pos] == '\t'))
+            return null;
-					++pos;
+            break;
        }
      }
      else
      {
        // Extract the field name
        const string name(buffer.begin() + nameStart,
                          buffer.begin() + nameEnd);
-				const size_t contentsStart = pos;
+        // Skip ':' character
-				size_t contentsEnd = 0;
+        while (pos < end && buffer[pos] == ':')
          ++pos;
-				bool firstLine = true;
+        // Skip spaces between ':' and the field contents
        while (pos < end && (buffer[pos] == ' ' || buffer[pos] == '\t'))
          ++pos;
-				// Parse field value, taking care of line folding (value on multiple lines)
+        const size_t contentsStart = pos;
-				for (size_t eol = 0 ; parserHelpers::findEOL(buffer, pos, end, &eol) ; pos = eol)
+        size_t contentsEnd = 0;
 				{
 					// If the line does not start with a folding indicator (SPACE or TAB),
 					// and this is not the first line, then stop parsing lines
 					if (!firstLine && !(buffer[pos] == ' ' || buffer[pos] == '\t'))
 						break;
-					contentsEnd = eol;
+        bool firstLine = true;
 					firstLine = false;
 				}
-				if (pos == end && contentsEnd == 0)
+        // Parse field value, taking care of line folding (value on multiple lines)
-				{
+        for (size_t eol = 0 ; parserHelpers::findEOL(buffer, pos, end, &eol) ; pos = eol)
-					// End of data, and no CRLF was found at the end
+        {
-					contentsEnd = end;
+          // If the line does not start with a folding indicator (SPACE or TAB),
-				}
+          // and this is not the first line, then stop parsing lines
          if (!firstLine && !(buffer[pos] == ' ' || buffer[pos] == '\t'))
            break;
-				// Strip spaces from end of header lines
+          contentsEnd = eol;
-				while (contentsEnd > contentsStart &&
+          firstLine = false;
-				       (buffer[contentsEnd - 1] == ' ' || buffer[contentsEnd - 1] == '\t' ||
+        }
 				        buffer[contentsEnd - 1] == '\r' || buffer[contentsEnd - 1] == '\n'))
 				{
 					contentsEnd--;
 				}
-				// Return a new field
+        if (pos == end && contentsEnd == 0)
-				shared_ptr <headerField> field = headerFieldFactory::getInstance()->create(name);
+        {
          // End of data, and no CRLF was found at the end
          contentsEnd = end;
        }
-				field->parse(ctx, buffer, contentsStart, contentsEnd, NULL);
+        // Strip spaces from end of header lines
-				field->setParsedBounds(nameStart, pos);
+        while (contentsEnd > contentsStart &&
               (buffer[contentsEnd - 1] == ' ' || buffer[contentsEnd - 1] == '\t' ||
                buffer[contentsEnd - 1] == '\r' || buffer[contentsEnd - 1] == '\n'))
        {
          contentsEnd--;
        }
-				if (newPosition)
+        // Return a new field
-					*newPosition = pos;
+        shared_ptr <headerField> field = headerFieldFactory::getInstance()->create(name);
-				return (field);
+        field->parse(ctx, buffer, contentsStart, contentsEnd, NULL);
-			}
+        field->setParsedBounds(nameStart, pos);
 		}
 		else
 		{
 			// If the line contains only space characters, we assume it is
 			// the end of the headers.
 			while (pos < end && (buffer[pos] == ' ' || buffer[pos] == '\t'))
 				++pos;
-			if (pos < end && buffer[pos] == '\n')
+        if (newPosition)
-			{
+          *newPosition = pos;
 				if (newPosition)
 					*newPosition = pos + 1;   // LF: illegal
-				return null;
+        return (field);
-			}
+      }
-			else if (pos + 1 < end && buffer[pos] == '\r' && buffer[pos + 1] == '\n')
+    }
-			{
+    else
-				if (newPosition)
+    {
-					*newPosition = pos + 2;   // CR+LF
+      // If the line contains only space characters, we assume it is
      // the end of the headers.
      while (pos < end && (buffer[pos] == ' ' || buffer[pos] == '\t'))
        ++pos;
-				return null;
+      if (pos < end && buffer[pos] == '\n')
-			}
+      {
        if (newPosition)
          *newPosition = pos + 1;   // LF: illegal
-			// Skip this error and advance to the next line
+        return null;
-			while (pos < end && buffer[pos] != '\n')
+      }
-				++pos;
+      else if (pos + 1 < end && buffer[pos] == '\r' && buffer[pos + 1] == '\n')
      {
        if (newPosition)
          *newPosition = pos + 2;   // CR+LF
-			if (buffer[pos] == '\n')
+        return null;
-				++pos;
+      }
-		}
+
      // Skip this error and advance to the next line
      while (pos < end && buffer[pos] != '\n')
        ++pos;
      if (buffer[pos] == '\n')
        ++pos;
    }
 	}
 	if (newPosition)
--- a/src/vmime/parsingContext.cpp
+++ b/src/vmime/parsingContext.cpp
@ -28,13 +28,13 @@ namespace vmime
 {
-parsingContext::parsingContext()
+parsingContext::parsingContext() : m_headerParseErrorRecovery(vmime::headerParseRecoveryMethod::SKIP_LINE)
 {
 	// Empty body: a default-constructed context starts with the SKIP_LINE
 	// header parse error recovery method, set in the initializer list above.
 }
 // Copy constructor: besides copying the base context, the new object takes
 // over the source's header parse error recovery method, so a copy handles
 // invalid header lines the same way as the context it was copied from
 // (instead of being reset to SKIP_LINE).
 parsingContext::parsingContext(const parsingContext& ctx)
-	: context(ctx)
+	: context(ctx), m_headerParseErrorRecovery(ctx.m_headerParseErrorRecovery)
 {
 }
@ -45,5 +45,16 @@ parsingContext& parsingContext::getDefaultContext()
 	return ctx;
 }
 // Accessor: reports the recovery method currently stored in this context.
 headerParseRecoveryMethod::headerLineError parsingContext::getHeaderParseErrorRecoveryMethod() const
 {
 	return this->m_headerParseErrorRecovery;
 }
 // Mutator: stores the recovery method that the getter will report afterwards.
 void parsingContext::setHeaderParseErrorRecoveryMethod(headerParseRecoveryMethod::headerLineError recoveryMethod)
 {
 	this->m_headerParseErrorRecovery = recoveryMethod;
 }
 } // vmime
--- a/src/vmime/parsingContext.hpp
+++ b/src/vmime/parsingContext.hpp
@ -31,6 +31,15 @@
 namespace vmime
 {
  /** Selects how the header parser recovers when it meets a line that is
    * not a valid header field (no ':' found after the field name).
    */
  struct headerParseRecoveryMethod
  {
    enum headerLineError
    {
      /** Ignore the invalid line and resume parsing at the next line. */
      SKIP_LINE = 0,

      /* APPEND_TO_PREVIOUS_LINE = 1,  (reserved; not implemented yet) */

      /** Stop parsing header fields at the invalid line. */
      ASSUME_END_OF_HEADERS = 2
    };
  };
 /** Holds configuration parameters used for parsing messages.
  */
@ -48,8 +57,22 @@ public:
 	  */
 	static parsingContext& getDefaultContext();
  /** Sets the recovery method used when the header parser encounters
    * an invalid line, such as a failed fold or a missing new line.
    *
    * @param recoveryMethod one of the vmime::headerParseRecoveryMethod::headerLineError
    * values; a default-constructed context uses
    * vmime::headerParseRecoveryMethod::SKIP_LINE until this is called
    */
  void setHeaderParseErrorRecoveryMethod(headerParseRecoveryMethod::headerLineError recoveryMethod);
  /** Returns the recovery method used when the header parser encounters
    * an invalid line.
    *
    * @return the configured vmime::headerParseRecoveryMethod::headerLineError value
    */
  headerParseRecoveryMethod::headerLineError getHeaderParseErrorRecoveryMethod() const;
 protected:
  /** Recovery method returned by getHeaderParseErrorRecoveryMethod() and
    * changed by setHeaderParseErrorRecoveryMethod(). */
  headerParseRecoveryMethod::headerLineError m_headerParseErrorRecovery;
 };