diff --git a/src/header.cpp b/src/header.cpp
index 886283ba..db6c7f89 100644
--- a/src/header.cpp
+++ b/src/header.cpp
@@ -64,152 +64,10 @@ void header::parse(const string& buffer, const string::size_type position,
 
 	while (pos < end)
 	{
-		char_t c = buffer[pos];
+		headerField* field = headerField::parseNext(buffer, pos, end, &pos);
+		if (field == NULL) break;
 
-		// Check for end of headers (empty line): although RFC-822 recommends
-		// to use CRLF for header/body separator (see 4.1 SYNTAX), here, we
-		// also check for LF just in case...
-		if (c == '\n')
-		{
-			++pos;
-			break;
-		}
-		else if (c == '\r' && pos + 1 < end && buffer[pos + 1] == '\n')
-		{
-			pos += 2;
-			break;
-		}
-
-		// This line may be a field description
-		if (!parserHelpers::isspace(c))
-		{
-			const string::size_type nameStart = pos;  // remember the start position of the line
-
-			while (pos < end && (buffer[pos] != ':' && !parserHelpers::isspace(buffer[pos])))
-				++pos;
-
-			const string::size_type nameEnd = pos;
-
-			while (pos < end && parserHelpers::isspace(buffer[pos]))
-				++pos;
-
-			if (buffer[pos] != ':')
-			{
-				// Humm...does not seem to be a valid header line.
-				// Skip this error and advance to the next line
-				pos = nameStart;
-
-				while (pos < end && buffer[pos] != '\n')
-					++pos;
-
-				if (buffer[pos] == '\n')
-					++pos;
-			}
-			else
-			{
-				// Extract the field name
-				const string name(buffer.begin() + nameStart,
-					buffer.begin() + nameEnd);
-
-				// Skip ':' character
-				++pos;
-
-				// Skip spaces between ':' and the field contents
-				while (pos < end && (buffer[pos] == ' ' || buffer[pos] == '\t'))
-					++pos;
-
-				// Extract the field value
-				string contents;
-
-				while (pos < end)
-				{
-					c = buffer[pos];
-
-					// Check for end of contents
-					if (c == '\r' && pos + 1 < end && buffer[pos + 1] == '\n')
-					{
-						pos += 2;
-						break;
-					}
-					else if (c == '\n')
-					{
-						++pos;
-						break;
-					}
-
-					const string::size_type ctsStart = pos;
-					string::size_type ctsEnd = pos;
-
-					while (pos < end)
-					{
-						c = buffer[pos];
-
-						// Check for end of line
-						if (c == '\r' && pos + 1 < end && buffer[pos + 1] == '\n')
-						{
-							ctsEnd = pos;
-							pos += 2;
-							break;
-						}
-						else if (c == '\n')
-						{
-							ctsEnd = pos;
-							++pos;
-							break;
-						}
-
-						++pos;
-					}
-
-					if (ctsEnd != ctsStart)
-					{
-						// Append this line to contents
-						contents.append(buffer.begin() + ctsStart,
-							buffer.begin() + ctsEnd);
-					}
-
-					// Handle the case of folded lines
-					if (buffer[pos] == ' ' || buffer[pos] == '\t')
-					{
-						// This is a folding white-space: we keep it as is and
-						// we continue with contents parsing...
-					}
-					else
-					{
-						// End of this field
-						break;
-					}
-				}
-
-				// Add a new field to list
-				m_fields.push_back(headerFieldFactory::getInstance()->
-					create(name, contents));
-			}
-		}
-		else
-		{
-			// Skip this error and advance to the next line
-			while (pos < end && buffer[pos] != '\n')
-				++pos;
-
-			if (buffer[pos] == '\n')
-				++pos;
-		}
-	}
-
-	// If we have found the header/body separator, skip it
-	if (pos < end)
-	{
-		if (buffer[pos] == '\n')
-		{
-			// This is a LF (illegal but...)
-			++pos;
-		}
-		else if (buffer[pos] == '\r' && pos + 1 < end)
-		{
-			// This is a CRLF
-			pos += 2;
-		}
+		m_fields.push_back(field);
 	}
 
 	setParsedBounds(position, pos);
diff --git a/src/headerField.cpp b/src/headerField.cpp
index 627c63eb..7c8562da 100644
--- a/src/headerField.cpp
+++ b/src/headerField.cpp
@@ -20,6 +20,8 @@
 #include "vmime/headerField.hpp"
 #include "vmime/headerFieldFactory.hpp"
 
+#include "vmime/parserHelpers.hpp"
+
 
 namespace vmime
 {
@@ -67,12 +69,168 @@ headerField& headerField::operator=(const headerField& other)
 }
 
 
+headerField* headerField::parseNext(const string& buffer, const string::size_type position,
+	const string::size_type end, string::size_type* newPosition)
+{
+	string::size_type pos = position;
+
+	while (pos < end)
+	{
+		char_t c = buffer[pos];
+
+		// Check for end of headers (empty line): although RFC-822 recommends
+		// to use CRLF for header/body separator (see 4.1 SYNTAX), here, we
+		// also check for LF for compatibility with broken implementations...
+		if (c == '\n')
+		{
+			if (newPosition)
+				*newPosition = pos + 1;   // LF: illegal
+
+			return (NULL);
+		}
+		else if (c == '\r' && pos + 1 < end && buffer[pos + 1] == '\n')
+		{
+			if (newPosition)
+				*newPosition = pos + 2;   // CR+LF
+
+			return (NULL);
+		}
+
+		// This line may be a field description
+		if (!parserHelpers::isspace(c))
+		{
+			const string::size_type nameStart = pos;  // remember the start position of the line
+
+			while (pos < end && (buffer[pos] != ':' && !parserHelpers::isspace(buffer[pos])))
+				++pos;
+
+			const string::size_type nameEnd = pos;
+
+			while (pos < end && (buffer[pos] == ' ' || buffer[pos] == '\t'))
+				++pos;
+
+			if (pos >= end || buffer[pos] != ':')
+			{
+				// No ':' within [position, end): not a valid header line.
+				// Skip this error and advance to the next line
+				pos = nameStart;
+
+				while (pos < end && buffer[pos] != '\n')
+					++pos;
+
+				if (pos < end && buffer[pos] == '\n')
+					++pos;
+			}
+			else
+			{
+				// Extract the field name
+				const string name(buffer.begin() + nameStart,
+					buffer.begin() + nameEnd);
+
+				// Skip ':' character
+				++pos;
+
+				// Skip spaces between ':' and the field contents
+				while (pos < end && (buffer[pos] == ' ' || buffer[pos] == '\t'))
+					++pos;
+
+				// Extract the field value
+				string contents;
+
+				while (pos < end)
+				{
+					c = buffer[pos];
+
+					// Check for end of contents
+					if (c == '\r' && pos + 1 < end && buffer[pos + 1] == '\n')
+					{
+						pos += 2;
+						break;
+					}
+					else if (c == '\n')
+					{
+						++pos;
+						break;
+					}
+
+					const string::size_type ctsStart = pos;
+					string::size_type ctsEnd = pos;
+
+					while (pos < end)
+					{
+						c = buffer[pos];
+
+						// Check for end of line
+						if (c == '\r' && pos + 1 < end && buffer[pos + 1] == '\n')
+						{
+							ctsEnd = pos;
+							pos += 2;
+							break;
+						}
+						else if (c == '\n')
+						{
+							ctsEnd = pos;
+							++pos;
+							break;
+						}
+
+						++pos;
+					}
+
+					if (ctsEnd != ctsStart)
+					{
+						// Append this line to contents
+						contents.append(buffer.begin() + ctsStart,
+							buffer.begin() + ctsEnd);
+					}
+
+					// Handle the case of folded lines (never look at data past 'end')
+					if (pos < end && (buffer[pos] == ' ' || buffer[pos] == '\t'))
+					{
+						// This is a folding white-space: we keep it as is and
+						// we continue with contents parsing...
+					}
+					else
+					{
+						// End of this field
+						break;
+					}
+				}
+
+				// Return a new field
+				headerField* field = headerFieldFactory::getInstance()->create(name);
+
+				field->parse(contents);   // TODO: fix incorrect parsed bounds...
+				field->setParsedBounds(nameStart, pos);
+
+				if (newPosition)
+					*newPosition = pos;
+
+				return (field);
+			}
+		}
+		else
+		{
+			// Line starts with white-space: skip it and advance to the next line
+			while (pos < end && buffer[pos] != '\n')
+				++pos;
+
+			if (pos < end && buffer[pos] == '\n')
+				++pos;
+		}
+	}
+
+	if (newPosition)
+		*newPosition = pos;
+
+	return (NULL);
+}
+
+
 void headerField::parse(const string& buffer, const string::size_type position,
 	const string::size_type end, string::size_type* newPosition)
 {
 	getValue().parse(buffer, position, end, newPosition);
-
-	setParsedBounds(position, end);
 }
 
 
diff --git a/vmime/headerField.hpp b/vmime/headerField.hpp
index 626b4f10..d5f7160d 100644
--- a/vmime/headerField.hpp
+++ b/vmime/headerField.hpp
@@ -97,6 +97,19 @@ public:
 	void parse(const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL);
 	void generate(utility::outputStream& os, const string::size_type maxLineLength = lineLengthLimits::infinite, const string::size_type curLinePos = 0, string::size_type* newLinePos = NULL) const;
 
+	/** Parse the next header field found in the specified buffer.
+	  * Malformed lines are skipped; nothing at or beyond 'end' is read.
+	  *
+	  * @param buffer input buffer
+	  * @param position position in the buffer at which parsing starts
+	  * @param end position in the buffer at which parsing must stop
+	  * @param newPosition if not NULL, receives the position just after the
+	  * parsed field, or just after the empty line that ends the header
+	  * @return a new field created by headerFieldFactory, or NULL if the
+	  * empty line ending the header, or 'end', was reached first
+	  */
+	static headerField* parseNext(const string& buffer, const string::size_type position, const string::size_type end, string::size_type* newPosition = NULL);
+
 private:
 
 	string m_name;