 // third_party/WebKit/Source/platform/mhtml/MHTMLParser.cpp - chromium/src - Git at Google

 /*
  * Copyright (C) 2011 Google Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  *     * Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
  *     * Neither the name of Google Inc. nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "platform/mhtml/MHTMLParser.h"

 #include "platform/mhtml/ArchiveResource.h"
 #include "platform/network/ParsedContentType.h"
 #include "platform/text/QuotedPrintable.h"
 #include "wtf/HashMap.h"
 #include "wtf/text/Base64.h"
 #include "wtf/text/StringBuilder.h"
 #include "wtf/text/StringConcatenate.h"
 #include "wtf/text/StringHash.h"
 #include "wtf/text/WTFString.h"

 namespace blink {

 // Limited MIME header model: it keeps only the header fields that the MHTML
 // parser in this file reads back (content type, charset, transfer encoding,
 // location, id and, for multipart headers, the part boundaries).
 class MIMEHeader : public GarbageCollectedFinalized<MIMEHeader> {
  public:
   // Values of the Content-Transfer-Encoding field. |Unknown| is what a freshly
   // constructed header carries and what unrecognized values map to.
   enum Encoding {
     QuotedPrintable,
     Base64,
     EightBit,
     SevenBit,
     Binary,
     Unknown
   };

   static MIMEHeader* create() { return new MIMEHeader; }

   // Builds a header from the lines delivered by |crLFLineReader|. Returns
   // nullptr for a multipart header that carries no boundary parameter.
   static MIMEHeader* parseHeader(SharedBufferChunkReader* crLFLineReader);

   // True when the content type begins with "multipart/" (case-insensitive).
   bool isMultipart() const {
     return m_contentType.startsWith("multipart/", TextCaseInsensitive);
   }

   // Plain accessors for the parsed fields.
   String contentType() const { return m_contentType; }
   String charset() const { return m_charset; }
   String contentLocation() const { return m_contentLocation; }
   String contentID() const { return m_contentID; }
   Encoding contentTransferEncoding() const { return m_contentTransferEncoding; }

   // Multi-part type and boundaries are only valid for multipart MIME headers.
   String multiPartType() const { return m_multipartType; }
   String endOfPartBoundary() const { return m_endOfPartBoundary; }
   String endOfDocumentBoundary() const { return m_endOfDocumentBoundary; }

   // The class only holds String and enum members, so the trace body is empty.
   DEFINE_INLINE_TRACE() {}

  private:
   MIMEHeader();

   // Maps the text of a Content-Transfer-Encoding field to an Encoding value.
   static Encoding parseContentTransferEncoding(const String&);

   String m_contentType;
   String m_charset;
   Encoding m_contentTransferEncoding;
   String m_contentLocation;
   String m_contentID;
   String m_multipartType;
   String m_endOfPartBoundary;
   String m_endOfDocumentBoundary;
 };

 typedef HashMap<String, String> KeyValueMap;

 // Reads header lines from |buffer| until an empty line or the end of the data
 // and returns them as a map from field name (lower-cased, whitespace stripped)
 // to field value (whitespace stripped).
 //
 // A line starting with a tab continues the value of the field read just
 // before it. Lines without a ':' are ignored. When a field name occurs more
 // than once, the last occurrence wins.
 static KeyValueMap retrieveKeyValuePairs(SharedBufferChunkReader* buffer) {
   KeyValueMap keyValuePairs;
   String line;
   String key;
   StringBuilder value;
   while (!(line = buffer->nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
     if (line.isEmpty())
       break;  // Empty line means end of key/value section.
     if (line[0] == '\t') {
       // Continuation line: extend the value currently being accumulated.
       value.append(line.substring(1));
       continue;
     }
     // New key/value, store the previous one if any.
     if (!key.isEmpty()) {
       if (keyValuePairs.find(key) != keyValuePairs.end()) {
         DVLOG(1) << "Key duplicate found in MIME header. Key is '" << key
                  << "', previous value replaced.";
       }
       // Use set() so that a duplicate really replaces the previous value, as
       // the log message states and as is done for the last field below.
       keyValuePairs.set(key, value.toString().stripWhiteSpace());
     }
     // Always restart from a clean slate, even when nothing was stored, so
     // that text gathered from a malformed line (empty field name, or a
     // continuation following a line without ':') cannot leak into the value
     // of the next field.
     key = String();
     value.clear();

     size_t colonIndex = line.find(':');
     if (colonIndex == kNotFound) {
       // This is not a key value pair, ignore.
       continue;
     }
     key = line.substring(0, colonIndex).lower().stripWhiteSpace();
     value.append(line.substring(colonIndex + 1));
   }
   // Store the last property if there is one.
   if (!key.isEmpty())
     keyValuePairs.set(key, value.toString().stripWhiteSpace());
   return keyValuePairs;
 }

 // Parses the header fields available from |buffer| into a new MIMEHeader.
 // Fields that are absent leave the corresponding member at its initial value.
 // Returns nullptr when the content type is multipart but no boundary parameter
 // is present.
 MIMEHeader* MIMEHeader::parseHeader(SharedBufferChunkReader* buffer) {
   MIMEHeader* header = MIMEHeader::create();
   KeyValueMap fields = retrieveKeyValuePairs(buffer);

   KeyValueMap::iterator contentTypeField = fields.find("content-type");
   if (contentTypeField != fields.end()) {
     ParsedContentType contentType(contentTypeField->value);
     header->m_contentType = contentType.mimeType();
     if (header->isMultipart()) {
       header->m_multipartType = contentType.parameterValueForName("type");
       header->m_endOfPartBoundary =
           contentType.parameterValueForName("boundary");
       if (header->m_endOfPartBoundary.isNull()) {
         DVLOG(1) << "No boundary found in multipart MIME header.";
         return nullptr;
       }
       // A part ends with "--<boundary>", the whole document with
       // "--<boundary>--".
       header->m_endOfPartBoundary.insert("--", 0);
       header->m_endOfDocumentBoundary = header->m_endOfPartBoundary;
       header->m_endOfDocumentBoundary.append("--");
     } else {
       header->m_charset = contentType.charset().stripWhiteSpace();
     }
   }

   KeyValueMap::iterator encodingField =
       fields.find("content-transfer-encoding");
   if (encodingField != fields.end()) {
     header->m_contentTransferEncoding =
         parseContentTransferEncoding(encodingField->value);
   }

   KeyValueMap::iterator locationField = fields.find("content-location");
   if (locationField != fields.end())
     header->m_contentLocation = locationField->value;

   // See rfc2557 - section 8.3 - Use of the Content-ID header and CID URLs.
   KeyValueMap::iterator idField = fields.find("content-id");
   if (idField != fields.end())
     header->m_contentID = idField->value;

   return header;
 }

 // Translates the text of a Content-Transfer-Encoding field into an Encoding
 // value. The comparison ignores case and surrounding whitespace; anything not
 // listed in the table yields Unknown.
 MIMEHeader::Encoding MIMEHeader::parseContentTransferEncoding(
     const String& text) {
   static const struct {
     const char* name;
     Encoding encoding;
   } kKnownEncodings[] = {
       {"base64", Base64},
       {"quoted-printable", QuotedPrintable},
       {"8bit", EightBit},
       {"7bit", SevenBit},
       {"binary", Binary},
   };

   const String normalized = text.stripWhiteSpace().lower();
   for (const auto& candidate : kKnownEncodings) {
     if (normalized == candidate.name)
       return candidate.encoding;
   }
   DVLOG(1) << "Unknown encoding '" << text << "' found in MIME header.";
   return Unknown;
 }

 // A new header starts with an Unknown transfer encoding; the String members
 // are default-constructed.
 MIMEHeader::MIMEHeader() : m_contentTransferEncoding(Unknown) {}

 // Consumes lines from |lineReader| up to and including the first line equal to
 // |boundary|. Returns true if such a line was found, false if the reader ran
 // out of data first.
 static bool skipLinesUntilBoundaryFound(SharedBufferChunkReader& lineReader,
                                         const String& boundary) {
   for (;;) {
     const String current =
         lineReader.nextChunkAsUTF8StringWithLatin1Fallback();
     if (current.isNull())
       return false;  // No more lines and the boundary never showed up.
     if (current == boundary)
       return true;
   }
 }

 // Sets up the line reader over |data| with "\r\n" as the chunk separator.
 MHTMLParser::MHTMLParser(PassRefPtr<const SharedBuffer> data)
     : m_lineReader(std::move(data), "\r\n") {}

 // Parses the whole archive and returns its resources. The returned vector is
 // empty when parsing fails, even if some parts had already been read.
 HeapVector<Member<ArchiveResource>> MHTMLParser::parseArchive() {
   MIMEHeader* topLevelHeader = MIMEHeader::parseHeader(&m_lineReader);
   HeapVector<Member<ArchiveResource>> parsed;
   const bool succeeded = parseArchiveWithHeader(topLevelHeader, parsed);
   if (!succeeded) {
     // Drop whatever was collected before the failure.
     parsed.clear();
   }
   return parsed;
 }

 // Reads the parts announced by |header| from the line reader and appends the
 // resulting resources to |resources|. Returns false as soon as a header or a
 // part cannot be parsed; resources appended before the failure are left in
 // |resources|.
 bool MHTMLParser::parseArchiveWithHeader(
     MIMEHeader* header,
     HeapVector<Member<ArchiveResource>>& resources) {
   if (!header) {
     DVLOG(1) << "Failed to parse MHTML part: no header.";
     return false;
   }

   if (header->isMultipart()) {
     // Skip the message content (it's a generic browser specific message).
     skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary());

     for (bool archiveDone = false; !archiveDone;) {
       MIMEHeader* partHeader = MIMEHeader::parseHeader(&m_lineReader);
       if (!partHeader) {
         DVLOG(1) << "Failed to parse MHTML, invalid MIME header.";
         return false;
       }

       const bool isNestedAlternative =
           partHeader->contentType() == "multipart/alternative";
       if (!isNestedAlternative) {
         ArchiveResource* part =
             parseNextPart(*partHeader, header->endOfPartBoundary(),
                           header->endOfDocumentBoundary(), archiveDone);
         if (!part) {
           DVLOG(1) << "Failed to parse MHTML part.";
           return false;
         }
         resources.append(part);
         continue;
       }

       // Ignore IE nesting which makes little sense (IE seems to nest only
       // some of the frames): the nested parts are flattened into |resources|.
       if (!parseArchiveWithHeader(partHeader, resources)) {
         DVLOG(1) << "Failed to parse MHTML subframe.";
         return false;
       }
       // Resume at the next boundary of the enclosing multipart.
       skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary());
     }
     return true;
   }

   // With IE a page with no resource is not multi-part: the body is a single
   // part with no boundaries.
   bool unusedEndOfArchive = false;
   ArchiveResource* soleResource =
       parseNextPart(*header, String(), String(), unusedEndOfArchive);
   if (!soleResource)
     return false;
   resources.append(soleResource);
   return true;
 }

 // Reads the body of one part from the line reader, decodes it according to the
 // transfer encoding recorded in |mimeHeader| and wraps the result in an
 // ArchiveResource.
 //
 // |endOfPartBoundary| and |endOfDocumentBoundary| must be both empty or both
 // non-empty. When they are empty the body runs to the end of the data, which
 // is not supported for binary content. |endOfArchiveReached| is updated to
 // tell whether the boundary that ended the part was the end-of-document one.
 //
 // Returns nullptr when the part cannot be read or decoded (missing boundary,
 // invalid base64, ...).
 ArchiveResource* MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader,
                                             const String& endOfPartBoundary,
                                             const String& endOfDocumentBoundary,
                                             bool& endOfArchiveReached) {
   ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty());

   // If no content transfer encoding is specified, default to binary encoding.
   MIMEHeader::Encoding contentTransferEncoding =
       mimeHeader.contentTransferEncoding();
   if (contentTransferEncoding == MIMEHeader::Unknown)
     contentTransferEncoding = MIMEHeader::Binary;

   RefPtr<SharedBuffer> content = SharedBuffer::create();
   const bool checkBoundary = !endOfPartBoundary.isEmpty();
   bool endOfPartReached = false;
   if (contentTransferEncoding == MIMEHeader::Binary) {
     if (!checkBoundary) {
       DVLOG(1) << "Binary contents requires end of part";
       return nullptr;
     }
     // Binary data is not line oriented: temporarily use the boundary itself as
     // the separator so the whole body is returned as a single chunk.
     m_lineReader.setSeparator(endOfPartBoundary.utf8().data());
     Vector<char> part;
     if (!m_lineReader.nextChunk(part)) {
       DVLOG(1) << "Binary contents requires end of part";
       return nullptr;
     }
     content->append(part);
     m_lineReader.setSeparator("\r\n");
     // The two characters after the boundary tell a part boundary
     // ("--boundary" followed by CRLF) from the end-of-document boundary
     // ("--boundary--").
     Vector<char> nextChars;
     if (m_lineReader.peek(nextChars, 2) != 2) {
       DVLOG(1) << "Invalid seperator.";
       return nullptr;
     }
     endOfPartReached = true;
     ASSERT(nextChars.size() == 2);
     endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-');
     if (!endOfArchiveReached) {
       // Consume the remainder of the boundary line; it must be empty.
       String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback();
       if (!line.isEmpty()) {
         DVLOG(1) << "No CRLF at end of binary section.";
         return nullptr;
       }
     }
   } else {
     String line;
     while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback())
                 .isNull()) {
       endOfArchiveReached = (line == endOfDocumentBoundary);
       if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) {
         endOfPartReached = true;
         break;
       }
       // Note that we use line.utf8() and not line.ascii() as ascii turns
       // special characters (such as tab, line-feed...) into '?'.
       // The byte count has to come from the UTF-8 buffer: line.length() counts
       // UTF-16 code units, which is less than the encoded size as soon as the
       // line contains a non-ASCII character and would truncate the data.
       const CString utf8Line = line.utf8();
       content->append(utf8Line.data(), utf8Line.length());
       if (contentTransferEncoding == MIMEHeader::QuotedPrintable) {
         // The line reader removes the \r\n, but we need them for the content in
         // this case as the QuotedPrintable decoder expects CR-LF terminated
         // lines.
         content->append("\r\n", 2u);
       }
       // NOTE(review): for 7bit/8bit content the line terminators are dropped
       // here as well; confirm whether that is intended.
     }
   }
   if (!endOfPartReached && checkBoundary) {
     DVLOG(1) << "No boundary found for MHTML part.";
     return nullptr;
   }

   Vector<char> data;
   switch (contentTransferEncoding) {
     case MIMEHeader::Base64:
       if (!base64Decode(content->data(), content->size(), data)) {
         DVLOG(1) << "Invalid base64 content for MHTML part.";
         return nullptr;
       }
       break;
     case MIMEHeader::QuotedPrintable:
       quotedPrintableDecode(content->data(), content->size(), data);
       break;
     case MIMEHeader::EightBit:
     case MIMEHeader::SevenBit:
     case MIMEHeader::Binary:
       // These encodings carry the bytes as they are.
       data.append(content->data(), content->size());
       break;
     default:
       DVLOG(1) << "Invalid encoding for MHTML part.";
       return nullptr;
   }
   RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data);
   // FIXME: the URL in the MIME header could be relative, we should resolve it
   // if it is.  The specs mentions 5 ways to resolve a URL:
   // http://tools.ietf.org/html/rfc2557#section-5
   // IE and Firefox (UNMht) seem to generate only absolute URLs.
   KURL location = KURL(KURL(), mimeHeader.contentLocation());
   return ArchiveResource::create(contentBuffer, location,
                                  mimeHeader.contentID(),
                                  AtomicString(mimeHeader.contentType()),
                                  AtomicString(mimeHeader.charset()));
 }

 // static
 // Turns a Content-ID of the form "<id>" into the URL "cid:id". Returns an
 // empty KURL when |contentID| is not enclosed in angle brackets or has nothing
 // between them.
 KURL MHTMLParser::convertContentIDToURI(const String& contentID) {
   // This function is based primarily on an example from rfc2557 in section
   // 9.5, but also based on more normative parts of specs like:
   // - rfc2557 - MHTML - section 8.3 - "Use of the Content-ID header and CID
   //                                    URLs"
   // - rfc1738 - URL - section 4 (reserved scheme names;  includes "cid")
   // - rfc2387 - multipart/related - section 3.4 - "Syntax" (cid := msg-id)
   // - rfc0822 - msg-id = "<" addr-spec ">"; addr-spec = local-part "@" domain

   const bool tooShort = contentID.length() <= 2;
   if (tooShort ||
       !(contentID.startsWith('<') && contentID.endsWith('>'))) {
     return KURL();
   }

   // Copy everything between the surrounding '<' and '>'.
   StringBuilder cidURL;
   cidURL.append("cid:");
   cidURL.append(contentID, 1, contentID.length() - 2);
   return KURL(KURL(), cidURL.toString());
 }

 }  // namespace blink
	/*
	* Copyright (C) 2011 Google Inc. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions are
	* met:
	*
	* * Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* * Redistributions in binary form must reproduce the above
	* copyright notice, this list of conditions and the following disclaimer
	* in the documentation and/or other materials provided with the
	* distribution.
	* * Neither the name of Google Inc. nor the names of its
	* contributors may be used to endorse or promote products derived from
	* this software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#include "platform/mhtml/MHTMLParser.h"

	#include "platform/mhtml/ArchiveResource.h"
	#include "platform/network/ParsedContentType.h"
	#include "platform/text/QuotedPrintable.h"
	#include "wtf/HashMap.h"
	#include "wtf/text/Base64.h"
	#include "wtf/text/StringBuilder.h"
	#include "wtf/text/StringConcatenate.h"
	#include "wtf/text/StringHash.h"
	#include "wtf/text/WTFString.h"

	namespace blink {

	// Limited MIME header model: it keeps only the header fields that the MHTML
	// parser in this file reads back (content type, charset, transfer encoding,
	// location, id and, for multipart headers, the part boundaries).
	class MIMEHeader : public GarbageCollectedFinalized<MIMEHeader> {
	 public:
	  // Values of the Content-Transfer-Encoding field. |Unknown| is what a freshly
	  // constructed header carries and what unrecognized values map to.
	  enum Encoding {
	    QuotedPrintable,
	    Base64,
	    EightBit,
	    SevenBit,
	    Binary,
	    Unknown
	  };

	  static MIMEHeader* create() { return new MIMEHeader; }

	  // Builds a header from the lines delivered by |crLFLineReader|. Returns
	  // nullptr for a multipart header that carries no boundary parameter.
	  static MIMEHeader* parseHeader(SharedBufferChunkReader* crLFLineReader);

	  // True when the content type begins with "multipart/" (case-insensitive).
	  bool isMultipart() const {
	    return m_contentType.startsWith("multipart/", TextCaseInsensitive);
	  }

	  // Plain accessors for the parsed fields.
	  String contentType() const { return m_contentType; }
	  String charset() const { return m_charset; }
	  String contentLocation() const { return m_contentLocation; }
	  String contentID() const { return m_contentID; }
	  Encoding contentTransferEncoding() const { return m_contentTransferEncoding; }

	  // Multi-part type and boundaries are only valid for multipart MIME headers.
	  String multiPartType() const { return m_multipartType; }
	  String endOfPartBoundary() const { return m_endOfPartBoundary; }
	  String endOfDocumentBoundary() const { return m_endOfDocumentBoundary; }

	  // The class only holds String and enum members, so the trace body is empty.
	  DEFINE_INLINE_TRACE() {}

	 private:
	  MIMEHeader();

	  // Maps the text of a Content-Transfer-Encoding field to an Encoding value.
	  static Encoding parseContentTransferEncoding(const String&);

	  String m_contentType;
	  String m_charset;
	  Encoding m_contentTransferEncoding;
	  String m_contentLocation;
	  String m_contentID;
	  String m_multipartType;
	  String m_endOfPartBoundary;
	  String m_endOfDocumentBoundary;
	};

	typedef HashMap<String, String> KeyValueMap;

	// Reads header lines from |buffer| until an empty line or the end of the data
	// and returns them as a map from field name (lower-cased, whitespace stripped)
	// to field value (whitespace stripped).
	//
	// A line starting with a tab continues the value of the field read just
	// before it. Lines without a ':' are ignored. When a field name occurs more
	// than once, the last occurrence wins.
	static KeyValueMap retrieveKeyValuePairs(SharedBufferChunkReader* buffer) {
	  KeyValueMap keyValuePairs;
	  String line;
	  String key;
	  StringBuilder value;
	  while (!(line = buffer->nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
	    if (line.isEmpty())
	      break;  // Empty line means end of key/value section.
	    if (line[0] == '\t') {
	      // Continuation line: extend the value currently being accumulated.
	      value.append(line.substring(1));
	      continue;
	    }
	    // New key/value, store the previous one if any.
	    if (!key.isEmpty()) {
	      if (keyValuePairs.find(key) != keyValuePairs.end()) {
	        DVLOG(1) << "Key duplicate found in MIME header. Key is '" << key
	                 << "', previous value replaced.";
	      }
	      // Use set() so that a duplicate really replaces the previous value, as
	      // the log message states and as is done for the last field below.
	      keyValuePairs.set(key, value.toString().stripWhiteSpace());
	    }
	    // Always restart from a clean slate, even when nothing was stored, so
	    // that text gathered from a malformed line (empty field name, or a
	    // continuation following a line without ':') cannot leak into the value
	    // of the next field.
	    key = String();
	    value.clear();

	    size_t colonIndex = line.find(':');
	    if (colonIndex == kNotFound) {
	      // This is not a key value pair, ignore.
	      continue;
	    }
	    key = line.substring(0, colonIndex).lower().stripWhiteSpace();
	    value.append(line.substring(colonIndex + 1));
	  }
	  // Store the last property if there is one.
	  if (!key.isEmpty())
	    keyValuePairs.set(key, value.toString().stripWhiteSpace());
	  return keyValuePairs;
	}

	// Parses the header fields available from |buffer| into a new MIMEHeader.
	// Fields that are absent leave the corresponding member at its initial value.
	// Returns nullptr when the content type is multipart but no boundary parameter
	// is present.
	MIMEHeader* MIMEHeader::parseHeader(SharedBufferChunkReader* buffer) {
	  MIMEHeader* header = MIMEHeader::create();
	  KeyValueMap fields = retrieveKeyValuePairs(buffer);

	  KeyValueMap::iterator contentTypeField = fields.find("content-type");
	  if (contentTypeField != fields.end()) {
	    ParsedContentType contentType(contentTypeField->value);
	    header->m_contentType = contentType.mimeType();
	    if (header->isMultipart()) {
	      header->m_multipartType = contentType.parameterValueForName("type");
	      header->m_endOfPartBoundary =
	          contentType.parameterValueForName("boundary");
	      if (header->m_endOfPartBoundary.isNull()) {
	        DVLOG(1) << "No boundary found in multipart MIME header.";
	        return nullptr;
	      }
	      // A part ends with "--<boundary>", the whole document with
	      // "--<boundary>--".
	      header->m_endOfPartBoundary.insert("--", 0);
	      header->m_endOfDocumentBoundary = header->m_endOfPartBoundary;
	      header->m_endOfDocumentBoundary.append("--");
	    } else {
	      header->m_charset = contentType.charset().stripWhiteSpace();
	    }
	  }

	  KeyValueMap::iterator encodingField =
	      fields.find("content-transfer-encoding");
	  if (encodingField != fields.end()) {
	    header->m_contentTransferEncoding =
	        parseContentTransferEncoding(encodingField->value);
	  }

	  KeyValueMap::iterator locationField = fields.find("content-location");
	  if (locationField != fields.end())
	    header->m_contentLocation = locationField->value;

	  // See rfc2557 - section 8.3 - Use of the Content-ID header and CID URLs.
	  KeyValueMap::iterator idField = fields.find("content-id");
	  if (idField != fields.end())
	    header->m_contentID = idField->value;

	  return header;
	}

	// Translates the text of a Content-Transfer-Encoding field into an Encoding
	// value. The comparison ignores case and surrounding whitespace; anything not
	// listed in the table yields Unknown.
	MIMEHeader::Encoding MIMEHeader::parseContentTransferEncoding(
	    const String& text) {
	  static const struct {
	    const char* name;
	    Encoding encoding;
	  } kKnownEncodings[] = {
	      {"base64", Base64},
	      {"quoted-printable", QuotedPrintable},
	      {"8bit", EightBit},
	      {"7bit", SevenBit},
	      {"binary", Binary},
	  };

	  const String normalized = text.stripWhiteSpace().lower();
	  for (const auto& candidate : kKnownEncodings) {
	    if (normalized == candidate.name)
	      return candidate.encoding;
	  }
	  DVLOG(1) << "Unknown encoding '" << text << "' found in MIME header.";
	  return Unknown;
	}

	// A new header starts with an Unknown transfer encoding; the String members
	// are default-constructed.
	MIMEHeader::MIMEHeader() : m_contentTransferEncoding(Unknown) {}

	// Consumes lines from |lineReader| up to and including the first line equal to
	// |boundary|. Returns true if such a line was found, false if the reader ran
	// out of data first.
	static bool skipLinesUntilBoundaryFound(SharedBufferChunkReader& lineReader,
	                                        const String& boundary) {
	  for (;;) {
	    const String current =
	        lineReader.nextChunkAsUTF8StringWithLatin1Fallback();
	    if (current.isNull())
	      return false;  // No more lines and the boundary never showed up.
	    if (current == boundary)
	      return true;
	  }
	}

	// Sets up the line reader over |data| with "\r\n" as the chunk separator.
	MHTMLParser::MHTMLParser(PassRefPtr<const SharedBuffer> data)
	: m_lineReader(std::move(data), "\r\n") {}

	// Parses the whole archive and returns its resources. The returned vector is
	// empty when parsing fails, even if some parts had already been read.
	HeapVector<Member<ArchiveResource>> MHTMLParser::parseArchive() {
	  MIMEHeader* topLevelHeader = MIMEHeader::parseHeader(&m_lineReader);
	  HeapVector<Member<ArchiveResource>> parsed;
	  const bool succeeded = parseArchiveWithHeader(topLevelHeader, parsed);
	  if (!succeeded) {
	    // Drop whatever was collected before the failure.
	    parsed.clear();
	  }
	  return parsed;
	}

	// Reads the parts announced by |header| from the line reader and appends the
	// resulting resources to |resources|. Returns false as soon as a header or a
	// part cannot be parsed; resources appended before the failure are left in
	// |resources|.
	bool MHTMLParser::parseArchiveWithHeader(
	    MIMEHeader* header,
	    HeapVector<Member<ArchiveResource>>& resources) {
	  if (!header) {
	    DVLOG(1) << "Failed to parse MHTML part: no header.";
	    return false;
	  }

	  if (header->isMultipart()) {
	    // Skip the message content (it's a generic browser specific message).
	    skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary());

	    for (bool archiveDone = false; !archiveDone;) {
	      MIMEHeader* partHeader = MIMEHeader::parseHeader(&m_lineReader);
	      if (!partHeader) {
	        DVLOG(1) << "Failed to parse MHTML, invalid MIME header.";
	        return false;
	      }

	      const bool isNestedAlternative =
	          partHeader->contentType() == "multipart/alternative";
	      if (!isNestedAlternative) {
	        ArchiveResource* part =
	            parseNextPart(*partHeader, header->endOfPartBoundary(),
	                          header->endOfDocumentBoundary(), archiveDone);
	        if (!part) {
	          DVLOG(1) << "Failed to parse MHTML part.";
	          return false;
	        }
	        resources.append(part);
	        continue;
	      }

	      // Ignore IE nesting which makes little sense (IE seems to nest only
	      // some of the frames): the nested parts are flattened into |resources|.
	      if (!parseArchiveWithHeader(partHeader, resources)) {
	        DVLOG(1) << "Failed to parse MHTML subframe.";
	        return false;
	      }
	      // Resume at the next boundary of the enclosing multipart.
	      skipLinesUntilBoundaryFound(m_lineReader, header->endOfPartBoundary());
	    }
	    return true;
	  }

	  // With IE a page with no resource is not multi-part: the body is a single
	  // part with no boundaries.
	  bool unusedEndOfArchive = false;
	  ArchiveResource* soleResource =
	      parseNextPart(*header, String(), String(), unusedEndOfArchive);
	  if (!soleResource)
	    return false;
	  resources.append(soleResource);
	  return true;
	}

	// Reads the body of one part from the line reader, decodes it according to the
	// transfer encoding recorded in |mimeHeader| and wraps the result in an
	// ArchiveResource.
	//
	// |endOfPartBoundary| and |endOfDocumentBoundary| must be both empty or both
	// non-empty. When they are empty the body runs to the end of the data, which
	// is not supported for binary content. |endOfArchiveReached| is updated to
	// tell whether the boundary that ended the part was the end-of-document one.
	//
	// Returns nullptr when the part cannot be read or decoded (missing boundary,
	// invalid base64, ...).
	ArchiveResource* MHTMLParser::parseNextPart(const MIMEHeader& mimeHeader,
	                                            const String& endOfPartBoundary,
	                                            const String& endOfDocumentBoundary,
	                                            bool& endOfArchiveReached) {
	  ASSERT(endOfPartBoundary.isEmpty() == endOfDocumentBoundary.isEmpty());

	  // If no content transfer encoding is specified, default to binary encoding.
	  MIMEHeader::Encoding contentTransferEncoding =
	      mimeHeader.contentTransferEncoding();
	  if (contentTransferEncoding == MIMEHeader::Unknown)
	    contentTransferEncoding = MIMEHeader::Binary;

	  RefPtr<SharedBuffer> content = SharedBuffer::create();
	  const bool checkBoundary = !endOfPartBoundary.isEmpty();
	  bool endOfPartReached = false;
	  if (contentTransferEncoding == MIMEHeader::Binary) {
	    if (!checkBoundary) {
	      DVLOG(1) << "Binary contents requires end of part";
	      return nullptr;
	    }
	    // Binary data is not line oriented: temporarily use the boundary itself as
	    // the separator so the whole body is returned as a single chunk.
	    m_lineReader.setSeparator(endOfPartBoundary.utf8().data());
	    Vector<char> part;
	    if (!m_lineReader.nextChunk(part)) {
	      DVLOG(1) << "Binary contents requires end of part";
	      return nullptr;
	    }
	    content->append(part);
	    m_lineReader.setSeparator("\r\n");
	    // The two characters after the boundary tell a part boundary
	    // ("--boundary" followed by CRLF) from the end-of-document boundary
	    // ("--boundary--").
	    Vector<char> nextChars;
	    if (m_lineReader.peek(nextChars, 2) != 2) {
	      DVLOG(1) << "Invalid seperator.";
	      return nullptr;
	    }
	    endOfPartReached = true;
	    ASSERT(nextChars.size() == 2);
	    endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-');
	    if (!endOfArchiveReached) {
	      // Consume the remainder of the boundary line; it must be empty.
	      String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback();
	      if (!line.isEmpty()) {
	        DVLOG(1) << "No CRLF at end of binary section.";
	        return nullptr;
	      }
	    }
	  } else {
	    String line;
	    while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback())
	                .isNull()) {
	      endOfArchiveReached = (line == endOfDocumentBoundary);
	      if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) {
	        endOfPartReached = true;
	        break;
	      }
	      // Note that we use line.utf8() and not line.ascii() as ascii turns
	      // special characters (such as tab, line-feed...) into '?'.
	      // The byte count has to come from the UTF-8 buffer: line.length() counts
	      // UTF-16 code units, which is less than the encoded size as soon as the
	      // line contains a non-ASCII character and would truncate the data.
	      const CString utf8Line = line.utf8();
	      content->append(utf8Line.data(), utf8Line.length());
	      if (contentTransferEncoding == MIMEHeader::QuotedPrintable) {
	        // The line reader removes the \r\n, but we need them for the content in
	        // this case as the QuotedPrintable decoder expects CR-LF terminated
	        // lines.
	        content->append("\r\n", 2u);
	      }
	      // NOTE(review): for 7bit/8bit content the line terminators are dropped
	      // here as well; confirm whether that is intended.
	    }
	  }
	  if (!endOfPartReached && checkBoundary) {
	    DVLOG(1) << "No boundary found for MHTML part.";
	    return nullptr;
	  }

	  Vector<char> data;
	  switch (contentTransferEncoding) {
	    case MIMEHeader::Base64:
	      if (!base64Decode(content->data(), content->size(), data)) {
	        DVLOG(1) << "Invalid base64 content for MHTML part.";
	        return nullptr;
	      }
	      break;
	    case MIMEHeader::QuotedPrintable:
	      quotedPrintableDecode(content->data(), content->size(), data);
	      break;
	    case MIMEHeader::EightBit:
	    case MIMEHeader::SevenBit:
	    case MIMEHeader::Binary:
	      // These encodings carry the bytes as they are.
	      data.append(content->data(), content->size());
	      break;
	    default:
	      DVLOG(1) << "Invalid encoding for MHTML part.";
	      return nullptr;
	  }
	  RefPtr<SharedBuffer> contentBuffer = SharedBuffer::adoptVector(data);
	  // FIXME: the URL in the MIME header could be relative, we should resolve it
	  // if it is. The specs mentions 5 ways to resolve a URL:
	  // http://tools.ietf.org/html/rfc2557#section-5
	  // IE and Firefox (UNMht) seem to generate only absolute URLs.
	  KURL location = KURL(KURL(), mimeHeader.contentLocation());
	  return ArchiveResource::create(contentBuffer, location,
	                                 mimeHeader.contentID(),
	                                 AtomicString(mimeHeader.contentType()),
	                                 AtomicString(mimeHeader.charset()));
	}

	// static
	// Turns a Content-ID of the form "<id>" into the URL "cid:id". Returns an
	// empty KURL when |contentID| is not enclosed in angle brackets or has nothing
	// between them.
	KURL MHTMLParser::convertContentIDToURI(const String& contentID) {
	  // This function is based primarily on an example from rfc2557 in section
	  // 9.5, but also based on more normative parts of specs like:
	  // - rfc2557 - MHTML - section 8.3 - "Use of the Content-ID header and CID
	  //                                    URLs"
	  // - rfc1738 - URL - section 4 (reserved scheme names; includes "cid")
	  // - rfc2387 - multipart/related - section 3.4 - "Syntax" (cid := msg-id)
	  // - rfc0822 - msg-id = "<" addr-spec ">"; addr-spec = local-part "@" domain

	  if (contentID.length() <= 2)
	    return KURL();

	  // The logical-or below had been mangled into an escaped, invalid token.
	  if (!contentID.startsWith('<') || !contentID.endsWith('>'))
	    return KURL();

	  // Copy everything between the surrounding '<' and '>'.
	  StringBuilder uriBuilder;
	  uriBuilder.append("cid:");
	  uriBuilder.append(contentID, 1, contentID.length() - 2);
	  return KURL(KURL(), uriBuilder.toString());
	}

	} // namespace blink