// Source: third_party/WebKit/Source/core/html/parser/HTMLConstructionSite.cpp (chromium/src)

 /*
  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
  * Copyright (C) 2011 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #include "core/html/parser/HTMLConstructionSite.h"

 #include "bindings/core/v8/Microtask.h"
 #include "bindings/core/v8/V8PerIsolateData.h"
 #include "core/HTMLElementFactory.h"
 #include "core/HTMLNames.h"
 #include "core/dom/Comment.h"
 #include "core/dom/DocumentFragment.h"
 #include "core/dom/DocumentType.h"
 #include "core/dom/Element.h"
 #include "core/dom/ElementTraversal.h"
 #include "core/dom/IgnoreDestructiveWriteCountIncrementer.h"
 #include "core/dom/ScriptLoader.h"
 #include "core/dom/TemplateContentDocumentFragment.h"
 #include "core/dom/Text.h"
 #include "core/dom/ThrowOnDynamicMarkupInsertionCountIncrementer.h"
 #include "core/dom/custom/CEReactionsScope.h"
 #include "core/dom/custom/CustomElementDefinition.h"
 #include "core/dom/custom/CustomElementDescriptor.h"
 #include "core/dom/custom/CustomElementRegistry.h"
 #include "core/frame/LocalDOMWindow.h"
 #include "core/frame/LocalFrame.h"
 #include "core/html/HTMLFormElement.h"
 #include "core/html/HTMLHtmlElement.h"
 #include "core/html/HTMLPlugInElement.h"
 #include "core/html/HTMLScriptElement.h"
 #include "core/html/HTMLTemplateElement.h"
 #include "core/html/parser/AtomicHTMLToken.h"
 #include "core/html/parser/HTMLParserIdioms.h"
 #include "core/html/parser/HTMLParserReentryPermit.h"
 #include "core/html/parser/HTMLStackItem.h"
 #include "core/html/parser/HTMLToken.h"
 #include "core/loader/FrameLoader.h"
 #include "core/loader/FrameLoaderClient.h"
 #include "core/svg/SVGScriptElement.h"
 #include "platform/text/TextBreakIterator.h"
 #include <limits>

 namespace blink {

 using namespace HTMLNames;

 // Open-element stack depth above which attachLater() stops nesting new nodes
 // under the requested parent and attaches them to that parent's parent
 // instead (when it has one).
 static const unsigned maximumHTMLParserDOMTreeDepth = 512;

 // Hands the token's attributes to |element|. If the content policy does not
 // allow scripting content, the element is first asked to strip scripting
 // attributes from the token's attribute list.
 static inline void setAttributes(Element* element,
                                  AtomicHTMLToken* token,
                                  ParserContentPolicy parserContentPolicy) {
   const bool scriptingAllowed = scriptingContentIsAllowed(parserContentPolicy);
   if (!scriptingAllowed) {
     element->stripScriptingAttributes(token->attributes());
   }
   element->parserSetAttributes(token->attributes());
 }

 // Returns true when |item| is one of dd, dt, li, option, optgroup, p, rb, rp,
 // rt or rtc.
 static bool hasImpliedEndTag(const HTMLStackItem* item) {
   if (item->hasTagName(ddTag) || item->hasTagName(dtTag) ||
       item->hasTagName(liTag))
     return true;
   if (item->hasTagName(optionTag) || item->hasTagName(optgroupTag) ||
       item->hasTagName(pTag))
     return true;
   if (item->hasTagName(rbTag) || item->hasTagName(rpTag))
     return true;
   return item->hasTagName(rtTag) || item->hasTagName(rtcTag);
 }

 // Text length limiting applies to every container except HTML <script>,
 // HTML <style> and SVG <script> elements.
 static bool shouldUseLengthLimit(const ContainerNode& node) {
   if (isHTMLScriptElement(node) || isHTMLStyleElement(node))
     return false;
   return !isSVGScriptElement(node);
 }

 // Maximum text node length for children of |node|: the default limit when
 // limiting applies, otherwise effectively unlimited.
 static unsigned textLengthLimitForContainer(const ContainerNode& node) {
   if (!shouldUseLengthLimit(node))
     return std::numeric_limits<unsigned>::max();
   return Text::defaultLengthLimit;
 }

 static inline bool isAllWhitespace(const String& string) {
   return string.isAllSpecialCharacters<isHTMLSpace<UChar>>();
 }

 // Places task.child under task.parent: before task.nextChild when one is
 // set, otherwise at the end. A <template> parent is first replaced by its
 // content() so the child lands in the template contents.
 static inline void insert(HTMLConstructionSiteTask& task) {
   if (isHTMLTemplateElement(*task.parent))
     task.parent = toHTMLTemplateElement(task.parent.get())->content();

   if (!task.nextChild) {
     task.parent->parserAppendChild(task.child.get());
     return;
   }
   task.parent->parserInsertBefore(task.child.get(), *task.nextChild);
 }

 // Runs an Insert task: inserts the child and, if it is an element, notifies
 // it that child parsing begins (and immediately ends for self-closing
 // insertions).
 static inline void executeInsertTask(HTMLConstructionSiteTask& task) {
   ASSERT(task.operation == HTMLConstructionSiteTask::Insert);

   insert(task);

   if (!task.child->isElementNode())
     return;

   Element& insertedElement = toElement(*task.child);
   insertedElement.beginParsingChildren();
   if (task.selfClosing)
     insertedElement.finishParsingChildren();
 }

 // Runs an InsertText task. If the node that would precede the new text is
 // itself a text node and the combined length stays below the container's
 // limit, the data is appended to that node; otherwise the new text node is
 // inserted.
 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
 static inline void executeInsertTextTask(HTMLConstructionSiteTask& task) {
   ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
   ASSERT(task.child->isTextNode());

   Text* incomingText = toText(task.child.get());

   // The node immediately before the insertion point.
   Node* precedingNode = nullptr;
   if (task.nextChild)
     precedingNode = task.nextChild->previousSibling();
   else
     precedingNode = task.parent->lastChild();

   if (precedingNode && precedingNode->isTextNode()) {
     Text* existingText = toText(precedingNode);
     const unsigned limit = textLengthLimitForContainer(*task.parent);
     const bool fitsInExistingNode =
         existingText->length() + incomingText->length() < limit;
     if (fitsInExistingNode) {
       existingText->parserAppendData(incomingText->data());
       return;
     }
   }

   insert(task);
 }

 // Runs a Reparent task by appending task.child to task.parent.
 static inline void executeReparentTask(HTMLConstructionSiteTask& task) {
   ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);

   task.parent->parserAppendChild(task.child);
 }

 // Runs an InsertAlreadyParsedChild task. Unlike executeInsertTask(), this
 // only performs the insertion and does not call begin/finishParsingChildren
 // on the child.
 static inline void executeInsertAlreadyParsedChildTask(
     HTMLConstructionSiteTask& task) {
   ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);

   insert(task);
 }

 // Runs a TakeAllChildren task: task.parent takes over all children of
 // task.oldParent().
 static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task) {
   ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);

   task.parent->parserTakeAllChildrenFrom(*task.oldParent());
 }

 // Dispatches |task| to the executor matching its operation. The member task
 // queue is expected to be empty while a task runs.
 void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task) {
   ASSERT(m_taskQueue.isEmpty());

   switch (task.operation) {
     case HTMLConstructionSiteTask::Insert:
       executeInsertTask(task);
       return;
     case HTMLConstructionSiteTask::InsertText:
       executeInsertTextTask(task);
       return;

     // All the cases below this point are only used by the adoption agency.
     case HTMLConstructionSiteTask::InsertAlreadyParsedChild:
       executeInsertAlreadyParsedChildTask(task);
       return;
     case HTMLConstructionSiteTask::Reparent:
       executeReparentTask(task);
       return;
     case HTMLConstructionSiteTask::TakeAllChildren:
       executeTakeAllChildrenTask(task);
       return;
     default:
       break;
   }

   ASSERT_NOT_REACHED();
 }

 // Picks an index in (currentPosition, proposedBreakIndex] at which |string|
 // may be split without cutting through a character cluster. Returns
 // |proposedBreakIndex| when it is already acceptable, an earlier index when
 // one exists, and 0 when no acceptable index was found.
 //
 // This is only needed for TextDocuments where we might have text nodes
 // approaching the default length limit (~64k) and we don't want to break a
 // text node in the middle of a combining character.
 static unsigned findBreakIndexBetween(const StringBuilder& string,
                                       unsigned currentPosition,
                                       unsigned proposedBreakIndex) {
   ASSERT(currentPosition < proposedBreakIndex);
   ASSERT(proposedBreakIndex <= string.length());

   // The end of the string is always a valid break. Latin-1 does not have
   // breakable boundaries either; if we ever moved to a different 8-bit
   // encoding this could be wrong.
   if (proposedBreakIndex == string.length() || string.is8Bit())
     return proposedBreakIndex;

   const unsigned proposedOffset = proposedBreakIndex - currentPosition;
   const unsigned remainingLength = string.length() - currentPosition;
   // We need at least two characters look-ahead to account for UTF-16
   // surrogates, but can't search off the end of the buffer!
   const unsigned searchLength = std::min(proposedOffset + 2, remainingLength);
   NonSharedCharacterBreakIterator it(string.characters16() + currentPosition,
                                      searchLength);

   if (it.isBreak(proposedOffset))
     return proposedBreakIndex;

   const int precedingOffset = it.preceding(proposedOffset);
   if (precedingOffset > 0)
     return currentPosition + precedingOffset;

   // We failed to find a breakable point, let the caller figure out what to do.
   return 0;
 }

 // Strings composed entirely of whitespace are likely to be repeated, so they
 // are routed through AtomicString to share a single string for each. The
 // string is scanned only when |whitespaceMode| is WhitespaceUnknown.
 static String atomizeIfAllWhitespace(const String& string,
                                      WhitespaceMode whitespaceMode) {
   if (whitespaceMode != AllWhitespace) {
     if (whitespaceMode != WhitespaceUnknown || !isAllWhitespace(string))
       return string;
   }
   return AtomicString(string).getString();
 }

 // Turns the accumulated pending text into one or more InsertText tasks and
 // queues them. Does nothing when there is no pending text, or when |mode| is
 // FlushIfAtTextLimit and the pending text's parent has no length limit.
 // Text longer than the parent's length limit is split into several text
 // nodes.
 void HTMLConstructionSite::flushPendingText(FlushMode mode) {
   if (m_pendingText.isEmpty())
     return;

   if (mode == FlushIfAtTextLimit &&
       !shouldUseLengthLimit(*m_pendingText.parent))
     return;

   PendingText pendingText;
   // Hold onto the current pending text on the stack so that queueTask doesn't
   // recurse infinitely.
   m_pendingText.swap(pendingText);
   ASSERT(m_pendingText.isEmpty());

   // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is
   // necessary for performance, see:
   // https://bugs.webkit.org/show_bug.cgi?id=55898
   unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent);

   unsigned currentPosition = 0;
   const StringBuilder& string = pendingText.stringBuilder;
   while (currentPosition < string.length()) {
     // Clamp against the remaining length before adding so the sum cannot
     // wrap around when the limit is the maximum unsigned value.
     const unsigned remainingLength = string.length() - currentPosition;
     unsigned proposedBreakIndex =
         currentPosition + std::min(lengthLimit, remainingLength);
     unsigned breakIndex =
         findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
     ASSERT(breakIndex <= string.length());
     if (breakIndex <= currentPosition) {
       // findBreakIndexBetween returns 0 when it cannot find a break point.
       // Using that value would make no forward progress (and underflow the
       // substring length below), so keep the rest of the string in one node.
       breakIndex = string.length();
     }
     String substring =
         string.substring(currentPosition, breakIndex - currentPosition);
     substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode);

     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
     task.parent = pendingText.parent;
     task.nextChild = pendingText.nextChild;
     task.child = Text::create(task.parent->document(), substring);
     queueTask(task);

     ASSERT(breakIndex > currentPosition);
     ASSERT(breakIndex - currentPosition == substring.length());
     ASSERT(toText(task.child.get())->length() == substring.length());
     currentPosition = breakIndex;
   }
 }

 // Appends |task| to the task queue. Any pending text is flushed into the
 // queue first, so it ends up ahead of |task|.
 void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task) {
   flushPendingText(FlushAlways);
   ASSERT(m_pendingText.isEmpty());
   m_taskQueue.append(task);
 }

 // Schedules |child| for insertion under |parent|. When foster parenting is
 // in effect the child is handed to fosterParent() instead. Once the open
 // element stack is deeper than maximumHTMLParserDOMTreeDepth, the child is
 // attached to the parent's parent (if any) rather than nested further.
 void HTMLConstructionSite::attachLater(ContainerNode* parent,
                                        Node* child,
                                        bool selfClosing) {
   ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) ||
          !child->isElementNode() ||
          !toScriptLoaderIfPossible(toElement(child)));
   ASSERT(pluginContentIsAllowed(m_parserContentPolicy) ||
          !isHTMLPlugInElement(child));

   HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
   task.parent = parent;
   task.child = child;
   task.selfClosing = selfClosing;

   if (shouldFosterParent()) {
     fosterParent(task.child);
     return;
   }

   // Add as a sibling of the parent if we have reached the maximum depth
   // allowed.
   const bool depthExceeded =
       m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth;
   if (depthExceeded) {
     if (ContainerNode* grandparent = task.parent->parentNode())
       task.parent = grandparent;
   }

   ASSERT(task.parent);
   queueTask(task);
 }

 void HTMLConstructionSite::executeQueuedTasks() {
   // This has no affect on pendingText, and we may have pendingText remaining
   // after executing all other queued tasks.
   const size_t size = m_taskQueue.size();
   if (!size)
     return;

   // Copy the task queue into a local variable in case executeTask re-enters the
   // parser.
   TaskQueue queue;
   queue.swap(m_taskQueue);

   for (size_t i = 0; i < size; ++i)
     executeTask(queue[i]);

   // We might be detached now.
 }

 // Creates a construction site for |document|. The document is also the
 // initial attachment root. The site starts out not parsing a fragment, with
 // foster-parent redirection off, and with its quirks flag copied from the
 // document.
 HTMLConstructionSite::HTMLConstructionSite(
     HTMLParserReentryPermit* reentryPermit,
     Document& document,
     ParserContentPolicy parserContentPolicy)
     : m_reentryPermit(reentryPermit),
       m_document(&document),
       m_attachmentRoot(document),
       m_parserContentPolicy(parserContentPolicy),
       m_isParsingFragment(false),
       m_redirectAttachToFosterParent(false),
       m_inQuirksMode(document.inQuirksMode()) {
   ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
 }

 // Switches the site into fragment-parsing mode with |fragment| as the
 // attachment root. Unless the context element lives in a template document,
 // the form pointer is set to the nearest HTMLFormElement among
 // |contextElement| and its ancestors.
 void HTMLConstructionSite::initFragmentParsing(DocumentFragment* fragment,
                                                Element* contextElement) {
   DCHECK(contextElement);
   DCHECK_EQ(m_document, &fragment->document());
   DCHECK_EQ(m_inQuirksMode, fragment->document().inQuirksMode());
   DCHECK(!m_isParsingFragment);
   DCHECK(!m_form);

   m_attachmentRoot = fragment;
   m_isParsingFragment = true;

   const bool contextInTemplateDocument =
       contextElement->document().isTemplateDocument();
   if (contextInTemplateDocument)
     return;

   m_form = Traversal<HTMLFormElement>::firstAncestorOrSelf(*contextElement);
 }

 // Only checks invariants: by destruction time both the task queue and the
 // pending text are expected to be empty.
 HTMLConstructionSite::~HTMLConstructionSite() {
   // Depending on why we're being destroyed it might be OK to forget queued
   // tasks, but currently we don't expect to.
   ASSERT(m_taskQueue.isEmpty());
   // Currently we assume that text will never be the last token in the document
   // and that we'll always queue some additional task to cause it to flush.
   ASSERT(m_pendingText.isEmpty());
 }

 // Tracing hook: passes each of the members listed below to |visitor|.
 DEFINE_TRACE(HTMLConstructionSite) {
   visitor->trace(m_document);
   visitor->trace(m_attachmentRoot);
   visitor->trace(m_head);
   visitor->trace(m_form);
   visitor->trace(m_openElements);
   visitor->trace(m_activeFormattingElements);
   visitor->trace(m_taskQueue);
   visitor->trace(m_pendingText);
 }

 // Disconnects the site from its document: discards any pending text and
 // clears the document and attachment-root pointers.
 void HTMLConstructionSite::detach() {
   // FIXME: We'd like to ASSERT here that we're canceling and not just
   // discarding text that really should have made it into the DOM earlier, but
   // there doesn't seem to be a nice way to do that.
   m_pendingText.discard();
   m_document = nullptr;
   m_attachmentRoot = nullptr;
 }

 // Releases the form pointer held by the site and returns the form.
 HTMLFormElement* HTMLConstructionSite::takeForm() {
   HTMLFormElement* releasedForm = m_form.release();
   return releasedForm;
 }

 // Creates the root <html> element from |token|, attaches it to the
 // attachment root and pushes it onto the open element stack. Queued tasks
 // are executed before the element is told it was inserted by the parser.
 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(
     AtomicHTMLToken* token) {
   ASSERT(m_document);

   HTMLHtmlElement* htmlElement = HTMLHtmlElement::create(*m_document);
   setAttributes(htmlElement, token, m_parserContentPolicy);
   attachLater(m_attachmentRoot, htmlElement);
   m_openElements.pushHTMLHtmlElement(
       HTMLStackItem::create(htmlElement, token));

   executeQueuedTasks();
   htmlElement->insertedByParser();
 }

 // Copies onto |element| every attribute of |token| that the element does not
 // already have; attributes already present on the element are left as is.
 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(
     AtomicHTMLToken* token,
     Element* element) {
   for (const Attribute& tokenAttribute : token->attributes()) {
     const bool alreadyPresent =
         element->attributesWithoutUpdate().findIndex(tokenAttribute.name()) !=
         kNotFound;
     if (alreadyPresent)
       continue;
     element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
   }
 }

 // Merges the attributes of an extra <html> start tag into the existing html
 // element. Fragments do not have a root HTML element, so any additional HTML
 // elements encountered during fragment parsing are ignored.
 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(
     AtomicHTMLToken* token) {
   if (!m_isParsingFragment) {
     mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
   }
 }

 // Merges the attributes of an extra <body> start tag into the body element
 // on the open element stack.
 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(
     AtomicHTMLToken* token) {
   auto* existingBody = m_openElements.bodyElement();
   mergeAttributesFromTokenIntoElement(token, existingBody);
 }

 // Puts the document into quirks mode, unless a fragment is being parsed, in
 // which case nothing changes.
 void HTMLConstructionSite::setDefaultCompatibilityMode() {
   if (!m_isParsingFragment) {
     setCompatibilityMode(Document::QuirksMode);
   }
 }

 // Records whether |mode| is quirks mode in the site's cached flag and
 // forwards |mode| to the document.
 void HTMLConstructionSite::setCompatibilityMode(
     Document::CompatibilityMode mode) {
   m_inQuirksMode = (mode == Document::QuirksMode);
   m_document->setCompatibilityMode(mode);
 }

 void HTMLConstructionSite::setCompatibilityModeFromDoctype(
     const String& name,
     const String& publicId,
     const String& systemId) {
   // There are three possible compatibility modes:
   // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in
   // this mode, e.g., unit types can be omitted from numbers.
   // Limited Quirks - This mode is identical to no-quirks mode except for its
   // treatment of line-height in the inline box model.
   // No Quirks - no quirks apply. Web pages will obey the specifications to the
   // letter.

   // Check for Quirks Mode.
   if (name != "html" ||
       publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//",
                           TextCaseInsensitive) ||
       publicId.startsWith(
           "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//",
           TextCaseInsensitive) ||
       publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML 2.0//", TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML 2.1E//", TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML 3.0//", TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML 3.2 Final//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML 3.2//", TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML 3//", TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML Level 0//", TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML Level 1//", TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML Level 2//", TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML Level 3//", TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML Strict Level 0//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML Strict Level 1//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML Strict Level 2//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML Strict Level 3//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML Strict//", TextCaseInsensitive) ||
       publicId.startsWith("-//IETF//DTD HTML//", TextCaseInsensitive) ||
       publicId.startsWith("-//Metrius//DTD Metrius Presentational//",
                           TextCaseInsensitive) ||
       publicId.startsWith(
           "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//",
           TextCaseInsensitive) ||
       publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//",
                           TextCaseInsensitive) ||
       publicId.startsWith(
           "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//",
           TextCaseInsensitive) ||
       publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//",
                           TextCaseInsensitive) ||
       publicId.startsWith(
           "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//",
           TextCaseInsensitive) ||
       publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO "
                           "6.0::19990601::extensions to HTML 4.0//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO "
                           "4.0::19971010::extensions to HTML 4.0//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//",
                           TextCaseInsensitive) ||
       publicId.startsWith(
           "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//",
           TextCaseInsensitive) ||
       publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//W3C//DTD HTML 3.2 Final//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//W3C//DTD HTML 3.2//", TextCaseInsensitive) ||
       publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//W3C//DTD HTML Experimental 970421//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//W3C//DTD W3 HTML//", TextCaseInsensitive) ||
       publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", TextCaseInsensitive) ||
       equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//") ||
       publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//WebTechs//DTD Mozilla HTML//",
                           TextCaseInsensitive) ||
       equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN") ||
       equalIgnoringCase(publicId, "HTML") ||
       equalIgnoringCase(
           systemId,
           "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") ||
       (systemId.isEmpty() &&
        publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//",
                            TextCaseInsensitive)) ||
       (systemId.isEmpty() &&
        publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//",
                            TextCaseInsensitive))) {
     setCompatibilityMode(Document::QuirksMode);
     return;
   }

   // Check for Limited Quirks Mode.
   if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//",
                           TextCaseInsensitive) ||
       publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//",
                           TextCaseInsensitive) ||
       (!systemId.isEmpty() &&
        publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//",
                            TextCaseInsensitive)) ||
       (!systemId.isEmpty() &&
        publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//",
                            TextCaseInsensitive))) {
     setCompatibilityMode(Document::LimitedQuirksMode);
     return;
   }

   // Otherwise we are No Quirks Mode.
   setCompatibilityMode(Document::NoQuirksMode);
 }

 // Handles end of input: flushes everything outstanding, then pops every
 // element off the open element stack.
 void HTMLConstructionSite::processEndOfFile() {
   ASSERT(currentNode());
   flush(FlushAlways);
   openElements()->popAll();
 }

 // Flushes whatever is still pending and then tells the document that
 // parsing has finished.
 void HTMLConstructionSite::finishedParsing() {
   // We shouldn't have any queued tasks but we might have pending text which we
   // need to promote to tasks and execute.
   ASSERT(m_taskQueue.isEmpty());
   flush(FlushAlways);
   m_document->finishedParsing();
 }

 // Creates a DocumentType node from a DOCTYPE token, attaches it to the
 // attachment root and sets the compatibility mode: quirks when the token
 // forces it, otherwise whatever the name and identifiers select.
 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token) {
   ASSERT(token->type() == HTMLToken::DOCTYPE);

   const String publicId =
       StringImpl::create8BitIfPossible(token->publicIdentifier());
   const String systemId =
       StringImpl::create8BitIfPossible(token->systemIdentifier());
   attachLater(m_attachmentRoot, DocumentType::create(m_document, token->name(),
                                                      publicId, systemId));

   // DOCTYPE nodes are only processed when parsing fragments w/o
   // contextElements, which never occurs.  However, if we ever chose to support
   // such, this code is subtly wrong, because context-less fragments can
   // determine their own quirks mode, and thus change parsing rules (like <p>
   // inside <table>).  For now we ASSERT that we never hit this code in a
   // fragment, as changing the owning document's compatibility mode would be
   // wrong.
   ASSERT(!m_isParsingFragment);
   if (m_isParsingFragment)
     return;

   if (!token->forceQuirks()) {
     setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
     return;
   }
   setCompatibilityMode(Document::QuirksMode);
 }

 // Creates a Comment from |token| in the current node's owner document and
 // attaches it under the current node.
 void HTMLConstructionSite::insertComment(AtomicHTMLToken* token) {
   ASSERT(token->type() == HTMLToken::Comment);
   ContainerNode* parent = currentNode();
   Comment* comment =
       Comment::create(ownerDocumentForCurrentNode(), token->comment());
   attachLater(parent, comment);
 }

 // Creates a Comment from |token| in the site's document and attaches it
 // under the attachment root.
 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token) {
   ASSERT(token->type() == HTMLToken::Comment);
   ASSERT(m_document);
   Comment* comment = Comment::create(*m_document, token->comment());
   attachLater(m_attachmentRoot, comment);
 }

 // Creates a Comment from |token| and attaches it under the root node of the
 // open element stack.
 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(
     AtomicHTMLToken* token) {
   ASSERT(token->type() == HTMLToken::Comment);
   ContainerNode* root = m_openElements.rootNode();
   Comment* comment = Comment::create(root->document(), token->comment());
   attachLater(root, comment);
 }

 // Creates the <head> element from |token|, remembers its stack item in
 // m_head, attaches it under the current node and pushes it onto the open
 // element stack.
 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token) {
   ASSERT(!shouldFosterParent());
   HTMLElement* headElement = createHTMLElement(token);
   m_head = HTMLStackItem::create(headElement, token);
   attachLater(currentNode(), m_head->element());
   m_openElements.pushHTMLHeadElement(m_head);
 }

 // Creates the <body> element from |token|, attaches it under the current
 // node and pushes it onto the open element stack. If a document is still
 // attached, it is notified that the body is about to be inserted.
 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token) {
   ASSERT(!shouldFosterParent());

   HTMLElement* bodyElement = createHTMLElement(token);
   attachLater(currentNode(), bodyElement);
   m_openElements.pushHTMLBodyElement(
       HTMLStackItem::create(bodyElement, token));

   if (!m_document)
     return;
   m_document->willInsertBody();
 }

 // Creates a <form> element from |token|, marks it demoted per |isDemoted|,
 // attaches it under the current node and pushes it onto the open element
 // stack. The form becomes the site's form pointer unless the current node's
 // owner document is a template document.
 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token,
                                                  bool isDemoted) {
   HTMLElement* created = createHTMLElement(token);
   ASSERT(isHTMLFormElement(created));
   HTMLFormElement* form = toHTMLFormElement(created);

   const bool inTemplateDocument =
       ownerDocumentForCurrentNode().isTemplateDocument();
   if (!inTemplateDocument)
     m_form = form;

   form->setDemoted(isDemoted);
   attachLater(currentNode(), form);
   m_openElements.push(HTMLStackItem::create(form, token));
 }

 // Creates an HTML element from |token|, attaches it under the current node
 // and pushes it onto the open element stack.
 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token) {
   HTMLElement* created = createHTMLElement(token);
   attachLater(currentNode(), created);
   m_openElements.push(HTMLStackItem::create(created, token));
 }

 // Creates an HTML element from a start tag and attaches it as self-closing;
 // the element is not pushed onto the open element stack.
 void HTMLConstructionSite::insertSelfClosingHTMLElementDestroyingToken(
     AtomicHTMLToken* token) {
   ASSERT(token->type() == HTMLToken::StartTag);
   // Normally HTMLElementStack is responsible for calling finishParsingChildren,
   // but self-closing elements are never in the element stack so the stack
   // doesn't get a chance to tell them that we're done parsing their children.
   ContainerNode* parent = currentNode();
   HTMLElement* created = createHTMLElement(token);
   const bool selfClosing = true;
   attachLater(parent, created, selfClosing);
   // FIXME: Do we want to acknowledge the token's self-closing flag?
   // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
 }

 // Inserts the element like insertHTMLElement() and then appends the stack
 // item of the resulting current element record to the active formatting
 // elements.
 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token) {
   // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
   // Possible active formatting elements include:
   // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
   insertHTMLElement(token);
   m_activeFormattingElements.append(currentElementRecord()->stackItem());
 }

 // Creates an HTML <script> element from |token| and pushes it onto the open
 // element stack. The element is attached under the current node only when
 // the content policy allows scripting content.
 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token) {
   // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
   // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
   // For createContextualFragment, the specifications say to mark it
   // parser-inserted and already-started and later unmark them. However, we
   // short circuit that logic to avoid the subtree traversal to find script
   // elements since scripts can never see those flags or effects thereof.
   const bool parserInserted =
       m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
   const bool alreadyStarted = m_isParsingFragment && parserInserted;

   // TODO(csharrison): This logic only works if the tokenizer/parser was not
   // blocked waiting for scripts when the element was inserted. This usually
   // fails for instance, on second document.write if a script writes twice in a
   // row. To fix this, the parser might have to keep track of raw string
   // position.
   // TODO(csharrison): Refactor this so that the bools that are passed
   // in are packed in a bitfield from an enum class.
   Document& ownerDocument = ownerDocumentForCurrentNode();
   const bool createdDuringDocumentWrite = ownerDocument.isInDocumentWrite();

   HTMLScriptElement* scriptElement = HTMLScriptElement::create(
       ownerDocument, parserInserted, alreadyStarted,
       createdDuringDocumentWrite);
   setAttributes(scriptElement, token, m_parserContentPolicy);

   const bool mayAttach = scriptingContentIsAllowed(m_parserContentPolicy);
   if (mayAttach)
     attachLater(currentNode(), scriptElement);
   m_openElements.push(HTMLStackItem::create(scriptElement, token));
 }

 // Creates an element in |namespaceURI| from a start tag. It is attached
 // under the current node unless scripting content is disallowed and the
 // element has a script loader. Elements that are not self-closing are pushed
 // onto the open element stack.
 void HTMLConstructionSite::insertForeignElement(
     AtomicHTMLToken* token,
     const AtomicString& namespaceURI) {
   ASSERT(token->type() == HTMLToken::StartTag);
   // parseError when xmlns or xmlns:xlink are wrong.
   DVLOG(1) << "Not implemented.";

   Element* foreignElement = createElement(token, namespaceURI);

   const bool mayAttach = scriptingContentIsAllowed(m_parserContentPolicy) ||
                          !toScriptLoaderIfPossible(foreignElement);
   if (mayAttach)
     attachLater(currentNode(), foreignElement, token->selfClosing());

   if (token->selfClosing())
     return;
   m_openElements.push(
       HTMLStackItem::create(foreignElement, token, namespaceURI));
 }

 void HTMLConstructionSite::insertTextNode(const String& string,
                                           WhitespaceMode whitespaceMode) {
   HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
   dummyTask.parent = currentNode();

   if (shouldFosterParent())
     findFosterSite(dummyTask);

   // FIXME: This probably doesn't need to be done both here and in insert(Task).
   if (isHTMLTemplateElement(*dummyTask.parent))
     dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();

   // Unclear when parent != case occurs. Somehow we insert text into two
   // separate nodes while processing the same Token. The nextChild !=
   // dummy.nextChild case occurs whenever foster parenting happened and we hit a
   // new text node "<table>a</table>b" In either case we have to flush the
   // pending text into the task queue before making more.
   if (!m_pendingText.isEmpty() &&
       (m_pendingText.parent != dummyTask.parent ||
        m_pendingText.nextChild != dummyTask.nextChild))
     flushPendingText(FlushAlways);
   m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string,
                        whitespaceMode);
 }

 // Queues a Reparent task whose child is |child|'s node and whose parent is
 // |newParent|'s node.
 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent,
                                     HTMLElementStack::ElementRecord* child) {
   HTMLConstructionSiteTask reparentTask(HTMLConstructionSiteTask::Reparent);
   reparentTask.child = child->node();
   reparentTask.parent = newParent->node();
   queueTask(reparentTask);
 }

 // Overload taking the child as a stack item: queues a Reparent task whose
 // child is |child|'s node and whose parent is |newParent|'s node.
 void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent,
                                     HTMLStackItem* child) {
   HTMLConstructionSiteTask reparentTask(HTMLConstructionSiteTask::Reparent);
   reparentTask.child = child->node();
   reparentTask.parent = newParent->node();
   queueTask(reparentTask);
 }

 // Places |child|'s node under |newParent|. If |newParent| causes foster
 // parenting the node is foster parented; otherwise an
 // InsertAlreadyParsedChild task is queued.
 void HTMLConstructionSite::insertAlreadyParsedChild(
     HTMLStackItem* newParent,
     HTMLElementStack::ElementRecord* child) {
   if (!newParent->causesFosterParenting()) {
     HTMLConstructionSiteTask insertTask(
         HTMLConstructionSiteTask::InsertAlreadyParsedChild);
     insertTask.child = child->node();
     insertTask.parent = newParent->node();
     queueTask(insertTask);
     return;
   }

   fosterParent(child->node());
 }

 // Queues a TakeAllChildren task. The task's parent is |newParent|'s node and
 // its child slot carries |oldParent|'s node.
 void HTMLConstructionSite::takeAllChildren(
     HTMLStackItem* newParent,
     HTMLElementStack::ElementRecord* oldParent) {
   HTMLConstructionSiteTask takeTask(HTMLConstructionSiteTask::TakeAllChildren);
   takeTask.child = oldParent->node();
   takeTask.parent = newParent->node();
   queueTask(takeTask);
 }

 // Picks the element creation flags: CreatedByFragmentParser while parsing a
 // fragment, CreatedByParser otherwise.
 CreateElementFlags HTMLConstructionSite::getCreateElementFlags() const {
   if (m_isParsingFragment)
     return CreatedByFragmentParser;
   return CreatedByParser;
 }

 // Creates an element for |token| in |namespaceURI| using the owner document
 // of the current node, applies the token's attributes and returns it.
 Element* HTMLConstructionSite::createElement(AtomicHTMLToken* token,
                                              const AtomicString& namespaceURI) {
   const QualifiedName qualifiedName(nullAtom, token->name(), namespaceURI);
   Document& ownerDocument = ownerDocumentForCurrentNode();
   Element* created =
       ownerDocument.createElement(qualifiedName, getCreateElementFlags());
   setAttributes(created, token, m_parserContentPolicy);
   return created;
 }

 // Returns the document new nodes should be created in: the current node's
 // document, or, when the current node is a <template>, the document of that
 // template's content.
 inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode() {
   if (!isHTMLTemplateElement(*currentNode()))
     return currentNode()->document();
   return toHTMLTemplateElement(currentElement())->content()->document();
 }

 // "look up a custom element definition" for a token
 // https://html.spec.whatwg.org/#look-up-a-custom-element-definition
 // Returns null when |document| has no executing window or that window has no
 // custom element registry; otherwise returns whatever the registry reports
 // for the descriptor built from |token|.
 CustomElementDefinition* HTMLConstructionSite::lookUpCustomElementDefinition(
     Document& document,
     AtomicHTMLToken* token) {
   // "2. If document does not have a browsing context, return null."
   // "3. Let registry be document's browsing context's Window's
   // CustomElementRegistry object."
   LocalDOMWindow* executingWindow = document.executingWindow();
   CustomElementRegistry* customElements =
       executingWindow ? executingWindow->maybeCustomElements() : nullptr;
   if (!customElements)
     return nullptr;

   // The descriptor's name is the value of the "is" attribute when the token
   // has one, and the tag's local name otherwise.
   const AtomicString& tagLocalName = token->name();
   const Attribute* isAttributeItem = token->getAttributeItem(HTMLNames::isAttr);
   CustomElementDescriptor descriptor(
       isAttributeItem ? isAttributeItem->value() : tagLocalName, tagLocalName);

   // 4.-6.
   return customElements->definitionFor(descriptor);
 }

 // "create an element for a token"
 // https://html.spec.whatwg.org/#create-an-element-for-the-token
 // TODO(dominicc): When form association is separate from creation, unify this
 // with foreign element creation. Add a namespace parameter and check for HTML
 // namespace to lookupCustomElementDefinition.
 // Builds the HTML element for |token| in the owner document of the current
 // node and returns it with the token's attributes applied. When a custom
 // element definition is found (never while parsing a fragment) the element is
 // created through definition->createElementSync(); otherwise it comes from
 // HTMLElementFactory::createHTMLElement().
 HTMLElement* HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token) {
   // "1. Let document be intended parent's node document."
   Document& document = ownerDocumentForCurrentNode();

   // Only associate the element with the current form if we're creating the new
   // element in a document with a browsing context (rather than in <template>
   // contents).
   // TODO(dominicc): Change form to happen after element creation when
   // implementing customized built-in elements.
   // Note: only the HTMLElementFactory branch below consumes |form|.
   HTMLFormElement* form = document.frame() ? m_form.get() : nullptr;

   // "2. Let local name be the tag name of the token."
   // "3. Let is be the value of the "is" attribute in the given token ..." etc.
   // "4. Let definition be the result of looking up a custom element ..." etc.
   CustomElementDefinition* definition =
       m_isParsingFragment ? nullptr
                           : lookUpCustomElementDefinition(document, token);
   // "5. If definition is non-null and the parser was not originally created
   // for the HTML fragment parsing algorithm, then let will execute script
   // be true."
   // |definition| is already null when parsing a fragment, so the second
   // operand is redundant here but mirrors the spec wording.
   bool willExecuteScript = definition && !m_isParsingFragment;

   HTMLElement* element;

   if (willExecuteScript) {
     // The two scope objects declared in this block are destroyed at its
     // closing brace, after the attributes have been copied onto the element.

     // "6.1 Increment the document's throw-on-dynamic-insertion counter."
     ThrowOnDynamicMarkupInsertionCountIncrementer
         throwOnDynamicMarkupInsertions(&document);

     // "6.2 If the JavaScript execution context stack is empty,
     // then perform a microtask checkpoint."

     // TODO(dominicc): This is the way the Blink HTML parser performs
     // checkpoints, but note the spec is different--it talks about the
     // JavaScript stack, not the script nesting level.
     if (0u == m_reentryPermit->scriptNestingLevel())
       Microtask::performCheckpoint(V8PerIsolateData::mainThreadIsolate());

     // "6.3 Push a new element queue onto the custom element
     // reactions stack."
     CEReactionsScope reactions;

     // 7.
     QualifiedName elementQName(nullAtom, token->name(),
                                HTMLNames::xhtmlNamespaceURI);
     element = definition->createElementSync(document, elementQName);

     // "8. Append each attribute in the given token to element." We don't use
     // setAttributes here because the custom element constructor may have
     // manipulated attributes.
     for (const auto& attribute : token->attributes())
       element->setAttribute(attribute.name(), attribute.value());

     // "9. If will execute script is true, then ..." etc. The CEReactionsScope
     // and ThrowOnDynamicMarkupInsertionCountIncrementer destructors implement
     // steps 9.1-3.
   } else {
     // FIXME: This can't use HTMLConstructionSite::createElement because we have
     // to pass the current form element. We should rework form association to
     // occur after construction to allow better code sharing here.
     element = HTMLElementFactory::createHTMLElement(
         token->name(), document, form, getCreateElementFlags());
     // Definition for the created element does not exist here and it cannot be
     // custom or failed.
     DCHECK_NE(element->getCustomElementState(), CustomElementState::Custom);
     DCHECK_NE(element->getCustomElementState(), CustomElementState::Failed);

     // "8. Append each attribute in the given token to element."
     setAttributes(element, token, m_parserContentPolicy);
   }

   // TODO(dominicc): Implement steps 10-12 when customized built-in elements are
   // implemented.

   return element;
 }

 // Creates a fresh element from the local name, attributes and namespace
 // recorded on |item| and returns it wrapped in a new HTMLStackItem. Items in
 // the XHTML namespace go through createHTMLElement(); all others through
 // createElement().
 HTMLStackItem* HTMLConstructionSite::createElementFromSavedToken(
     HTMLStackItem* item) {
   // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
   AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(),
                             item->attributes());

   const bool isHTMLNamespace =
       item->namespaceURI() == HTMLNames::xhtmlNamespaceURI;
   Element* recreated = nullptr;
   if (isHTMLNamespace)
     recreated = createHTMLElement(&fakeToken);
   else
     recreated = createElement(&fakeToken, item->namespaceURI());

   return HTMLStackItem::create(recreated, &fakeToken, item->namespaceURI());
 }

 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(
     unsigned& firstUnopenElementIndex) const {
   if (m_activeFormattingElements.isEmpty())
     return false;
   unsigned index = m_activeFormattingElements.size();
   do {
     --index;
     const HTMLFormattingElementList::Entry& entry =
         m_activeFormattingElements.at(index);
     if (entry.isMarker() || m_openElements.contains(entry.element())) {
       firstUnopenElementIndex = index + 1;
       return firstUnopenElementIndex < m_activeFormattingElements.size();
     }
   } while (index);
   firstUnopenElementIndex = index;
   return true;
 }

 void HTMLConstructionSite::reconstructTheActiveFormattingElements() {
   unsigned firstUnopenElementIndex;
   if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
     return;

   unsigned unopenEntryIndex = firstUnopenElementIndex;
   ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
   for (; unopenEntryIndex < m_activeFormattingElements.size();
        ++unopenEntryIndex) {
     HTMLFormattingElementList::Entry& unopenedEntry =
         m_activeFormattingElements.at(unopenEntryIndex);
     HTMLStackItem* reconstructed =
         createElementFromSavedToken(unopenedEntry.stackItem());
     attachLater(currentNode(), reconstructed->node());
     m_openElements.push(reconstructed);
     unopenedEntry.replaceElement(reconstructed);
   }
 }

 // Pops open elements for as long as the current stack item has an implied
 // end tag, stopping early at an item that matches the HTML tag |tagName|.
 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(
     const AtomicString& tagName) {
   while (hasImpliedEndTag(currentStackItem())) {
     if (currentStackItem()->matchesHTMLTag(tagName))
       break;
     m_openElements.pop();
   }
 }

 // Pops open elements until the current stack item no longer has an implied
 // end tag.
 void HTMLConstructionSite::generateImpliedEndTags() {
   for (;;) {
     if (!hasImpliedEndTag(currentStackItem()))
       return;
     m_openElements.pop();
   }
 }

 // Returns the stored m_inQuirksMode flag.
 bool HTMLConstructionSite::inQuirksMode() {
   return m_inQuirksMode;
 }

 // Rewrites |task|'s parent (and, when inserting before a table, its
 // nextChild) to the "adjusted insertion location" chosen by the foster
 // parenting algorithm. The numbered comments refer to the substeps of step 2
 // of https://html.spec.whatwg.org/#appropriate-place-for-inserting-a-node
 void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task) {
   // 2.1
   HTMLElementStack::ElementRecord* templateRecord =
       m_openElements.topmost(templateTag.localName());

   // 2.2
   HTMLElementStack::ElementRecord* tableRecord =
       m_openElements.topmost(tableTag.localName());

   // 2.3
   const bool templateWins =
       templateRecord &&
       (!tableRecord || templateRecord->isAbove(tableRecord));
   if (templateWins) {
     task.parent = templateRecord->element();
     return;
   }

   // 2.4
   if (!tableRecord) {
     // Fragment case
     task.parent = m_openElements.rootNode();  // DocumentFragment
     return;
   }

   ContainerNode* tableParent = tableRecord->element()->parentNode();
   if (!tableParent) {
     // 2.6, 2.7
     task.parent = tableRecord->next()->element();
     return;
   }

   // 2.5
   task.parent = tableParent;
   task.nextChild = tableRecord->element();
 }

 // True only when attach redirection to the foster parent is enabled and the
 // current stack item is an element node that causes foster parenting.
 bool HTMLConstructionSite::shouldFosterParent() const {
   if (!m_redirectAttachToFosterParent)
     return false;
   if (!currentStackItem()->isElementNode())
     return false;
   return currentStackItem()->causesFosterParenting();
 }

 // Queues an Insert task that places |node| at the location chosen by
 // findFosterSite().
 void HTMLConstructionSite::fosterParent(Node* node) {
   HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
   task.child = node;
   // findFosterSite() only fills in the parent and nextChild of the task.
   findFosterSite(task);
   ASSERT(task.parent);
   queueTask(task);
 }

 // Reports the |parent| and |nextChild| members of PendingText to |visitor|.
 DEFINE_TRACE(HTMLConstructionSite::PendingText) {
   visitor->trace(parent);
   visitor->trace(nextChild);
 }

 }  // namespace blink