blob: 3304c74ba11a7685ee032f54d15b7060f456ccf6 [file] [log] [blame]
/*
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
* Copyright (C) 2011 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "core/html/parser/HTMLConstructionSite.h"
#include "bindings/core/v8/Microtask.h"
#include "bindings/core/v8/V8PerIsolateData.h"
#include "core/HTMLElementFactory.h"
#include "core/HTMLNames.h"
#include "core/dom/Comment.h"
#include "core/dom/DocumentFragment.h"
#include "core/dom/DocumentType.h"
#include "core/dom/Element.h"
#include "core/dom/ElementTraversal.h"
#include "core/dom/IgnoreDestructiveWriteCountIncrementer.h"
#include "core/dom/ScriptLoader.h"
#include "core/dom/TemplateContentDocumentFragment.h"
#include "core/dom/Text.h"
#include "core/dom/ThrowOnDynamicMarkupInsertionCountIncrementer.h"
#include "core/dom/custom/CEReactionsScope.h"
#include "core/dom/custom/CustomElementDefinition.h"
#include "core/dom/custom/CustomElementDescriptor.h"
#include "core/dom/custom/CustomElementRegistry.h"
#include "core/frame/LocalDOMWindow.h"
#include "core/frame/LocalFrame.h"
#include "core/html/HTMLFormElement.h"
#include "core/html/HTMLHtmlElement.h"
#include "core/html/HTMLPlugInElement.h"
#include "core/html/HTMLScriptElement.h"
#include "core/html/HTMLTemplateElement.h"
#include "core/html/parser/AtomicHTMLToken.h"
#include "core/html/parser/HTMLParserIdioms.h"
#include "core/html/parser/HTMLParserReentryPermit.h"
#include "core/html/parser/HTMLStackItem.h"
#include "core/html/parser/HTMLToken.h"
#include "core/loader/FrameLoader.h"
#include "core/loader/FrameLoaderClient.h"
#include "core/svg/SVGScriptElement.h"
#include "platform/text/TextBreakIterator.h"
#include <limits>
namespace blink {
using namespace HTMLNames;
static const unsigned maximumHTMLParserDOMTreeDepth = 512;
static inline void setAttributes(Element* element,
AtomicHTMLToken* token,
ParserContentPolicy parserContentPolicy) {
if (!scriptingContentIsAllowed(parserContentPolicy))
element->stripScriptingAttributes(token->attributes());
element->parserSetAttributes(token->attributes());
}
static bool hasImpliedEndTag(const HTMLStackItem* item) {
return item->hasTagName(ddTag) || item->hasTagName(dtTag) ||
item->hasTagName(liTag) || item->hasTagName(optionTag) ||
item->hasTagName(optgroupTag) || item->hasTagName(pTag) ||
item->hasTagName(rbTag) || item->hasTagName(rpTag) ||
item->hasTagName(rtTag) || item->hasTagName(rtcTag);
}
static bool shouldUseLengthLimit(const ContainerNode& node) {
return !isHTMLScriptElement(node) && !isHTMLStyleElement(node) &&
!isSVGScriptElement(node);
}
static unsigned textLengthLimitForContainer(const ContainerNode& node) {
return shouldUseLengthLimit(node) ? Text::defaultLengthLimit
: std::numeric_limits<unsigned>::max();
}
static inline bool isAllWhitespace(const String& string) {
return string.isAllSpecialCharacters<isHTMLSpace<UChar>>();
}
static inline void insert(HTMLConstructionSiteTask& task) {
if (isHTMLTemplateElement(*task.parent))
task.parent = toHTMLTemplateElement(task.parent.get())->content();
if (task.nextChild)
task.parent->parserInsertBefore(task.child.get(), *task.nextChild);
else
task.parent->parserAppendChild(task.child.get());
}
static inline void executeInsertTask(HTMLConstructionSiteTask& task) {
ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
insert(task);
if (task.child->isElementNode()) {
Element& child = toElement(*task.child);
child.beginParsingChildren();
if (task.selfClosing)
child.finishParsingChildren();
}
}
static inline void executeInsertTextTask(HTMLConstructionSiteTask& task) {
ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
ASSERT(task.child->isTextNode());
// Merge text nodes into previous ones if possible:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
Text* newText = toText(task.child.get());
Node* previousChild = task.nextChild ? task.nextChild->previousSibling()
: task.parent->lastChild();
if (previousChild && previousChild->isTextNode()) {
Text* previousText = toText(previousChild);
unsigned lengthLimit = textLengthLimitForContainer(*task.parent);
if (previousText->length() + newText->length() < lengthLimit) {
previousText->parserAppendData(newText->data());
return;
}
}
insert(task);
}
static inline void executeReparentTask(HTMLConstructionSiteTask& task) {
ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
task.parent->parserAppendChild(task.child);
}
static inline void executeInsertAlreadyParsedChildTask(
HTMLConstructionSiteTask& task) {
ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);
insert(task);
}
static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task) {
ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);
task.parent->parserTakeAllChildrenFrom(*task.oldParent());
}
void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task) {
ASSERT(m_taskQueue.isEmpty());
if (task.operation == HTMLConstructionSiteTask::Insert)
return executeInsertTask(task);
if (task.operation == HTMLConstructionSiteTask::InsertText)
return executeInsertTextTask(task);
// All the cases below this point are only used by the adoption agency.
if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)
return executeInsertAlreadyParsedChildTask(task);
if (task.operation == HTMLConstructionSiteTask::Reparent)
return executeReparentTask(task);
if (task.operation == HTMLConstructionSiteTask::TakeAllChildren)
return executeTakeAllChildrenTask(task);
ASSERT_NOT_REACHED();
}
// This is only needed for TextDocuments where we might have text nodes
// approaching the default length limit (~64k) and we don't want to break a text
// node in the middle of a combining character.
static unsigned findBreakIndexBetween(const StringBuilder& string,
unsigned currentPosition,
unsigned proposedBreakIndex) {
ASSERT(currentPosition < proposedBreakIndex);
ASSERT(proposedBreakIndex <= string.length());
// The end of the string is always a valid break.
if (proposedBreakIndex == string.length())
return proposedBreakIndex;
// Latin-1 does not have breakable boundaries. If we ever moved to a different
// 8-bit encoding this could be wrong.
if (string.is8Bit())
return proposedBreakIndex;
const UChar* breakSearchCharacters = string.characters16() + currentPosition;
// We need at least two characters look-ahead to account for UTF-16
// surrogates, but can't search off the end of the buffer!
unsigned breakSearchLength =
std::min(proposedBreakIndex - currentPosition + 2,
string.length() - currentPosition);
NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength);
if (it.isBreak(proposedBreakIndex - currentPosition))
return proposedBreakIndex;
int adjustedBreakIndexInSubstring =
it.preceding(proposedBreakIndex - currentPosition);
if (adjustedBreakIndexInSubstring > 0)
return currentPosition + adjustedBreakIndexInSubstring;
// We failed to find a breakable point, let the caller figure out what to do.
return 0;
}
static String atomizeIfAllWhitespace(const String& string,
WhitespaceMode whitespaceMode) {
// Strings composed entirely of whitespace are likely to be repeated. Turn
// them into AtomicString so we share a single string for each.
if (whitespaceMode == AllWhitespace ||
(whitespaceMode == WhitespaceUnknown && isAllWhitespace(string)))
return AtomicString(string).getString();
return string;
}
void HTMLConstructionSite::flushPendingText(FlushMode mode) {
if (m_pendingText.isEmpty())
return;
if (mode == FlushIfAtTextLimit &&
!shouldUseLengthLimit(*m_pendingText.parent))
return;
PendingText pendingText;
// Hold onto the current pending text on the stack so that queueTask doesn't
// recurse infinitely.
m_pendingText.swap(pendingText);
ASSERT(m_pendingText.isEmpty());
// Splitting text nodes into smaller chunks contradicts HTML5 spec, but is
// necessary for performance, see:
// https://bugs.webkit.org/show_bug.cgi?id=55898
unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent);
unsigned currentPosition = 0;
const StringBuilder& string = pendingText.stringBuilder;
while (currentPosition < string.length()) {
unsigned proposedBreakIndex =
std::min(currentPosition + lengthLimit, string.length());
unsigned breakIndex =
findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
ASSERT(breakIndex <= string.length());
String substring =
string.substring(currentPosition, breakIndex - currentPosition);
substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode);
HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
task.parent = pendingText.parent;
task.nextChild = pendingText.nextChild;
task.child = Text::create(task.parent->document(), substring);
queueTask(task);
ASSERT(breakIndex > currentPosition);
ASSERT(breakIndex - currentPosition == substring.length());
ASSERT(toText(task.child.get())->length() == substring.length());
currentPosition = breakIndex;
}
}
void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task) {
flushPendingText(FlushAlways);
ASSERT(m_pendingText.isEmpty());
m_taskQueue.append(task);
}
void HTMLConstructionSite::attachLater(ContainerNode* parent,
Node* child,
bool selfClosing) {
ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) ||
!child->isElementNode() ||
!toScriptLoaderIfPossible(toElement(child)));
ASSERT(pluginContentIsAllowed(m_parserContentPolicy) ||
!isHTMLPlugInElement(child));
HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
task.parent = parent;
task.child = child;
task.selfClosing = selfClosing;
if (shouldFosterParent()) {
fosterParent(task.child);
return;
}
// Add as a sibling of the parent if we have reached the maximum depth
// allowed.
if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth &&
task.parent->parentNode())
task.parent = task.parent->parentNode();
ASSERT(task.parent);
queueTask(task);
}
void HTMLConstructionSite::executeQueuedTasks() {
// This has no affect on pendingText, and we may have pendingText remaining
// after executing all other queued tasks.
const size_t size = m_taskQueue.size();
if (!size)
return;
// Copy the task queue into a local variable in case executeTask re-enters the
// parser.
TaskQueue queue;
queue.swap(m_taskQueue);
for (size_t i = 0; i < size; ++i)
executeTask(queue[i]);
// We might be detached now.
}
HTMLConstructionSite::HTMLConstructionSite(
HTMLParserReentryPermit* reentryPermit,
Document& document,
ParserContentPolicy parserContentPolicy)
: m_reentryPermit(reentryPermit),
m_document(&document),
m_attachmentRoot(document),
m_parserContentPolicy(parserContentPolicy),
m_isParsingFragment(false),
m_redirectAttachToFosterParent(false),
m_inQuirksMode(document.inQuirksMode()) {
ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
}
void HTMLConstructionSite::initFragmentParsing(DocumentFragment* fragment,
Element* contextElement) {
DCHECK(contextElement);
DCHECK_EQ(m_document, &fragment->document());
DCHECK_EQ(m_inQuirksMode, fragment->document().inQuirksMode());
DCHECK(!m_isParsingFragment);
DCHECK(!m_form);
m_attachmentRoot = fragment;
m_isParsingFragment = true;
if (!contextElement->document().isTemplateDocument())
m_form = Traversal<HTMLFormElement>::firstAncestorOrSelf(*contextElement);
}
HTMLConstructionSite::~HTMLConstructionSite() {
// Depending on why we're being destroyed it might be OK to forget queued
// tasks, but currently we don't expect to.
ASSERT(m_taskQueue.isEmpty());
// Currently we assume that text will never be the last token in the document
// and that we'll always queue some additional task to cause it to flush.
ASSERT(m_pendingText.isEmpty());
}
DEFINE_TRACE(HTMLConstructionSite) {
visitor->trace(m_document);
visitor->trace(m_attachmentRoot);
visitor->trace(m_head);
visitor->trace(m_form);
visitor->trace(m_openElements);
visitor->trace(m_activeFormattingElements);
visitor->trace(m_taskQueue);
visitor->trace(m_pendingText);
}
void HTMLConstructionSite::detach() {
// FIXME: We'd like to ASSERT here that we're canceling and not just
// discarding text that really should have made it into the DOM earlier, but
// there doesn't seem to be a nice way to do that.
m_pendingText.discard();
m_document = nullptr;
m_attachmentRoot = nullptr;
}
HTMLFormElement* HTMLConstructionSite::takeForm() {
return m_form.release();
}
void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(
AtomicHTMLToken* token) {
ASSERT(m_document);
HTMLHtmlElement* element = HTMLHtmlElement::create(*m_document);
setAttributes(element, token, m_parserContentPolicy);
attachLater(m_attachmentRoot, element);
m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));
executeQueuedTasks();
element->insertedByParser();
}
void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(
AtomicHTMLToken* token,
Element* element) {
if (token->attributes().isEmpty())
return;
for (unsigned i = 0; i < token->attributes().size(); ++i) {
const Attribute& tokenAttribute = token->attributes().at(i);
if (element->attributesWithoutUpdate().findIndex(tokenAttribute.name()) ==
kNotFound)
element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
}
}
void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(
AtomicHTMLToken* token) {
// Fragments do not have a root HTML element, so any additional HTML elements
// encountered during fragment parsing should be ignored.
if (m_isParsingFragment)
return;
mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
}
void HTMLConstructionSite::insertHTMLBodyStartTagInBody(
AtomicHTMLToken* token) {
mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
}
void HTMLConstructionSite::setDefaultCompatibilityMode() {
if (m_isParsingFragment)
return;
setCompatibilityMode(Document::QuirksMode);
}
void HTMLConstructionSite::setCompatibilityMode(
Document::CompatibilityMode mode) {
m_inQuirksMode = (mode == Document::QuirksMode);
m_document->setCompatibilityMode(mode);
}
void HTMLConstructionSite::setCompatibilityModeFromDoctype(
const String& name,
const String& publicId,
const String& systemId) {
// There are three possible compatibility modes:
// Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in
// this mode, e.g., unit types can be omitted from numbers.
// Limited Quirks - This mode is identical to no-quirks mode except for its
// treatment of line-height in the inline box model.
// No Quirks - no quirks apply. Web pages will obey the specifications to the
// letter.
// Check for Quirks Mode.
if (name != "html" ||
publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//",
TextCaseInsensitive) ||
publicId.startsWith(
"-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//",
TextCaseInsensitive) ||
publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//",
TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//",
TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//",
TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//",
TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//",
TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//",
TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML 2.0//", TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML 2.1E//", TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML 3.0//", TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML 3.2 Final//",
TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML 3.2//", TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML 3//", TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML Level 0//", TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML Level 1//", TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML Level 2//", TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML Level 3//", TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML Strict Level 0//",
TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML Strict Level 1//",
TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML Strict Level 2//",
TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML Strict Level 3//",
TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML Strict//", TextCaseInsensitive) ||
publicId.startsWith("-//IETF//DTD HTML//", TextCaseInsensitive) ||
publicId.startsWith("-//Metrius//DTD Metrius Presentational//",
TextCaseInsensitive) ||
publicId.startsWith(
"-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//",
TextCaseInsensitive) ||
publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//",
TextCaseInsensitive) ||
publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//",
TextCaseInsensitive) ||
publicId.startsWith(
"-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//",
TextCaseInsensitive) ||
publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//",
TextCaseInsensitive) ||
publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//",
TextCaseInsensitive) ||
publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//",
TextCaseInsensitive) ||
publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//",
TextCaseInsensitive) ||
publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//",
TextCaseInsensitive) ||
publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//",
TextCaseInsensitive) ||
publicId.startsWith(
"-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//",
TextCaseInsensitive) ||
publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO "
"6.0::19990601::extensions to HTML 4.0//",
TextCaseInsensitive) ||
publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO "
"4.0::19971010::extensions to HTML 4.0//",
TextCaseInsensitive) ||
publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//",
TextCaseInsensitive) ||
publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//",
TextCaseInsensitive) ||
publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//",
TextCaseInsensitive) ||
publicId.startsWith(
"-//Sun Microsystems Corp.//DTD HotJava Strict HTML//",
TextCaseInsensitive) ||
publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//",
TextCaseInsensitive) ||
publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//",
TextCaseInsensitive) ||
publicId.startsWith("-//W3C//DTD HTML 3.2 Final//",
TextCaseInsensitive) ||
publicId.startsWith("-//W3C//DTD HTML 3.2//", TextCaseInsensitive) ||
publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//",
TextCaseInsensitive) ||
publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//",
TextCaseInsensitive) ||
publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//",
TextCaseInsensitive) ||
publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//",
TextCaseInsensitive) ||
publicId.startsWith("-//W3C//DTD HTML Experimental 970421//",
TextCaseInsensitive) ||
publicId.startsWith("-//W3C//DTD W3 HTML//", TextCaseInsensitive) ||
publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", TextCaseInsensitive) ||
equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//") ||
publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//",
TextCaseInsensitive) ||
publicId.startsWith("-//WebTechs//DTD Mozilla HTML//",
TextCaseInsensitive) ||
equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN") ||
equalIgnoringCase(publicId, "HTML") ||
equalIgnoringCase(
systemId,
"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") ||
(systemId.isEmpty() &&
publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//",
TextCaseInsensitive)) ||
(systemId.isEmpty() &&
publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//",
TextCaseInsensitive))) {
setCompatibilityMode(Document::QuirksMode);
return;
}
// Check for Limited Quirks Mode.
if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//",
TextCaseInsensitive) ||
publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//",
TextCaseInsensitive) ||
(!systemId.isEmpty() &&
publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//",
TextCaseInsensitive)) ||
(!systemId.isEmpty() &&
publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//",
TextCaseInsensitive))) {
setCompatibilityMode(Document::LimitedQuirksMode);
return;
}
// Otherwise we are No Quirks Mode.
setCompatibilityMode(Document::NoQuirksMode);
}
void HTMLConstructionSite::processEndOfFile() {
ASSERT(currentNode());
flush(FlushAlways);
openElements()->popAll();
}
void HTMLConstructionSite::finishedParsing() {
// We shouldn't have any queued tasks but we might have pending text which we
// need to promote to tasks and execute.
ASSERT(m_taskQueue.isEmpty());
flush(FlushAlways);
m_document->finishedParsing();
}
void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token) {
ASSERT(token->type() == HTMLToken::DOCTYPE);
const String& publicId =
StringImpl::create8BitIfPossible(token->publicIdentifier());
const String& systemId =
StringImpl::create8BitIfPossible(token->systemIdentifier());
DocumentType* doctype =
DocumentType::create(m_document, token->name(), publicId, systemId);
attachLater(m_attachmentRoot, doctype);
// DOCTYPE nodes are only processed when parsing fragments w/o
// contextElements, which never occurs. However, if we ever chose to support
// such, this code is subtly wrong, because context-less fragments can
// determine their own quirks mode, and thus change parsing rules (like <p>
// inside <table>). For now we ASSERT that we never hit this code in a
// fragment, as changing the owning document's compatibility mode would be
// wrong.
ASSERT(!m_isParsingFragment);
if (m_isParsingFragment)
return;
if (token->forceQuirks())
setCompatibilityMode(Document::QuirksMode);
else {
setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
}
}
void HTMLConstructionSite::insertComment(AtomicHTMLToken* token) {
ASSERT(token->type() == HTMLToken::Comment);
attachLater(currentNode(),
Comment::create(ownerDocumentForCurrentNode(), token->comment()));
}
void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token) {
ASSERT(token->type() == HTMLToken::Comment);
ASSERT(m_document);
attachLater(m_attachmentRoot, Comment::create(*m_document, token->comment()));
}
void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(
AtomicHTMLToken* token) {
ASSERT(token->type() == HTMLToken::Comment);
ContainerNode* parent = m_openElements.rootNode();
attachLater(parent, Comment::create(parent->document(), token->comment()));
}
void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token) {
ASSERT(!shouldFosterParent());
m_head = HTMLStackItem::create(createHTMLElement(token), token);
attachLater(currentNode(), m_head->element());
m_openElements.pushHTMLHeadElement(m_head);
}
void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token) {
ASSERT(!shouldFosterParent());
HTMLElement* body = createHTMLElement(token);
attachLater(currentNode(), body);
m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body, token));
if (m_document)
m_document->willInsertBody();
}
void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token,
bool isDemoted) {
HTMLElement* element = createHTMLElement(token);
ASSERT(isHTMLFormElement(element));
HTMLFormElement* formElement = toHTMLFormElement(element);
if (!ownerDocumentForCurrentNode().isTemplateDocument())
m_form = formElement;
formElement->setDemoted(isDemoted);
attachLater(currentNode(), formElement);
m_openElements.push(HTMLStackItem::create(formElement, token));
}
void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token) {
HTMLElement* element = createHTMLElement(token);
attachLater(currentNode(), element);
m_openElements.push(HTMLStackItem::create(element, token));
}
void HTMLConstructionSite::insertSelfClosingHTMLElementDestroyingToken(
AtomicHTMLToken* token) {
ASSERT(token->type() == HTMLToken::StartTag);
// Normally HTMLElementStack is responsible for calling finishParsingChildren,
// but self-closing elements are never in the element stack so the stack
// doesn't get a chance to tell them that we're done parsing their children.
attachLater(currentNode(), createHTMLElement(token), true);
// FIXME: Do we want to acknowledge the token's self-closing flag?
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
}
void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token) {
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
// Possible active formatting elements include:
// a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
insertHTMLElement(token);
m_activeFormattingElements.append(currentElementRecord()->stackItem());
}
void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token) {
// http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
// http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
// For createContextualFragment, the specifications say to mark it
// parser-inserted and already-started and later unmark them. However, we
// short circuit that logic to avoid the subtree traversal to find script
// elements since scripts can never see those flags or effects thereof.
const bool parserInserted =
m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
const bool alreadyStarted = m_isParsingFragment && parserInserted;
// TODO(csharrison): This logic only works if the tokenizer/parser was not
// blocked waiting for scripts when the element was inserted. This usually
// fails for instance, on second document.write if a script writes twice in a
// row. To fix this, the parser might have to keep track of raw string
// position.
// TODO(csharrison): Refactor this so that the bools that are passed
// in are packed in a bitfield from an enum class.
const bool createdDuringDocumentWrite =
ownerDocumentForCurrentNode().isInDocumentWrite();
HTMLScriptElement* element =
HTMLScriptElement::create(ownerDocumentForCurrentNode(), parserInserted,
alreadyStarted, createdDuringDocumentWrite);
setAttributes(element, token, m_parserContentPolicy);
if (scriptingContentIsAllowed(m_parserContentPolicy))
attachLater(currentNode(), element);
m_openElements.push(HTMLStackItem::create(element, token));
}
void HTMLConstructionSite::insertForeignElement(
AtomicHTMLToken* token,
const AtomicString& namespaceURI) {
ASSERT(token->type() == HTMLToken::StartTag);
// parseError when xmlns or xmlns:xlink are wrong.
DVLOG(1) << "Not implemented.";
Element* element = createElement(token, namespaceURI);
if (scriptingContentIsAllowed(m_parserContentPolicy) ||
!toScriptLoaderIfPossible(element))
attachLater(currentNode(), element, token->selfClosing());
if (!token->selfClosing())
m_openElements.push(HTMLStackItem::create(element, token, namespaceURI));
}
void HTMLConstructionSite::insertTextNode(const String& string,
WhitespaceMode whitespaceMode) {
HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
dummyTask.parent = currentNode();
if (shouldFosterParent())
findFosterSite(dummyTask);
// FIXME: This probably doesn't need to be done both here and in insert(Task).
if (isHTMLTemplateElement(*dummyTask.parent))
dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();
// Unclear when parent != case occurs. Somehow we insert text into two
// separate nodes while processing the same Token. The nextChild !=
// dummy.nextChild case occurs whenever foster parenting happened and we hit a
// new text node "<table>a</table>b" In either case we have to flush the
// pending text into the task queue before making more.
if (!m_pendingText.isEmpty() &&
(m_pendingText.parent != dummyTask.parent ||
m_pendingText.nextChild != dummyTask.nextChild))
flushPendingText(FlushAlways);
m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string,
whitespaceMode);
}
void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent,
HTMLElementStack::ElementRecord* child) {
HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
task.parent = newParent->node();
task.child = child->node();
queueTask(task);
}
void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent,
HTMLStackItem* child) {
HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
task.parent = newParent->node();
task.child = child->node();
queueTask(task);
}
void HTMLConstructionSite::insertAlreadyParsedChild(
HTMLStackItem* newParent,
HTMLElementStack::ElementRecord* child) {
if (newParent->causesFosterParenting()) {
fosterParent(child->node());
return;
}
HTMLConstructionSiteTask task(
HTMLConstructionSiteTask::InsertAlreadyParsedChild);
task.parent = newParent->node();
task.child = child->node();
queueTask(task);
}
void HTMLConstructionSite::takeAllChildren(
HTMLStackItem* newParent,
HTMLElementStack::ElementRecord* oldParent) {
HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
task.parent = newParent->node();
task.child = oldParent->node();
queueTask(task);
}
CreateElementFlags HTMLConstructionSite::getCreateElementFlags() const {
return m_isParsingFragment ? CreatedByFragmentParser : CreatedByParser;
}
Element* HTMLConstructionSite::createElement(AtomicHTMLToken* token,
const AtomicString& namespaceURI) {
QualifiedName tagName(nullAtom, token->name(), namespaceURI);
Element* element = ownerDocumentForCurrentNode().createElement(
tagName, getCreateElementFlags());
setAttributes(element, token, m_parserContentPolicy);
return element;
}
inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode() {
if (isHTMLTemplateElement(*currentNode()))
return toHTMLTemplateElement(currentElement())->content()->document();
return currentNode()->document();
}
// "look up a custom element definition" for a token
// https://html.spec.whatwg.org/#look-up-a-custom-element-definition
CustomElementDefinition* HTMLConstructionSite::lookUpCustomElementDefinition(
Document& document,
AtomicHTMLToken* token) {
// "2. If document does not have a browsing context, return null."
LocalDOMWindow* window = document.executingWindow();
if (!window)
return nullptr;
// "3. Let registry be document's browsing context's Window's
// CustomElementRegistry object."
CustomElementRegistry* registry = window->maybeCustomElements();
if (!registry)
return nullptr;
const AtomicString& localName = token->name();
const Attribute* isAttribute = token->getAttributeItem(HTMLNames::isAttr);
const AtomicString& name = isAttribute ? isAttribute->value() : localName;
CustomElementDescriptor descriptor(name, localName);
// 4.-6.
return registry->definitionFor(descriptor);
}
// "create an element for a token"
// https://html.spec.whatwg.org/#create-an-element-for-the-token
// TODO(dominicc): When form association is separate from creation, unify this
// with foreign element creation. Add a namespace parameter and check for HTML
// namespace to lookupCustomElementDefinition.
HTMLElement* HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token) {
// "1. Let document be intended parent's node document."
Document& document = ownerDocumentForCurrentNode();
// Only associate the element with the current form if we're creating the new
// element in a document with a browsing context (rather than in <template>
// contents).
// TODO(dominicc): Change form to happen after element creation when
// implementing customized built-in elements.
HTMLFormElement* form = document.frame() ? m_form.get() : nullptr;
// "2. Let local name be the tag name of the token."
// "3. Let is be the value of the "is" attribute in the giev token ..." etc.
// "4. Let definition be the result of looking up a custom element ..." etc.
CustomElementDefinition* definition =
m_isParsingFragment ? nullptr
: lookUpCustomElementDefinition(document, token);
// "5. If definition is non-null and the parser was not originally created
// for the HTML fragment parsing algorithm, then let will execute script
// be true."
bool willExecuteScript = definition && !m_isParsingFragment;
HTMLElement* element;
if (willExecuteScript) {
// "6.1 Increment the document's throw-on-dynamic-insertion counter."
ThrowOnDynamicMarkupInsertionCountIncrementer
throwOnDynamicMarkupInsertions(&document);
// "6.2 If the JavaScript execution context stack is empty,
// then perform a microtask checkpoint."
// TODO(dominicc): This is the way the Blink HTML parser performs
// checkpoints, but note the spec is different--it talks about the
// JavaScript stack, not the script nesting level.
if (0u == m_reentryPermit->scriptNestingLevel())
Microtask::performCheckpoint(V8PerIsolateData::mainThreadIsolate());
// "6.3 Push a new element queue onto the custom element
// reactions stack."
CEReactionsScope reactions;
// 7.
QualifiedName elementQName(nullAtom, token->name(),
HTMLNames::xhtmlNamespaceURI);
element = definition->createElementSync(document, elementQName);
// "8. Append each attribute in the given token to element." We don't use
// setAttributes here because the custom element constructor may have
// manipulated attributes.
for (const auto& attribute : token->attributes())
element->setAttribute(attribute.name(), attribute.value());
// "9. If will execute script is true, then ..." etc. The CEReactionsScope
// and ThrowOnDynamicMarkupInsertionCountIncrementer destructors implement
// steps 9.1-3.
} else {
// FIXME: This can't use HTMLConstructionSite::createElement because we have
// to pass the current form element. We should rework form association to
// occur after construction to allow better code sharing here.
element = HTMLElementFactory::createHTMLElement(
token->name(), document, form, getCreateElementFlags());
// Definition for the created element does not exist here and it cannot be
// custom or failed.
DCHECK_NE(element->getCustomElementState(), CustomElementState::Custom);
DCHECK_NE(element->getCustomElementState(), CustomElementState::Failed);
// "8. Append each attribute in the given token to element."
setAttributes(element, token, m_parserContentPolicy);
}
// TODO(dominicc): Implement steps 10-12 when customized built-in elements are
// implemented.
return element;
}
HTMLStackItem* HTMLConstructionSite::createElementFromSavedToken(
HTMLStackItem* item) {
Element* element;
// NOTE: Moving from item -> token -> item copies the Attribute vector twice!
AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(),
item->attributes());
if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
element = createHTMLElement(&fakeToken);
else
element = createElement(&fakeToken, item->namespaceURI());
return HTMLStackItem::create(element, &fakeToken, item->namespaceURI());
}
bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(
unsigned& firstUnopenElementIndex) const {
if (m_activeFormattingElements.isEmpty())
return false;
unsigned index = m_activeFormattingElements.size();
do {
--index;
const HTMLFormattingElementList::Entry& entry =
m_activeFormattingElements.at(index);
if (entry.isMarker() || m_openElements.contains(entry.element())) {
firstUnopenElementIndex = index + 1;
return firstUnopenElementIndex < m_activeFormattingElements.size();
}
} while (index);
firstUnopenElementIndex = index;
return true;
}
void HTMLConstructionSite::reconstructTheActiveFormattingElements() {
unsigned firstUnopenElementIndex;
if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
return;
unsigned unopenEntryIndex = firstUnopenElementIndex;
ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
for (; unopenEntryIndex < m_activeFormattingElements.size();
++unopenEntryIndex) {
HTMLFormattingElementList::Entry& unopenedEntry =
m_activeFormattingElements.at(unopenEntryIndex);
HTMLStackItem* reconstructed =
createElementFromSavedToken(unopenedEntry.stackItem());
attachLater(currentNode(), reconstructed->node());
m_openElements.push(reconstructed);
unopenedEntry.replaceElement(reconstructed);
}
}
void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(
const AtomicString& tagName) {
while (hasImpliedEndTag(currentStackItem()) &&
!currentStackItem()->matchesHTMLTag(tagName))
m_openElements.pop();
}
void HTMLConstructionSite::generateImpliedEndTags() {
while (hasImpliedEndTag(currentStackItem()))
m_openElements.pop();
}
bool HTMLConstructionSite::inQuirksMode() {
return m_inQuirksMode;
}
// Adjusts |task| to match the "adjusted insertion location" determined by the
// foster parenting algorithm, laid out as the substeps of step 2 of
// https://html.spec.whatwg.org/#appropriate-place-for-inserting-a-node
void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task) {
// 2.1
HTMLElementStack::ElementRecord* lastTemplate =
m_openElements.topmost(templateTag.localName());
// 2.2
HTMLElementStack::ElementRecord* lastTable =
m_openElements.topmost(tableTag.localName());
// 2.3
if (lastTemplate && (!lastTable || lastTemplate->isAbove(lastTable))) {
task.parent = lastTemplate->element();
return;
}
// 2.4
if (!lastTable) {
// Fragment case
task.parent = m_openElements.rootNode(); // DocumentFragment
return;
}
// 2.5
if (ContainerNode* parent = lastTable->element()->parentNode()) {
task.parent = parent;
task.nextChild = lastTable->element();
return;
}
// 2.6, 2.7
task.parent = lastTable->next()->element();
}
bool HTMLConstructionSite::shouldFosterParent() const {
return m_redirectAttachToFosterParent &&
currentStackItem()->isElementNode() &&
currentStackItem()->causesFosterParenting();
}
void HTMLConstructionSite::fosterParent(Node* node) {
HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
findFosterSite(task);
task.child = node;
ASSERT(task.parent);
queueTask(task);
}
DEFINE_TRACE(HTMLConstructionSite::PendingText) {
visitor->trace(parent);
visitor->trace(nextChild);
}
} // namespace blink