third_party/WebKit/Source/core/html/parser/HTMLToken.h - chromium/src - Git at Google

 /*
  * Copyright (C) 2013 Google, Inc. All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #ifndef HTMLToken_h
 #define HTMLToken_h

 #include "core/dom/Attribute.h"
 #include "core/html/parser/HTMLParserIdioms.h"
 #include "wtf/Forward.h"
 #include "wtf/PtrUtil.h"
 #include <memory>

 namespace blink {

 // Holds the DOCTYPE-specific state of a token: the public and system
 // identifiers, a flag per identifier recording whether it has been set, and
 // the force-quirks flag.
 class DoctypeData {
   USING_FAST_MALLOC(DoctypeData);
   WTF_MAKE_NONCOPYABLE(DoctypeData);

  public:
   // Every flag starts out false (see the member initializers below) and both
   // identifier buffers start out empty.
   DoctypeData() {}

   bool m_hasPublicIdentifier = false;
   bool m_hasSystemIdentifier = false;
   WTF::Vector<UChar> m_publicIdentifier;
   WTF::Vector<UChar> m_systemIdentifier;
   bool m_forceQuirks = false;
 };

 // Returns a pointer to the first element of |attributes| whose name matches
 // |name|, or nullptr when no element matches. The returned pointer refers to
 // storage owned by |attributes|.
 static inline Attribute* findAttributeInVector(Vector<Attribute>& attributes,
                                                const QualifiedName& name) {
   for (Attribute& attribute : attributes) {
     if (attribute.name().matches(name))
       return &attribute;
   }
   return nullptr;
 }

 class HTMLToken {
   WTF_MAKE_NONCOPYABLE(HTMLToken);
   USING_FAST_MALLOC(HTMLToken);

  public:
   enum TokenType {
     Uninitialized,
     DOCTYPE,
     StartTag,
     EndTag,
     Comment,
     Character,
     EndOfFile,
   };

   class Attribute {
     DISALLOW_NEW_EXCEPT_PLACEMENT_NEW();

    public:
     class Range {
       DISALLOW_NEW();

      public:
       static constexpr int kInvalidOffset = -1;

       inline void clear() {
 #if ENABLE(ASSERT)
         start = kInvalidOffset;
         end = kInvalidOffset;
 #endif
       }

       // Check Range instance that is actively being parsed.
       inline void checkValidStart() const {
         DCHECK_NE(start, kInvalidOffset);
         DCHECK_GE(start, 0);
       }

       // Check Range instance which finished parse.
       inline void checkValid() const {
         checkValidStart();
         DCHECK_NE(end, kInvalidOffset);
         DCHECK_GE(end, 0);
         DCHECK_LE(start, end);
       }

       int start;
       int end;
     };

     AtomicString name() const { return AtomicString(m_name); }
     String nameAttemptStaticStringCreation() const {
       return attemptStaticStringCreation(m_name, Likely8Bit);
     }
     const Vector<UChar, 32>& nameAsVector() const { return m_name; }

     void appendToName(UChar c) { m_name.append(c); }

     PassRefPtr<StringImpl> value8BitIfNecessary() const {
       return StringImpl::create8BitIfPossible(m_value);
     }
     String value() const { return String(m_value); }

     void appendToValue(UChar c) { m_value.append(c); }
     void appendToValue(const String& value) { value.appendTo(m_value); }
     void clearValue() { m_value.clear(); }

     const Range& nameRange() const { return m_nameRange; }
     const Range& valueRange() const { return m_valueRange; }
     Range& mutableNameRange() { return m_nameRange; }
     Range& mutableValueRange() { return m_valueRange; }

    private:
     Vector<UChar, 32> m_name;
     Vector<UChar, 32> m_value;
     Range m_nameRange;
     Range m_valueRange;
   };

   typedef Vector<Attribute, 10> AttributeList;

   // By using an inline capacity of 256, we avoid spilling over into an malloced
   // buffer approximately 99% of the time based on a non-scientific browse
   // around a number of popular web sites on 23 May 2013.
   typedef Vector<UChar, 256> DataVector;

   HTMLToken() { clear(); }

   void clear() {
     m_type = Uninitialized;
     m_range.clear();
     m_range.start = 0;
     m_baseOffset = 0;
     // Don't call Vector::clear() as that would destroy the
     // alloced VectorBuffer. If the innerHTML'd content has
     // two 257 character text nodes in a row, we'll needlessly
     // thrash malloc. When we finally finish the parse the
     // HTMLToken will be destroyed and the VectorBuffer released.
     m_data.shrink(0);
     m_orAllData = 0;
   }

   bool isUninitialized() { return m_type == Uninitialized; }
   TokenType type() const { return m_type; }

   void makeEndOfFile() {
     ASSERT(m_type == Uninitialized);
     m_type = EndOfFile;
   }

   // Range and offset methods exposed for HTMLSourceTracker and
   // HTMLViewSourceParser.
   int startIndex() const { return m_range.start; }
   int endIndex() const { return m_range.end; }

   void setBaseOffset(int offset) { m_baseOffset = offset; }

   void end(int endOffset) { m_range.end = endOffset - m_baseOffset; }

   const DataVector& data() const {
     ASSERT(m_type == Character || m_type == Comment || m_type == StartTag ||
            m_type == EndTag);
     return m_data;
   }

   bool isAll8BitData() const { return (m_orAllData <= 0xff); }

   const DataVector& name() const {
     ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
     return m_data;
   }

   void appendToName(UChar character) {
     ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
     ASSERT(character);
     m_data.append(character);
     m_orAllData |= character;
   }

   /* DOCTYPE Tokens */

   bool forceQuirks() const {
     ASSERT(m_type == DOCTYPE);
     return m_doctypeData->m_forceQuirks;
   }

   void setForceQuirks() {
     ASSERT(m_type == DOCTYPE);
     m_doctypeData->m_forceQuirks = true;
   }

   void beginDOCTYPE() {
     ASSERT(m_type == Uninitialized);
     m_type = DOCTYPE;
     m_doctypeData = wrapUnique(new DoctypeData);
   }

   void beginDOCTYPE(UChar character) {
     ASSERT(character);
     beginDOCTYPE();
     m_data.append(character);
     m_orAllData |= character;
   }

   // FIXME: Distinguish between a missing public identifer and an empty one.
   const WTF::Vector<UChar>& publicIdentifier() const {
     ASSERT(m_type == DOCTYPE);
     return m_doctypeData->m_publicIdentifier;
   }

   // FIXME: Distinguish between a missing system identifer and an empty one.
   const WTF::Vector<UChar>& systemIdentifier() const {
     ASSERT(m_type == DOCTYPE);
     return m_doctypeData->m_systemIdentifier;
   }

   void setPublicIdentifierToEmptyString() {
     ASSERT(m_type == DOCTYPE);
     m_doctypeData->m_hasPublicIdentifier = true;
     m_doctypeData->m_publicIdentifier.clear();
   }

   void setSystemIdentifierToEmptyString() {
     ASSERT(m_type == DOCTYPE);
     m_doctypeData->m_hasSystemIdentifier = true;
     m_doctypeData->m_systemIdentifier.clear();
   }

   void appendToPublicIdentifier(UChar character) {
     ASSERT(character);
     ASSERT(m_type == DOCTYPE);
     ASSERT(m_doctypeData->m_hasPublicIdentifier);
     m_doctypeData->m_publicIdentifier.append(character);
   }

   void appendToSystemIdentifier(UChar character) {
     ASSERT(character);
     ASSERT(m_type == DOCTYPE);
     ASSERT(m_doctypeData->m_hasSystemIdentifier);
     m_doctypeData->m_systemIdentifier.append(character);
   }

   std::unique_ptr<DoctypeData> releaseDoctypeData() {
     return std::move(m_doctypeData);
   }

   /* Start/End Tag Tokens */

   bool selfClosing() const {
     ASSERT(m_type == StartTag || m_type == EndTag);
     return m_selfClosing;
   }

   void setSelfClosing() {
     ASSERT(m_type == StartTag || m_type == EndTag);
     m_selfClosing = true;
   }

   void beginStartTag(UChar character) {
     ASSERT(character);
     ASSERT(m_type == Uninitialized);
     m_type = StartTag;
     m_selfClosing = false;
     m_currentAttribute = 0;
     m_attributes.clear();

     m_data.append(character);
     m_orAllData |= character;
   }

   void beginEndTag(LChar character) {
     ASSERT(m_type == Uninitialized);
     m_type = EndTag;
     m_selfClosing = false;
     m_currentAttribute = 0;
     m_attributes.clear();

     m_data.append(character);
   }

   void beginEndTag(const Vector<LChar, 32>& characters) {
     ASSERT(m_type == Uninitialized);
     m_type = EndTag;
     m_selfClosing = false;
     m_currentAttribute = 0;
     m_attributes.clear();

     m_data.appendVector(characters);
   }

   void addNewAttribute() {
     ASSERT(m_type == StartTag || m_type == EndTag);
     m_attributes.grow(m_attributes.size() + 1);
     m_currentAttribute = &m_attributes.last();
     m_currentAttribute->mutableNameRange().clear();
     m_currentAttribute->mutableValueRange().clear();
   }

   void beginAttributeName(int offset) {
     m_currentAttribute->mutableNameRange().start = offset - m_baseOffset;
     m_currentAttribute->nameRange().checkValidStart();
   }

   void endAttributeName(int offset) {
     int index = offset - m_baseOffset;
     m_currentAttribute->mutableNameRange().end = index;
     m_currentAttribute->nameRange().checkValid();
     m_currentAttribute->mutableValueRange().start = index;
     m_currentAttribute->mutableValueRange().end = index;
   }

   void beginAttributeValue(int offset) {
     m_currentAttribute->mutableValueRange().clear();
     m_currentAttribute->mutableValueRange().start = offset - m_baseOffset;
     m_currentAttribute->valueRange().checkValidStart();
   }

   void endAttributeValue(int offset) {
     m_currentAttribute->mutableValueRange().end = offset - m_baseOffset;
     m_currentAttribute->valueRange().checkValid();
   }

   void appendToAttributeName(UChar character) {
     ASSERT(character);
     ASSERT(m_type == StartTag || m_type == EndTag);
     m_currentAttribute->nameRange().checkValidStart();
     m_currentAttribute->appendToName(character);
   }

   void appendToAttributeValue(UChar character) {
     ASSERT(character);
     ASSERT(m_type == StartTag || m_type == EndTag);
     m_currentAttribute->valueRange().checkValidStart();
     m_currentAttribute->appendToValue(character);
   }

   void appendToAttributeValue(size_t i, const String& value) {
     ASSERT(!value.isEmpty());
     ASSERT(m_type == StartTag || m_type == EndTag);
     m_attributes[i].appendToValue(value);
   }

   const AttributeList& attributes() const {
     ASSERT(m_type == StartTag || m_type == EndTag);
     return m_attributes;
   }

   const Attribute* getAttributeItem(const QualifiedName& name) const {
     for (unsigned i = 0; i < m_attributes.size(); ++i) {
       if (m_attributes.at(i).name() == name.localName())
         return &m_attributes.at(i);
     }
     return 0;
   }

   // Used by the XSSAuditor to nuke XSS-laden attributes.
   void eraseValueOfAttribute(size_t i) {
     ASSERT(m_type == StartTag || m_type == EndTag);
     m_attributes[i].clearValue();
   }

   /* Character Tokens */

   // Starting a character token works slightly differently than starting
   // other types of tokens because we want to save a per-character branch.
   void ensureIsCharacterToken() {
     ASSERT(m_type == Uninitialized || m_type == Character);
     m_type = Character;
   }

   const DataVector& characters() const {
     ASSERT(m_type == Character);
     return m_data;
   }

   void appendToCharacter(char character) {
     ASSERT(m_type == Character);
     m_data.append(character);
   }

   void appendToCharacter(UChar character) {
     ASSERT(m_type == Character);
     m_data.append(character);
     m_orAllData |= character;
   }

   void appendToCharacter(const Vector<LChar, 32>& characters) {
     ASSERT(m_type == Character);
     m_data.appendVector(characters);
   }

   /* Comment Tokens */

   const DataVector& comment() const {
     ASSERT(m_type == Comment);
     return m_data;
   }

   void beginComment() {
     ASSERT(m_type == Uninitialized);
     m_type = Comment;
   }

   void appendToComment(UChar character) {
     ASSERT(character);
     ASSERT(m_type == Comment);
     m_data.append(character);
     m_orAllData |= character;
   }

   // Only for XSSAuditor
   void eraseCharacters() {
     ASSERT(m_type == Character);
     m_data.clear();
     m_orAllData = 0;
   }

  private:
   TokenType m_type;
   Attribute::Range m_range;  // Always starts at zero.
   int m_baseOffset;
   DataVector m_data;
   UChar m_orAllData;

   // For StartTag and EndTag
   bool m_selfClosing;
   AttributeList m_attributes;

   // A pointer into m_attributes used during lexing.
   Attribute* m_currentAttribute;

   // For DOCTYPE
   std::unique_ptr<DoctypeData> m_doctypeData;
 };

 #ifndef NDEBUG
 // Declared only when NDEBUG is not defined. The definition is not in this
 // header; presumably it returns a printable name for the given token type.
 const char* toString(HTMLToken::TokenType);
 #endif

 }  // namespace blink

 #endif
	/*
	* Copyright (C) 2013 Google, Inc. All Rights Reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	*
	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	#ifndef HTMLToken_h
	#define HTMLToken_h

	#include "core/dom/Attribute.h"
	#include "core/html/parser/HTMLParserIdioms.h"
	#include "wtf/Forward.h"
	#include "wtf/PtrUtil.h"
	#include <memory>

	namespace blink {

	// Holds the DOCTYPE-specific state of a token: the public and system
	// identifiers, a flag per identifier recording whether it has been set, and
	// the force-quirks flag.
	class DoctypeData {
	  USING_FAST_MALLOC(DoctypeData);
	  WTF_MAKE_NONCOPYABLE(DoctypeData);

	 public:
	  // Every flag starts out false (see the member initializers below) and both
	  // identifier buffers start out empty.
	  DoctypeData() {}

	  bool m_hasPublicIdentifier = false;
	  bool m_hasSystemIdentifier = false;
	  WTF::Vector<UChar> m_publicIdentifier;
	  WTF::Vector<UChar> m_systemIdentifier;
	  bool m_forceQuirks = false;
	};

	// Returns a pointer to the first element of |attributes| whose name matches
	// |name|, or nullptr when no element matches. The returned pointer refers to
	// storage owned by |attributes|.
	static inline Attribute* findAttributeInVector(Vector<Attribute>& attributes,
	                                               const QualifiedName& name) {
	  for (Attribute& attribute : attributes) {
	    if (attribute.name().matches(name))
	      return &attribute;
	  }
	  return nullptr;
	}

	class HTMLToken {
	WTF_MAKE_NONCOPYABLE(HTMLToken);
	USING_FAST_MALLOC(HTMLToken);

	public:
	enum TokenType {
	Uninitialized,
	DOCTYPE,
	StartTag,
	EndTag,
	Comment,
	Character,
	EndOfFile,
	};

	class Attribute {
	DISALLOW_NEW_EXCEPT_PLACEMENT_NEW();

	public:
	class Range {
	DISALLOW_NEW();

	public:
	static constexpr int kInvalidOffset = -1;

	inline void clear() {
	#if ENABLE(ASSERT)
	start = kInvalidOffset;
	end = kInvalidOffset;
	#endif
	}

	// Check Range instance that is actively being parsed.
	inline void checkValidStart() const {
	DCHECK_NE(start, kInvalidOffset);
	DCHECK_GE(start, 0);
	}

	// Check Range instance which finished parse.
	inline void checkValid() const {
	checkValidStart();
	DCHECK_NE(end, kInvalidOffset);
	DCHECK_GE(end, 0);
	DCHECK_LE(start, end);
	}

	int start;
	int end;
	};

	AtomicString name() const { return AtomicString(m_name); }
	String nameAttemptStaticStringCreation() const {
	return attemptStaticStringCreation(m_name, Likely8Bit);
	}
	const Vector<UChar, 32>& nameAsVector() const { return m_name; }

	void appendToName(UChar c) { m_name.append(c); }

	PassRefPtr<StringImpl> value8BitIfNecessary() const {
	return StringImpl::create8BitIfPossible(m_value);
	}
	String value() const { return String(m_value); }

	void appendToValue(UChar c) { m_value.append(c); }
	void appendToValue(const String& value) { value.appendTo(m_value); }
	void clearValue() { m_value.clear(); }

	const Range& nameRange() const { return m_nameRange; }
	const Range& valueRange() const { return m_valueRange; }
	Range& mutableNameRange() { return m_nameRange; }
	Range& mutableValueRange() { return m_valueRange; }

	private:
	Vector<UChar, 32> m_name;
	Vector<UChar, 32> m_value;
	Range m_nameRange;
	Range m_valueRange;
	};

	typedef Vector<Attribute, 10> AttributeList;

	// By using an inline capacity of 256, we avoid spilling over into an malloced
	// buffer approximately 99% of the time based on a non-scientific browse
	// around a number of popular web sites on 23 May 2013.
	typedef Vector<UChar, 256> DataVector;

	HTMLToken() { clear(); }

	void clear() {
	m_type = Uninitialized;
	m_range.clear();
	m_range.start = 0;
	m_baseOffset = 0;
	// Don't call Vector::clear() as that would destroy the
	// alloced VectorBuffer. If the innerHTML'd content has
	// two 257 character text nodes in a row, we'll needlessly
	// thrash malloc. When we finally finish the parse the
	// HTMLToken will be destroyed and the VectorBuffer released.
	m_data.shrink(0);
	m_orAllData = 0;
	}

	bool isUninitialized() { return m_type == Uninitialized; }
	TokenType type() const { return m_type; }

	void makeEndOfFile() {
	ASSERT(m_type == Uninitialized);
	m_type = EndOfFile;
	}

	// Range and offset methods exposed for HTMLSourceTracker and
	// HTMLViewSourceParser.
	int startIndex() const { return m_range.start; }
	int endIndex() const { return m_range.end; }

	void setBaseOffset(int offset) { m_baseOffset = offset; }

	void end(int endOffset) { m_range.end = endOffset - m_baseOffset; }

	const DataVector& data() const {
	ASSERT(m_type == Character \|\| m_type == Comment \|\| m_type == StartTag \|\|
	m_type == EndTag);
	return m_data;
	}

	bool isAll8BitData() const { return (m_orAllData <= 0xff); }

	const DataVector& name() const {
	ASSERT(m_type == StartTag \|\| m_type == EndTag \|\| m_type == DOCTYPE);
	return m_data;
	}

	void appendToName(UChar character) {
	ASSERT(m_type == StartTag \|\| m_type == EndTag \|\| m_type == DOCTYPE);
	ASSERT(character);
	m_data.append(character);
	m_orAllData \|= character;
	}

	/* DOCTYPE Tokens */

	bool forceQuirks() const {
	ASSERT(m_type == DOCTYPE);
	return m_doctypeData->m_forceQuirks;
	}

	void setForceQuirks() {
	ASSERT(m_type == DOCTYPE);
	m_doctypeData->m_forceQuirks = true;
	}

	void beginDOCTYPE() {
	ASSERT(m_type == Uninitialized);
	m_type = DOCTYPE;
	m_doctypeData = wrapUnique(new DoctypeData);
	}

	void beginDOCTYPE(UChar character) {
	ASSERT(character);
	beginDOCTYPE();
	m_data.append(character);
	m_orAllData \|= character;
	}

	// FIXME: Distinguish between a missing public identifer and an empty one.
	const WTF::Vector<UChar>& publicIdentifier() const {
	ASSERT(m_type == DOCTYPE);
	return m_doctypeData->m_publicIdentifier;
	}

	// FIXME: Distinguish between a missing system identifer and an empty one.
	const WTF::Vector<UChar>& systemIdentifier() const {
	ASSERT(m_type == DOCTYPE);
	return m_doctypeData->m_systemIdentifier;
	}

	void setPublicIdentifierToEmptyString() {
	ASSERT(m_type == DOCTYPE);
	m_doctypeData->m_hasPublicIdentifier = true;
	m_doctypeData->m_publicIdentifier.clear();
	}

	void setSystemIdentifierToEmptyString() {
	ASSERT(m_type == DOCTYPE);
	m_doctypeData->m_hasSystemIdentifier = true;
	m_doctypeData->m_systemIdentifier.clear();
	}

	void appendToPublicIdentifier(UChar character) {
	ASSERT(character);
	ASSERT(m_type == DOCTYPE);
	ASSERT(m_doctypeData->m_hasPublicIdentifier);
	m_doctypeData->m_publicIdentifier.append(character);
	}

	void appendToSystemIdentifier(UChar character) {
	ASSERT(character);
	ASSERT(m_type == DOCTYPE);
	ASSERT(m_doctypeData->m_hasSystemIdentifier);
	m_doctypeData->m_systemIdentifier.append(character);
	}

	std::unique_ptr<DoctypeData> releaseDoctypeData() {
	return std::move(m_doctypeData);
	}

	/* Start/End Tag Tokens */

	bool selfClosing() const {
	ASSERT(m_type == StartTag \|\| m_type == EndTag);
	return m_selfClosing;
	}

	void setSelfClosing() {
	ASSERT(m_type == StartTag \|\| m_type == EndTag);
	m_selfClosing = true;
	}

	void beginStartTag(UChar character) {
	ASSERT(character);
	ASSERT(m_type == Uninitialized);
	m_type = StartTag;
	m_selfClosing = false;
	m_currentAttribute = 0;
	m_attributes.clear();

	m_data.append(character);
	m_orAllData \|= character;
	}

	void beginEndTag(LChar character) {
	ASSERT(m_type == Uninitialized);
	m_type = EndTag;
	m_selfClosing = false;
	m_currentAttribute = 0;
	m_attributes.clear();

	m_data.append(character);
	}

	void beginEndTag(const Vector<LChar, 32>& characters) {
	ASSERT(m_type == Uninitialized);
	m_type = EndTag;
	m_selfClosing = false;
	m_currentAttribute = 0;
	m_attributes.clear();

	m_data.appendVector(characters);
	}

	void addNewAttribute() {
	ASSERT(m_type == StartTag \|\| m_type == EndTag);
	m_attributes.grow(m_attributes.size() + 1);
	m_currentAttribute = &m_attributes.last();
	m_currentAttribute->mutableNameRange().clear();
	m_currentAttribute->mutableValueRange().clear();
	}

	void beginAttributeName(int offset) {
	m_currentAttribute->mutableNameRange().start = offset - m_baseOffset;
	m_currentAttribute->nameRange().checkValidStart();
	}

	void endAttributeName(int offset) {
	int index = offset - m_baseOffset;
	m_currentAttribute->mutableNameRange().end = index;
	m_currentAttribute->nameRange().checkValid();
	m_currentAttribute->mutableValueRange().start = index;
	m_currentAttribute->mutableValueRange().end = index;
	}

	void beginAttributeValue(int offset) {
	m_currentAttribute->mutableValueRange().clear();
	m_currentAttribute->mutableValueRange().start = offset - m_baseOffset;
	m_currentAttribute->valueRange().checkValidStart();
	}

	void endAttributeValue(int offset) {
	m_currentAttribute->mutableValueRange().end = offset - m_baseOffset;
	m_currentAttribute->valueRange().checkValid();
	}

	void appendToAttributeName(UChar character) {
	ASSERT(character);
	ASSERT(m_type == StartTag \|\| m_type == EndTag);
	m_currentAttribute->nameRange().checkValidStart();
	m_currentAttribute->appendToName(character);
	}

	void appendToAttributeValue(UChar character) {
	ASSERT(character);
	ASSERT(m_type == StartTag \|\| m_type == EndTag);
	m_currentAttribute->valueRange().checkValidStart();
	m_currentAttribute->appendToValue(character);
	}

	void appendToAttributeValue(size_t i, const String& value) {
	ASSERT(!value.isEmpty());
	ASSERT(m_type == StartTag \|\| m_type == EndTag);
	m_attributes[i].appendToValue(value);
	}

	const AttributeList& attributes() const {
	ASSERT(m_type == StartTag \|\| m_type == EndTag);
	return m_attributes;
	}

	const Attribute* getAttributeItem(const QualifiedName& name) const {
	for (unsigned i = 0; i < m_attributes.size(); ++i) {
	if (m_attributes.at(i).name() == name.localName())
	return &m_attributes.at(i);
	}
	return 0;
	}

	// Used by the XSSAuditor to nuke XSS-laden attributes.
	void eraseValueOfAttribute(size_t i) {
	ASSERT(m_type == StartTag \|\| m_type == EndTag);
	m_attributes[i].clearValue();
	}

	/* Character Tokens */

	// Starting a character token works slightly differently than starting
	// other types of tokens because we want to save a per-character branch.
	void ensureIsCharacterToken() {
	ASSERT(m_type == Uninitialized \|\| m_type == Character);
	m_type = Character;
	}

	const DataVector& characters() const {
	ASSERT(m_type == Character);
	return m_data;
	}

	void appendToCharacter(char character) {
	ASSERT(m_type == Character);
	m_data.append(character);
	}

	void appendToCharacter(UChar character) {
	ASSERT(m_type == Character);
	m_data.append(character);
	m_orAllData \|= character;
	}

	void appendToCharacter(const Vector<LChar, 32>& characters) {
	ASSERT(m_type == Character);
	m_data.appendVector(characters);
	}

	/* Comment Tokens */

	const DataVector& comment() const {
	ASSERT(m_type == Comment);
	return m_data;
	}

	void beginComment() {
	ASSERT(m_type == Uninitialized);
	m_type = Comment;
	}

	void appendToComment(UChar character) {
	ASSERT(character);
	ASSERT(m_type == Comment);
	m_data.append(character);
	m_orAllData \|= character;
	}

	// Only for XSSAuditor
	void eraseCharacters() {
	ASSERT(m_type == Character);
	m_data.clear();
	m_orAllData = 0;
	}

	private:
	TokenType m_type;
	Attribute::Range m_range; // Always starts at zero.
	int m_baseOffset;
	DataVector m_data;
	UChar m_orAllData;

	// For StartTag and EndTag
	bool m_selfClosing;
	AttributeList m_attributes;

	// A pointer into m_attributes used during lexing.
	Attribute* m_currentAttribute;

	// For DOCTYPE
	std::unique_ptr<DoctypeData> m_doctypeData;
	};

	#ifndef NDEBUG
	// Declared only when NDEBUG is not defined. The definition is not in this
	// header; presumably it returns a printable name for the given token type.
	const char* toString(HTMLToken::TokenType);
	#endif

	} // namespace blink

	#endif