// blob: f6d1c628eea5b250c1e4ce9b18ea41444cc6df1f [file] [log] [blame]
/*
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HTMLDocumentParser_h
#define HTMLDocumentParser_h
#include "core/CoreExport.h"
#include "core/dom/ParserContentPolicy.h"
#include "core/dom/ScriptableDocumentParser.h"
#include "core/html/parser/BackgroundHTMLInputStream.h"
#include "core/html/parser/HTMLInputStream.h"
#include "core/html/parser/HTMLParserOptions.h"
#include "core/html/parser/HTMLParserReentryPermit.h"
#include "core/html/parser/HTMLPreloadScanner.h"
#include "core/html/parser/HTMLScriptRunnerHost.h"
#include "core/html/parser/HTMLSourceTracker.h"
#include "core/html/parser/HTMLToken.h"
#include "core/html/parser/HTMLTokenizer.h"
#include "core/html/parser/HTMLTreeBuilderSimulator.h"
#include "core/html/parser/ParserSynchronizationPolicy.h"
#include "core/html/parser/TextResourceDecoder.h"
#include "core/html/parser/XSSAuditor.h"
#include "core/html/parser/XSSAuditorDelegate.h"
#include "wtf/Deque.h"
#include "wtf/RefPtr.h"
#include "wtf/WeakPtr.h"
#include "wtf/text/TextPosition.h"
#include <memory>
namespace blink {
// Forward declarations of blink classes named in the HTMLDocumentParser
// declaration below.
// NOTE(review): PumpSession is not referenced anywhere else in this header --
// confirm whether its forward declaration is still needed.
class BackgroundHTMLParser;
class CompactHTMLToken;
class Document;
class DocumentEncodingData;
class DocumentFragment;
class Element;
class HTMLDocument;
class HTMLParserScheduler;
class HTMLPreloadScanner;
class HTMLResourcePreloader;
class HTMLScriptRunner;
class HTMLTreeBuilder;
class PumpSession;
class SegmentedString;
class TokenizedChunkQueue;
class DocumentWriteEvaluator;
// Document parser for HTML content. Derives publicly from
// ScriptableDocumentParser and privately from HTMLScriptRunnerHost.
//
// All constructors are protected or private; code outside the class hierarchy
// obtains an instance through the public create() factory, or parses a
// fragment through the static parseDocumentFragment().
//
// Only declarations live here; apart from a few inline accessors, the member
// functions are defined outside this header.
class CORE_EXPORT HTMLDocumentParser : public ScriptableDocumentParser,
private HTMLScriptRunnerHost {
USING_GARBAGE_COLLECTED_MIXIN(HTMLDocumentParser);
// Names dispose() (declared below) as this class's pre-finalizer.
USING_PRE_FINALIZER(HTMLDocumentParser, dispose);
public:
// Allocates a parser for |document| using the given synchronization policy.
static HTMLDocumentParser* create(
HTMLDocument& document,
ParserSynchronizationPolicy backgroundParsingPolicy) {
return new HTMLDocumentParser(document, backgroundParsingPolicy);
}
~HTMLDocumentParser() override;
DECLARE_VIRTUAL_TRACE();
// TODO(alexclarke): Remove when background parser goes away.
void dispose();
// Exposed for HTMLParserScheduler
void resumeParsingAfterYield();
// Static entry point for fragment parsing. Takes the source string, the
// target DocumentFragment and a context element; the content policy defaults
// to AllowScriptingContent.
static void parseDocumentFragment(
const String&,
DocumentFragment*,
Element* contextElement,
ParserContentPolicy = AllowScriptingContent);
// Exposed for testing.
// Returns this object viewed through its (privately inherited)
// HTMLScriptRunnerHost base.
HTMLScriptRunnerHost* asHTMLScriptRunnerHostForTesting() { return this; }
// Non-owning pointer to the tokenizer held in m_tokenizer; null when
// m_tokenizer is empty.
HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
// Overrides of base-class virtuals (all marked final).
TextPosition textPosition() const final;
bool isParsingAtLineNumber() const final;
OrdinalNumber lineNumber() const final;
void suspendScheduledTasks() final;
void resumeScheduledTasks() final;
// Non-owning pointer to the reentry permit held in m_reentryPermit.
HTMLParserReentryPermit* reentryPermit() { return m_reentryPermit.get(); }
// Plain data bundle: a token stream together with the preload requests,
// viewport description, XSS info and the tokenizer / tree-builder-simulator /
// input / preload-scanner state stored alongside it. Instances are handled
// through std::unique_ptr by the methods and members of the enclosing class.
struct TokenizedChunk {
USING_FAST_MALLOC(TokenizedChunk);
public:
std::unique_ptr<CompactHTMLTokenStream> tokens;
PreloadRequestStream preloads;
ViewportDescriptionWrapper viewport;
XSSInfoStream xssInfos;
HTMLTokenizer::State tokenizerState;
HTMLTreeBuilderSimulator::State treeBuilderState;
HTMLInputCheckpoint inputCheckpoint;
TokenPreloadScannerCheckpoint preloadScannerCheckpoint;
bool startingScript;
// Indices into |tokens|.
Vector<int> likelyDocumentWriteScriptIndices;
// Index into |tokens| of the last <meta> csp tag in |tokens|. Preloads will
// be deferred until this token is parsed. Will be noPendingToken if there
// are no csp tokens.
int pendingCSPMetaTokenIndex;
// Sentinel value for pendingCSPMetaTokenIndex (see above).
static constexpr int noPendingToken = -1;
};
void notifyPendingTokenizedChunks();
void didReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData&);
// Byte-level input and decoder hooks (base-class overrides).
void appendBytes(const char* bytes, size_t length) override;
void flush() final;
void setDecoder(std::unique_ptr<TextResourceDecoder>) final;
protected:
void insert(const SegmentedString&) final;
void append(const String&) override;
void finish() final;
// Protected constructors: one for a whole HTMLDocument, one for a
// DocumentFragment parsed relative to |contextElement|.
HTMLDocumentParser(HTMLDocument&, ParserSynchronizationPolicy);
HTMLDocumentParser(DocumentFragment*,
Element* contextElement,
ParserContentPolicy);
// Non-owning pointer to the tree builder held in m_treeBuilder.
HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
void forcePlaintextForTextDocument();
private:
// Fragment-parsing factory. Private, so it is only callable from inside this
// class (presumably from parseDocumentFragment() -- TODO confirm).
static HTMLDocumentParser* create(DocumentFragment* fragment,
Element* contextElement,
ParserContentPolicy parserContentPolicy) {
return new HTMLDocumentParser(fragment, contextElement,
parserContentPolicy);
}
// Private constructor taking all three of document, content policy and
// synchronization policy.
// NOTE(review): presumably the two protected constructors delegate to this
// one -- confirm in the .cpp.
HTMLDocumentParser(Document&,
ParserContentPolicy,
ParserSynchronizationPolicy);
// DocumentParser
void detach() final;
bool hasInsertionPoint() final;
void prepareToStopParsing() final;
void stopParsing() final;
bool isWaitingForScripts() const final;
bool isExecutingScript() const final;
void executeScriptsWaitingForResources() final;
void documentElementAvailable() override;
// HTMLScriptRunnerHost
void notifyScriptLoaded(Resource*) final;
HTMLInputStream& inputStream() final { return m_input; }
// True only when m_preloadScanner is set and threading is not in use.
bool hasPreloadScanner() const final {
return m_preloadScanner.get() && !shouldUseThreading();
}
void appendCurrentInputStreamToPreloadScannerAndScan() final;
// Background parser lifecycle and handling of the TokenizedChunks
// ("speculations") associated with it.
void startBackgroundParser();
void stopBackgroundParser();
void validateSpeculations(std::unique_ptr<TokenizedChunk> lastChunk);
void discardSpeculationsAndResumeFrom(
std::unique_ptr<TokenizedChunk> lastChunk,
std::unique_ptr<HTMLToken>,
std::unique_ptr<HTMLTokenizer>);
size_t processTokenizedChunkFromBackgroundParser(
std::unique_ptr<TokenizedChunk>);
void pumpPendingSpeculations();
// Tokenizer pumping and tree construction.
bool canTakeNextToken();
void pumpTokenizer();
void pumpTokenizerIfPossible();
void constructTreeFromHTMLToken();
void constructTreeFromCompactHTMLToken(const CompactHTMLToken&);
void runScriptsForPausedTreeBuilder();
void resumeParsingAfterScriptExecution();
// End-of-parsing sequence.
void attemptToEnd();
void endIfDelayed();
void attemptToRunDeferredScriptsAndEnd();
void end();
// Returns the m_shouldUseThreading flag.
bool shouldUseThreading() const { return m_shouldUseThreading; }
bool isParsingFragment() const;
bool isScheduledForResume() const;
// True while m_pumpSessionNestingLevel is non-zero.
bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; }
// True if any of the following holds: a pump session is active, the parser is
// waiting for scripts, it is scheduled for resume, or it is executing script.
bool shouldDelayEnd() const {
return inPumpSession() || isWaitingForScripts() || isScheduledForResume() ||
isExecutingScript();
}
std::unique_ptr<HTMLPreloadScanner> createPreloadScanner();
void fetchQueuedPreloads();
void evaluateAndPreloadScriptForDocumentWrite(const String& source);
// Temporary enum for the ParseHTMLOnMainThread experiment. This is used to
// annotate whether a given task should post a task or not on the main thread
// if the lookahead parser is living on the main thread.
enum LookaheadParserTaskSynchrony {
Synchronous,
Asynchronous,
};
// Setting |synchronyPolicy| to Synchronous will just call the function with
// the given parameters. Note, this method is completely temporary as we need
// to maintain both threading implementations until the ParseHTMLOnMainThread
// experiment finishes.
template <typename FunctionType, typename... Ps>
void postTaskToLookaheadParser(LookaheadParserTaskSynchrony synchronyPolicy,
FunctionType,
Ps&&... parameters);
// Dereferences m_token without a null check; m_token must be set when this is
// called.
HTMLToken& token() { return *m_token; }
// Data members. C++ initializes non-static data members in declaration order
// and destroys them in reverse, and the constructors are defined outside this
// header, so reordering these declarations can change behavior.
HTMLParserOptions m_options;
HTMLInputStream m_input;
RefPtr<HTMLParserReentryPermit> m_reentryPermit;
std::unique_ptr<HTMLToken> m_token;
std::unique_ptr<HTMLTokenizer> m_tokenizer;
Member<HTMLScriptRunner> m_scriptRunner;
Member<HTMLTreeBuilder> m_treeBuilder;
std::unique_ptr<HTMLPreloadScanner> m_preloadScanner;
std::unique_ptr<HTMLPreloadScanner> m_insertionPreloadScanner;
std::unique_ptr<WebTaskRunner> m_loadingTaskRunner;
Member<HTMLParserScheduler> m_parserScheduler;
HTMLSourceTracker m_sourceTracker;
TextPosition m_textPosition;
XSSAuditor m_xssAuditor;
XSSAuditorDelegate m_xssAuditorDelegate;
// FIXME: m_lastChunkBeforeScript, m_tokenizer, m_token, and m_input should be
// combined into a single state object so they can be set and cleared together
// and passed between threads together.
std::unique_ptr<TokenizedChunk> m_lastChunkBeforeScript;
Deque<std::unique_ptr<TokenizedChunk>> m_speculations;
WeakPtrFactory<HTMLDocumentParser> m_weakFactory;
WeakPtr<BackgroundHTMLParser> m_backgroundParser;
Member<HTMLResourcePreloader> m_preloader;
PreloadRequestStream m_queuedPreloads;
Vector<String> m_queuedDocumentWriteScripts;
RefPtr<TokenizedChunkQueue> m_tokenizedChunkQueue;
std::unique_ptr<DocumentWriteEvaluator> m_evaluator;
// If this is non-null, then there is a meta CSP token somewhere in the
// speculation buffer. Preloads will be deferred until a token matching this
// pointer is parsed and the CSP policy is applied. Note that this pointer
// tracks the *last* meta token in the speculation buffer, so it overestimates
// how long to defer preloads. This is for simplicity, as the alternative
// would require keeping track of token positions of preload requests.
CompactHTMLToken* m_pendingCSPMetaToken;
// Value returned by shouldUseThreading().
bool m_shouldUseThreading;
bool m_endWasDelayed;
bool m_haveBackgroundParser;
bool m_tasksWereSuspended;
// Nesting counter read by inPumpSession().
unsigned m_pumpSessionNestingLevel;
unsigned m_pumpSpeculationsSessionNestingLevel;
bool m_isParsingAtLineNumber;
bool m_triedLoadingLinkHeaders;
};
} // namespace blink
#endif // HTMLDocumentParser_h