// blob: d8088c7b9033d83fdceee3dc391c3a4e5a50abaa [file] [log] [blame]
/*
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HTMLDocumentParser_h
#define HTMLDocumentParser_h
#include <memory>
#include "core/CoreExport.h"
#include "core/dom/ParserContentPolicy.h"
#include "core/dom/ScriptableDocumentParser.h"
#include "core/html/parser/BackgroundHTMLInputStream.h"
#include "core/html/parser/HTMLInputStream.h"
#include "core/html/parser/HTMLParserOptions.h"
#include "core/html/parser/HTMLParserReentryPermit.h"
#include "core/html/parser/HTMLParserScriptRunnerHost.h"
#include "core/html/parser/HTMLPreloadScanner.h"
#include "core/html/parser/HTMLSourceTracker.h"
#include "core/html/parser/HTMLToken.h"
#include "core/html/parser/HTMLTokenizer.h"
#include "core/html/parser/HTMLTreeBuilderSimulator.h"
#include "core/html/parser/ParserSynchronizationPolicy.h"
#include "core/html/parser/PreloadRequest.h"
#include "core/html/parser/TextResourceDecoder.h"
#include "core/html/parser/XSSAuditor.h"
#include "core/html/parser/XSSAuditorDelegate.h"
#include "platform/bindings/TraceWrapperMember.h"
#include "platform/wtf/Deque.h"
#include "platform/wtf/RefPtr.h"
#include "platform/wtf/WeakPtr.h"
#include "platform/wtf/text/TextPosition.h"
namespace blink {
// Forward declarations, kept in alphabetical order.
class BackgroundHTMLParser;
class CompactHTMLToken;
class Document;
class DocumentEncodingData;
class DocumentFragment;
class DocumentWriteEvaluator;
class Element;
class HTMLDocument;
class HTMLParserScheduler;
class HTMLParserScriptRunner;
class HTMLPreloadScanner;
class HTMLResourcePreloader;
class HTMLTreeBuilder;
class SegmentedString;
class TokenizedChunkQueue;
// Document parser for HTML. Publicly a ScriptableDocumentParser and, via
// private inheritance, an HTMLParserScriptRunnerHost. The data members at the
// bottom of the class hold the tokenizer, current token, tree builder, script
// runner and preload scanners, together with the bookkeeping used when a
// BackgroundHTMLParser is involved (speculations_, tokenized_chunk_queue_,
// background_parser_, ...).
//
// Instances are created through the Create() factories; the constructors are
// not public.
class CORE_EXPORT HTMLDocumentParser : public ScriptableDocumentParser,
private HTMLParserScriptRunnerHost {
USING_GARBAGE_COLLECTED_MIXIN(HTMLDocumentParser);
// Registers Dispose() as the pre-finalizer for this class (see the comment on
// weak_factory_ below for why one is needed).
USING_PRE_FINALIZER(HTMLDocumentParser, Dispose);
public:
// Factory for parsing a whole HTMLDocument. Forwards both arguments to the
// protected (HTMLDocument&, ParserSynchronizationPolicy) constructor.
static HTMLDocumentParser* Create(
HTMLDocument& document,
ParserSynchronizationPolicy background_parsing_policy) {
return new HTMLDocumentParser(document, background_parsing_policy);
}
~HTMLDocumentParser() override;
// Tracing declarations; the definitions are not in this header.
DECLARE_VIRTUAL_TRACE();
DECLARE_TRACE_WRAPPERS();
// Pre-finalizer registered via USING_PRE_FINALIZER above.
// TODO(alexclarke): Remove when background parser goes away.
void Dispose();
// Exposed for HTMLParserScheduler
void ResumeParsingAfterYield();
// Static entry point for fragment parsing. The ParserContentPolicy argument
// defaults to kAllowScriptingContent.
// NOTE(review): presumably parses the given string into the given
// DocumentFragment using |context_element| as the context; the
// implementation is not visible here -- confirm in the .cpp.
static void ParseDocumentFragment(
const String&,
DocumentFragment*,
Element* context_element,
ParserContentPolicy = kAllowScriptingContent);
// Exposed for testing.
// Returns |this| converted to the privately inherited
// HTMLParserScriptRunnerHost base, which outside code could not otherwise
// reach.
HTMLParserScriptRunnerHost* AsHTMLParserScriptRunnerHostForTesting() {
return this;
}
// Returns the raw pointer held by tokenizer_ (may be null if tokenizer_ is
// empty). Ownership stays with this parser.
HTMLTokenizer* Tokenizer() const { return tokenizer_.get(); }
// Overrides of base-class virtuals (all marked final).
TextPosition GetTextPosition() const final;
bool IsParsingAtLineNumber() const final;
OrdinalNumber LineNumber() const final;
void SuspendScheduledTasks() final;
void ResumeScheduledTasks() final;
// Returns the raw pointer held by reentry_permit_.
HTMLParserReentryPermit* ReentryPermit() { return reentry_permit_.Get(); }
// A batch of tokens together with the state captured alongside it. Chunks
// are passed around by std::unique_ptr (see
// ProcessTokenizedChunkFromBackgroundParser(), ValidateSpeculations() and the
// speculations_ deque below).
struct TokenizedChunk {
USING_FAST_MALLOC(TokenizedChunk);
public:
std::unique_ptr<CompactHTMLTokenStream> tokens;
PreloadRequestStream preloads;
ViewportDescriptionWrapper viewport;
XSSInfoStream xss_infos;
HTMLTokenizer::State tokenizer_state;
HTMLTreeBuilderSimulator::State tree_builder_state;
HTMLInputCheckpoint input_checkpoint;
TokenPreloadScannerCheckpoint preload_scanner_checkpoint;
bool starting_script;
// Indices into |tokens|.
Vector<int> likely_document_write_script_indices;
// Index into |tokens| of the last <meta> csp tag in |tokens|. Preloads will
// be deferred until this token is parsed. Will be kNoPendingToken if there
// are no csp tokens.
int pending_csp_meta_token_index;
// Sentinel value for |pending_csp_meta_token_index|.
static constexpr int kNoPendingToken = -1;
};
// NOTE(review): the next two methods appear to be notifications coming from
// the background parser side (names only; callers are not visible here).
void NotifyPendingTokenizedChunks();
void DidReceiveEncodingDataFromBackgroundParser(const DocumentEncodingData&);
// Input-feeding overrides of base-class virtuals.
void AppendBytes(const char* bytes, size_t length) override;
void Flush() final;
void SetDecoder(std::unique_ptr<TextResourceDecoder>) final;
protected:
// Overrides of base-class virtuals. Note that insert() keeps its lower-case
// name, unlike the other methods here.
void insert(const SegmentedString&) final;
void Append(const String&) override;
void Finish() final;
// Constructor used by the public Create() factory (whole-document parsing).
HTMLDocumentParser(HTMLDocument&, ParserSynchronizationPolicy);
// Constructor used by the private Create() factory (fragment parsing).
HTMLDocumentParser(DocumentFragment*,
Element* context_element,
ParserContentPolicy);
// Returns the raw pointer held by tree_builder_.
HTMLTreeBuilder* TreeBuilder() const { return tree_builder_.Get(); }
void ForcePlaintextForTextDocument();
private:
// Factory for fragment parsing. Private, so it is only reachable from within
// this class; forwards its arguments to the (DocumentFragment*, Element*,
// ParserContentPolicy) constructor.
static HTMLDocumentParser* Create(DocumentFragment* fragment,
Element* context_element,
ParserContentPolicy parser_content_policy) {
return new HTMLDocumentParser(fragment, context_element,
parser_content_policy);
}
// NOTE(review): presumably the common constructor the two protected
// constructors delegate to -- confirm in the .cpp.
HTMLDocumentParser(Document&,
ParserContentPolicy,
ParserSynchronizationPolicy);
// DocumentParser
void Detach() final;
bool HasInsertionPoint() final;
void PrepareToStopParsing() final;
void StopParsing() final;
// True when the parser is waiting for scripts or for stylesheets.
bool IsPaused() const {
return IsWaitingForScripts() || is_waiting_for_stylesheets_;
}
bool IsWaitingForScripts() const final;
bool IsExecutingScript() const final;
void ExecuteScriptsWaitingForResources() final;
void DidAddPendingStylesheetInBody() final;
void DidLoadAllBodyStylesheets() final;
void CheckIfBodyStylesheetAdded();
void DocumentElementAvailable() override;
// HTMLParserScriptRunnerHost
void NotifyScriptLoaded(PendingScript*) final;
HTMLInputStream& InputStream() final { return input_; }
// True only when preload_scanner_ is set AND threading is not in use; with
// threading enabled this reports false even if a scanner object exists.
bool HasPreloadScanner() const final {
return preload_scanner_.get() && !ShouldUseThreading();
}
void AppendCurrentInputStreamToPreloadScannerAndScan() final;
// Background parser lifecycle and speculation handling. The methods taking
// std::unique_ptr parameters take ownership of their arguments.
void StartBackgroundParser();
void StopBackgroundParser();
void ValidateSpeculations(std::unique_ptr<TokenizedChunk> last_chunk);
void DiscardSpeculationsAndResumeFrom(
std::unique_ptr<TokenizedChunk> last_chunk,
std::unique_ptr<HTMLToken>,
std::unique_ptr<HTMLTokenizer>);
// NOTE(review): the meaning of the size_t result is not visible in this
// header (possibly a count of processed tokens) -- confirm in the .cpp.
size_t ProcessTokenizedChunkFromBackgroundParser(
std::unique_ptr<TokenizedChunk>);
void PumpPendingSpeculations();
// Tokenizer pumping and tree construction.
bool CanTakeNextToken();
void PumpTokenizer();
void PumpTokenizerIfPossible();
void ConstructTreeFromHTMLToken();
void ConstructTreeFromCompactHTMLToken(const CompactHTMLToken&);
void RunScriptsForPausedTreeBuilder();
void ResumeParsingAfterPause();
// End-of-parsing handling. As with insert(), end() keeps its lower-case name.
void AttemptToEnd();
void EndIfDelayed();
void AttemptToRunDeferredScriptsAndEnd();
void end();
// Accessor for should_use_threading_.
bool ShouldUseThreading() const { return should_use_threading_; }
bool IsParsingFragment() const;
bool IsScheduledForResume() const;
// True while pump_session_nesting_level_ is non-zero.
bool InPumpSession() const { return pump_session_nesting_level_ > 0; }
// True if any of these holds: inside a pump session, paused (see IsPaused()),
// scheduled for resume, or currently executing script.
bool ShouldDelayEnd() const {
return InPumpSession() || IsPaused() || IsScheduledForResume() ||
IsExecutingScript();
}
// Returns a newly created scanner owned by the caller.
std::unique_ptr<HTMLPreloadScanner> CreatePreloadScanner(
TokenPreloadScanner::ScannerType);
// Let the given HTMLPreloadScanner scan the input it has, and then preloads
// resources using the resulting PreloadRequests and |preloader_|.
void ScanAndPreload(HTMLPreloadScanner*);
void FetchQueuedPreloads();
void EvaluateAndPreloadScriptForDocumentWrite(const String& source);
// Dereferences token_ without a null check; token_ must be set when this is
// called.
HTMLToken& Token() { return *token_; }
// Data members. Their declaration order fixes construction and destruction
// order, so do not reorder them without checking the constructors.
HTMLParserOptions options_;
// Returned by reference from InputStream().
HTMLInputStream input_;
RefPtr<HTMLParserReentryPermit> reentry_permit_;
std::unique_ptr<HTMLToken> token_;
std::unique_ptr<HTMLTokenizer> tokenizer_;
TraceWrapperMember<HTMLParserScriptRunner> script_runner_;
Member<HTMLTreeBuilder> tree_builder_;
std::unique_ptr<HTMLPreloadScanner> preload_scanner_;
// A scanner used only for input provided to the insert() method.
std::unique_ptr<HTMLPreloadScanner> insertion_preload_scanner_;
RefPtr<WebTaskRunner> loading_task_runner_;
Member<HTMLParserScheduler> parser_scheduler_;
HTMLSourceTracker source_tracker_;
TextPosition text_position_;
XSSAuditor xss_auditor_;
XSSAuditorDelegate xss_auditor_delegate_;
// FIXME: last_chunk_before_pause_, tokenizer_, token_, and input_ should be
// combined into a single state object so they can be set and cleared together
// and passed between threads together.
std::unique_ptr<TokenizedChunk> last_chunk_before_pause_;
// Owned queue of tokenized chunks that have not been consumed yet.
Deque<std::unique_ptr<TokenizedChunk>> speculations_;
// Using WeakPtr for GarbageCollected is discouraged. But in this case this is
// ok because HTMLDocumentParser guarantees to revoke all WeakPtrs in the pre
// finalizer.
WeakPtrFactory<HTMLDocumentParser> weak_factory_;
WeakPtr<BackgroundHTMLParser> background_parser_;
Member<HTMLResourcePreloader> preloader_;
PreloadRequestStream queued_preloads_;
Vector<String> queued_document_write_scripts_;
RefPtr<TokenizedChunkQueue> tokenized_chunk_queue_;
std::unique_ptr<DocumentWriteEvaluator> evaluator_;
// If this is non-null, then there is a meta CSP token somewhere in the
// speculation buffer. Preloads will be deferred until a token matching this
// pointer is parsed and the CSP policy is applied. Note that this pointer
// tracks the *last* meta token in the speculation buffer, so it overestimates
// how long to defer preloads. This is for simplicity, as the alternative
// would require keeping track of token positions of preload requests.
CompactHTMLToken* pending_csp_meta_token_;
TaskHandle resume_parsing_task_handle_;
// Returned by ShouldUseThreading().
bool should_use_threading_;
bool end_was_delayed_;
bool have_background_parser_;
bool tasks_were_suspended_;
// Read by InPumpSession(); non-zero means a pump session is active.
unsigned pump_session_nesting_level_;
unsigned pump_speculations_session_nesting_level_;
bool is_parsing_at_line_number_;
bool tried_loading_link_headers_;
bool added_pending_stylesheet_in_body_;
// Read by IsPaused().
bool is_waiting_for_stylesheets_;
};
} // namespace blink
#endif // HTMLDocumentParser_h