| /* |
| * Copyright (C) 2013 Google, Inc. All Rights Reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "core/html/parser/BackgroundHTMLParser.h" |
| |
| #include "core/HTMLNames.h" |
| #include "core/html/parser/HTMLDocumentParser.h" |
| #include "core/html/parser/TextResourceDecoder.h" |
| #include "core/html/parser/XSSAuditor.h" |
| #include "platform/CrossThreadFunctional.h" |
| #include "platform/Histogram.h" |
| #include "platform/TraceEvent.h" |
| #include "public/platform/Platform.h" |
| #include "public/platform/WebTaskRunner.h" |
| #include "wtf/CurrentTime.h" |
| #include "wtf/Functional.h" |
| #include "wtf/PtrUtil.h" |
| #include "wtf/text/TextPosition.h" |
| #include <memory> |
| |
| namespace blink { |
| |
| // On a network with high latency and high bandwidth, using a device with a fast |
| // CPU, we could end up speculatively tokenizing the whole document, well ahead |
| // of when the main-thread actually needs it. This is a waste of memory (and |
| // potentially time if the speculation fails). So we limit our outstanding |
| // tokens arbitrarily to 10,000. Our maximal memory spent speculating will be |
| // approximately: |
| // (defaultOutstandingTokenLimit + defaultPendingTokenLimit) * |
| // sizeof(CompactToken) |
| // |
| // We use a separate low and high water mark to avoid |
| // constantly topping off the main thread's token buffer. At time of writing, |
| // this is (10000 + 1000) * 28 bytes = ~308kb of memory. These numbers have not |
| // been tuned. |
| static const size_t defaultOutstandingTokenLimit = 10000; |
| |
| // We limit our chucks to 1000 tokens, to make sure the main thread is never |
| // waiting on the parser thread for tokens. This was tuned in |
| // https://bugs.webkit.org/show_bug.cgi?id=110408. |
| static const size_t defaultPendingTokenLimit = 1000; |
| |
| using namespace HTMLNames; |
| |
| #if ENABLE(ASSERT) |
| |
| static void checkThatTokensAreSafeToSendToAnotherThread( |
| const CompactHTMLTokenStream* tokens) { |
| for (size_t i = 0; i < tokens->size(); ++i) |
| ASSERT(tokens->at(i).isSafeToSendToAnotherThread()); |
| } |
| |
| static void checkThatPreloadsAreSafeToSendToAnotherThread( |
| const PreloadRequestStream& preloads) { |
| for (size_t i = 0; i < preloads.size(); ++i) |
| ASSERT(preloads[i]->isSafeToSendToAnotherThread()); |
| } |
| |
| static void checkThatXSSInfosAreSafeToSendToAnotherThread( |
| const XSSInfoStream& infos) { |
| for (size_t i = 0; i < infos.size(); ++i) |
| ASSERT(infos[i]->isSafeToSendToAnotherThread()); |
| } |
| |
| #endif |
| |
| WeakPtr<BackgroundHTMLParser> BackgroundHTMLParser::create( |
| std::unique_ptr<Configuration> config, |
| std::unique_ptr<WebTaskRunner> loadingTaskRunner) { |
| auto* backgroundParser = |
| new BackgroundHTMLParser(std::move(config), std::move(loadingTaskRunner)); |
| return backgroundParser->m_weakFactory.createWeakPtr(); |
| } |
| |
| void BackgroundHTMLParser::init( |
| const KURL& documentURL, |
| std::unique_ptr<CachedDocumentParameters> cachedDocumentParameters, |
| const MediaValuesCached::MediaValuesCachedData& mediaValuesCachedData) { |
| m_preloadScanner.reset(new TokenPreloadScanner( |
| documentURL, std::move(cachedDocumentParameters), mediaValuesCachedData)); |
| } |
| |
| BackgroundHTMLParser::Configuration::Configuration() |
| : outstandingTokenLimit(defaultOutstandingTokenLimit), |
| pendingTokenLimit(defaultPendingTokenLimit), |
| shouldCoalesceChunks(false) {} |
| |
| BackgroundHTMLParser::BackgroundHTMLParser( |
| std::unique_ptr<Configuration> config, |
| std::unique_ptr<WebTaskRunner> loadingTaskRunner) |
| : m_weakFactory(this), |
| m_token(wrapUnique(new HTMLToken)), |
| m_tokenizer(HTMLTokenizer::create(config->options)), |
| m_treeBuilderSimulator(config->options), |
| m_options(config->options), |
| m_outstandingTokenLimit(config->outstandingTokenLimit), |
| m_parser(config->parser), |
| m_pendingTokens(wrapUnique(new CompactHTMLTokenStream)), |
| m_pendingTokenLimit(config->pendingTokenLimit), |
| m_xssAuditor(std::move(config->xssAuditor)), |
| m_decoder(std::move(config->decoder)), |
| m_loadingTaskRunner(std::move(loadingTaskRunner)), |
| m_tokenizedChunkQueue(config->tokenizedChunkQueue.release()), |
| m_pendingCSPMetaTokenIndex( |
| HTMLDocumentParser::TokenizedChunk::noPendingToken), |
| m_startingScript(false), |
| m_lastBytesReceivedTime(0.0), |
| m_shouldCoalesceChunks(config->shouldCoalesceChunks) { |
| ASSERT(m_outstandingTokenLimit > 0); |
| ASSERT(m_pendingTokenLimit > 0); |
| ASSERT(m_outstandingTokenLimit >= m_pendingTokenLimit); |
| } |
| |
| BackgroundHTMLParser::~BackgroundHTMLParser() {} |
| |
| void BackgroundHTMLParser::appendRawBytesFromMainThread( |
| std::unique_ptr<Vector<char>> buffer, |
| double bytesReceivedTime) { |
| ASSERT(m_decoder); |
| m_lastBytesReceivedTime = bytesReceivedTime; |
| DEFINE_STATIC_LOCAL(CustomCountHistogram, queueDelay, |
| ("Parser.AppendBytesDelay", 1, 5000, 50)); |
| queueDelay.count(monotonicallyIncreasingTimeMS() - bytesReceivedTime); |
| updateDocument(m_decoder->decode(buffer->data(), buffer->size())); |
| } |
| |
| void BackgroundHTMLParser::appendDecodedBytes(const String& input) { |
| ASSERT(!m_input.current().isClosed()); |
| m_input.append(input); |
| pumpTokenizer(); |
| } |
| |
| void BackgroundHTMLParser::setDecoder( |
| std::unique_ptr<TextResourceDecoder> decoder) { |
| ASSERT(decoder); |
| m_decoder = std::move(decoder); |
| } |
| |
| void BackgroundHTMLParser::flush() { |
| ASSERT(m_decoder); |
| updateDocument(m_decoder->flush()); |
| } |
| |
| void BackgroundHTMLParser::updateDocument(const String& decodedData) { |
| DocumentEncodingData encodingData(*m_decoder.get()); |
| |
| if (encodingData != m_lastSeenEncodingData) { |
| m_lastSeenEncodingData = encodingData; |
| |
| m_xssAuditor->setEncoding(encodingData.encoding()); |
| runOnMainThread( |
| &HTMLDocumentParser::didReceiveEncodingDataFromBackgroundParser, |
| m_parser, encodingData); |
| } |
| |
| if (decodedData.isEmpty()) |
| return; |
| |
| appendDecodedBytes(decodedData); |
| } |
| |
| void BackgroundHTMLParser::resumeFrom(std::unique_ptr<Checkpoint> checkpoint) { |
| m_parser = checkpoint->parser; |
| m_token = std::move(checkpoint->token); |
| m_tokenizer = std::move(checkpoint->tokenizer); |
| m_treeBuilderSimulator.setState(checkpoint->treeBuilderState); |
| m_input.rewindTo(checkpoint->inputCheckpoint, checkpoint->unparsedInput); |
| m_preloadScanner->rewindTo(checkpoint->preloadScannerCheckpoint); |
| m_startingScript = false; |
| m_tokenizedChunkQueue->clear(); |
| m_lastBytesReceivedTime = monotonicallyIncreasingTimeMS(); |
| pumpTokenizer(); |
| } |
| |
| void BackgroundHTMLParser::startedChunkWithCheckpoint( |
| HTMLInputCheckpoint inputCheckpoint) { |
| // Note, we should not have to worry about the index being invalid as messages |
| // from the main thread will be processed in FIFO order. |
| m_input.invalidateCheckpointsBefore(inputCheckpoint); |
| pumpTokenizer(); |
| } |
| |
| void BackgroundHTMLParser::finish() { |
| markEndOfFile(); |
| pumpTokenizer(); |
| } |
| |
| void BackgroundHTMLParser::stop() { |
| delete this; |
| } |
| |
| void BackgroundHTMLParser::forcePlaintextForTextDocument() { |
| // This is only used by the TextDocumentParser (a subclass of |
| // HTMLDocumentParser) to force us into the PLAINTEXT state w/o using a |
| // <plaintext> tag. The TextDocumentParser uses a <pre> tag for historical / |
| // compatibility reasons. |
| m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState); |
| } |
| |
| void BackgroundHTMLParser::markEndOfFile() { |
| ASSERT(!m_input.current().isClosed()); |
| m_input.append(String(&kEndOfFileMarker, 1)); |
| m_input.close(); |
| } |
| |
| void BackgroundHTMLParser::pumpTokenizer() { |
| TRACE_EVENT0("loading", "BackgroundHTMLParser::pumpTokenizer"); |
| HTMLTreeBuilderSimulator::SimulatedToken simulatedToken = |
| HTMLTreeBuilderSimulator::OtherToken; |
| |
| // No need to start speculating until the main thread has almost caught up. |
| if (m_input.totalCheckpointTokenCount() > m_outstandingTokenLimit) |
| return; |
| |
| bool shouldNotifyMainThread = false; |
| while (true) { |
| if (m_xssAuditor->isEnabled()) |
| m_sourceTracker.start(m_input.current(), m_tokenizer.get(), *m_token); |
| |
| if (!m_tokenizer->nextToken(m_input.current(), *m_token)) { |
| // We've reached the end of our current input. |
| shouldNotifyMainThread |= queueChunkForMainThread(); |
| break; |
| } |
| |
| if (m_xssAuditor->isEnabled()) |
| m_sourceTracker.end(m_input.current(), m_tokenizer.get(), *m_token); |
| |
| { |
| TextPosition position = TextPosition(m_input.current().currentLine(), |
| m_input.current().currentColumn()); |
| |
| if (std::unique_ptr<XSSInfo> xssInfo = m_xssAuditor->filterToken( |
| FilterTokenRequest(*m_token, m_sourceTracker, |
| m_tokenizer->shouldAllowCDATA()))) { |
| xssInfo->m_textPosition = position; |
| m_pendingXSSInfos.append(std::move(xssInfo)); |
| } |
| |
| CompactHTMLToken token(m_token.get(), position); |
| |
| bool shouldEvaluateForDocumentWrite = false; |
| bool isCSPMetaTag = false; |
| m_preloadScanner->scan(token, m_input.current(), m_pendingPreloads, |
| &m_viewportDescription, &isCSPMetaTag, |
| &shouldEvaluateForDocumentWrite); |
| |
| simulatedToken = |
| m_treeBuilderSimulator.simulate(token, m_tokenizer.get()); |
| |
| // Break chunks before a script tag is inserted and flag the chunk as |
| // starting a script so the main parser can decide if it should yield |
| // before processing the chunk. |
| if (simulatedToken == HTMLTreeBuilderSimulator::ScriptStart) { |
| shouldNotifyMainThread |= queueChunkForMainThread(); |
| m_startingScript = true; |
| } |
| |
| m_pendingTokens->append(token); |
| if (isCSPMetaTag) { |
| m_pendingCSPMetaTokenIndex = m_pendingTokens->size() - 1; |
| } |
| if (shouldEvaluateForDocumentWrite) { |
| m_likelyDocumentWriteScriptIndices.append(m_pendingTokens->size() - 1); |
| } |
| } |
| |
| m_token->clear(); |
| |
| if (simulatedToken == HTMLTreeBuilderSimulator::ScriptEnd || |
| m_pendingTokens->size() >= m_pendingTokenLimit) { |
| shouldNotifyMainThread |= queueChunkForMainThread(); |
| // If we're far ahead of the main thread, yield for a bit to avoid |
| // consuming too much memory. |
| if (m_input.totalCheckpointTokenCount() > m_outstandingTokenLimit) |
| break; |
| } |
| |
| if (!m_shouldCoalesceChunks && shouldNotifyMainThread) { |
| runOnMainThread(&HTMLDocumentParser::notifyPendingTokenizedChunks, |
| m_parser); |
| shouldNotifyMainThread = false; |
| } |
| } |
| // Wait to notify the main thread about the chunks until we're at the limit. |
| // This lets the background parser generate lots of valuable preloads before |
| // anything expensive (extensions, scripts) take up time on the main thread. A |
| // busy main thread can cause preload delays. |
| if (shouldNotifyMainThread) { |
| runOnMainThread(&HTMLDocumentParser::notifyPendingTokenizedChunks, |
| m_parser); |
| } |
| } |
| |
| bool BackgroundHTMLParser::queueChunkForMainThread() { |
| if (m_pendingTokens->isEmpty()) |
| return false; |
| |
| #if ENABLE(ASSERT) |
| checkThatTokensAreSafeToSendToAnotherThread(m_pendingTokens.get()); |
| checkThatPreloadsAreSafeToSendToAnotherThread(m_pendingPreloads); |
| checkThatXSSInfosAreSafeToSendToAnotherThread(m_pendingXSSInfos); |
| #endif |
| |
| double chunkStartTime = monotonicallyIncreasingTimeMS(); |
| std::unique_ptr<HTMLDocumentParser::TokenizedChunk> chunk = |
| wrapUnique(new HTMLDocumentParser::TokenizedChunk); |
| TRACE_EVENT_WITH_FLOW0("blink,loading", |
| "BackgroundHTMLParser::sendTokensToMainThread", |
| chunk.get(), TRACE_EVENT_FLAG_FLOW_OUT); |
| |
| if (!m_pendingPreloads.isEmpty()) { |
| double delay = monotonicallyIncreasingTimeMS() - m_lastBytesReceivedTime; |
| DEFINE_STATIC_LOCAL(CustomCountHistogram, preloadTokenizeDelay, |
| ("Parser.PreloadTokenizeDelay", 1, 10000, 50)); |
| preloadTokenizeDelay.count(delay); |
| } |
| |
| chunk->preloads.swap(m_pendingPreloads); |
| if (m_viewportDescription.set) |
| chunk->viewport = m_viewportDescription; |
| chunk->xssInfos.swap(m_pendingXSSInfos); |
| chunk->tokenizerState = m_tokenizer->getState(); |
| chunk->treeBuilderState = m_treeBuilderSimulator.state(); |
| chunk->inputCheckpoint = m_input.createCheckpoint(m_pendingTokens->size()); |
| chunk->preloadScannerCheckpoint = m_preloadScanner->createCheckpoint(); |
| chunk->tokens = std::move(m_pendingTokens); |
| chunk->startingScript = m_startingScript; |
| chunk->likelyDocumentWriteScriptIndices.swap( |
| m_likelyDocumentWriteScriptIndices); |
| chunk->pendingCSPMetaTokenIndex = m_pendingCSPMetaTokenIndex; |
| m_startingScript = false; |
| m_pendingCSPMetaTokenIndex = |
| HTMLDocumentParser::TokenizedChunk::noPendingToken; |
| |
| bool isEmpty = m_tokenizedChunkQueue->enqueue(std::move(chunk)); |
| |
| DEFINE_STATIC_LOCAL(CustomCountHistogram, chunkEnqueueTime, |
| ("Parser.ChunkEnqueueTime", 1, 10000, 50)); |
| chunkEnqueueTime.count(monotonicallyIncreasingTimeMS() - chunkStartTime); |
| |
| m_pendingTokens = wrapUnique(new CompactHTMLTokenStream); |
| return isEmpty; |
| } |
| |
| // If the background parser is already running on the main thread, then it is |
| // not necessary to post a task to the main thread to run asynchronously. The |
| // main parser deals with chunking up its own work. |
| // TODO(csharrison): This is a pretty big hack because we don't actually need a |
| // CrossThreadClosure in these cases. This is just experimental. |
| template <typename FunctionType, typename... Ps> |
| void BackgroundHTMLParser::runOnMainThread(FunctionType function, |
| Ps&&... parameters) { |
| if (isMainThread()) { |
| (*WTF::bind(function, std::forward<Ps>(parameters)...))(); |
| } else { |
| m_loadingTaskRunner->postTask( |
| BLINK_FROM_HERE, |
| crossThreadBind(function, std::forward<Ps>(parameters)...)); |
| } |
| } |
| |
| } // namespace blink |