| /* |
| * Copyright (C) 2011 Adam Barth. All Rights Reserved. |
| * Copyright (C) 2011 Daniel Bates (dbates@intudata.com). |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "third_party/blink/renderer/core/html/parser/xss_auditor.h" |
| |
| #include <memory> |
| |
| #include "third_party/blink/renderer/core/dom/document.h" |
| #include "third_party/blink/renderer/core/frame/local_frame.h" |
| #include "third_party/blink/renderer/core/frame/settings.h" |
| #include "third_party/blink/renderer/core/frame/use_counter.h" |
| #include "third_party/blink/renderer/core/html/html_param_element.h" |
| #include "third_party/blink/renderer/core/html/link_rel_attribute.h" |
| #include "third_party/blink/renderer/core/html/parser/html_document_parser.h" |
| #include "third_party/blink/renderer/core/html/parser/html_parser_idioms.h" |
| #include "third_party/blink/renderer/core/html/parser/text_resource_decoder.h" |
| #include "third_party/blink/renderer/core/html/parser/xss_auditor_delegate.h" |
| #include "third_party/blink/renderer/core/html_names.h" |
| #include "third_party/blink/renderer/core/inspector/console_message.h" |
| #include "third_party/blink/renderer/core/loader/document_loader.h" |
| #include "third_party/blink/renderer/core/loader/mixed_content_checker.h" |
| #include "third_party/blink/renderer/core/svg_names.h" |
| #include "third_party/blink/renderer/core/xlink_names.h" |
| #include "third_party/blink/renderer/platform/network/encoded_form_data.h" |
| #include "third_party/blink/renderer/platform/text/decode_escape_sequences.h" |
| #include "third_party/blink/renderer/platform/wtf/ascii_ctype.h" |
| |
namespace {

// Replacement URL used when an injected URL-valued attribute is neutralized.
// SecurityOrigin::urlWithUniqueSecurityOrigin() would be the natural choice,
// but it can't be used cross-thread.
constexpr char kURLWithUniqueOrigin[] = "data:,";

// Replacement value used when an injected javascript: URL is neutralized.
constexpr char kSafeJavaScriptURL[] = "javascript:void(0)";

}  // namespace
| |
| namespace blink { |
| |
| using namespace html_names; |
| |
| static bool IsNonCanonicalCharacter(UChar c) { |
| // We remove all non-ASCII characters, including non-printable ASCII |
| // characters. |
| // |
| // Note, we don't remove backslashes like PHP stripslashes(), which among |
| // other things converts "\\0" to the \0 character. Instead, we remove |
| // backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). |
| // However, this has the adverse effect that we remove any legitimate zeros |
| // from a string. |
| // |
| // We also remove forward-slash, because it is common for some servers to |
| // collapse successive path components, eg, a//b becomes a/b. |
| // |
| // We also remove the questionmark character, since some severs replace |
| // invalid high-bytes with a questionmark. We are already stripping the |
| // high-bytes so we also strip the questionmark to match. |
| // |
| // We also move the percent character, since some servers strip it when |
| // there's a malformed sequence. |
| // |
| // For instance: new String("http://localhost:8000?x") => new |
| // String("http:localhost:8x"). |
| return (c == '\\' || c == '0' || c == '\0' || c == '/' || c == '?' || |
| c == '%' || c >= 127); |
| } |
| |
| static bool IsRequiredForInjection(UChar c) { |
| return (c == '\'' || c == '"' || c == '<' || c == '>'); |
| } |
| |
| static bool IsTerminatingCharacter(UChar c) { |
| return (c == '&' || c == '/' || c == '"' || c == '\'' || c == '<' || |
| c == '>' || c == ',' || c == ';'); |
| } |
| |
| static bool IsSlash(UChar c) { |
| return (c == '/' || c == '\\'); |
| } |
| |
| static bool IsHTMLQuote(UChar c) { |
| return (c == '"' || c == '\''); |
| } |
| |
| static bool IsJSNewline(UChar c) { |
| // Per ecma-262 section 7.3 Line Terminators. |
| return (c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029); |
| } |
| |
| static bool StartsHTMLOpenCommentAt(const String& string, wtf_size_t start) { |
| return (start + 3 < string.length() && string[start] == '<' && |
| string[start + 1] == '!' && string[start + 2] == '-' && |
| string[start + 3] == '-'); |
| } |
| |
| static bool StartsHTMLCloseCommentAt(const String& string, wtf_size_t start) { |
| return (start + 2 < string.length() && string[start] == '-' && |
| string[start + 1] == '-' && string[start + 2] == '>'); |
| } |
| |
| static bool StartsSingleLineCommentAt(const String& string, wtf_size_t start) { |
| return (start + 1 < string.length() && string[start] == '/' && |
| string[start + 1] == '/'); |
| } |
| |
| static bool StartsMultiLineCommentAt(const String& string, wtf_size_t start) { |
| return (start + 1 < string.length() && string[start] == '/' && |
| string[start + 1] == '*'); |
| } |
| |
| static bool StartsOpeningScriptTagAt(const String& string, wtf_size_t start) { |
| if (start + 6 >= string.length()) |
| return false; |
| // TODO(esprehn): StringView should probably have startsWith. |
| StringView script("<script"); |
| return EqualIgnoringASCIICase(StringView(string, start, script.length()), |
| script); |
| } |
| |
| static bool StartsClosingScriptTagAt(const String& string, wtf_size_t start) { |
| if (start + 7 >= string.length()) |
| return false; |
| // TODO(esprehn): StringView should probably have startsWith. |
| StringView script("</script"); |
| return EqualIgnoringASCIICase(StringView(string, start, script.length()), |
| script); |
| } |
| |
| // If other files need this, we should move this to |
| // core/html/parser/html_parser_idioms.h |
| template <wtf_size_t inlineCapacity> |
| bool ThreadSafeMatch(const Vector<UChar, inlineCapacity>& vector, |
| const QualifiedName& qname) { |
| return EqualIgnoringNullity(vector, qname.LocalName().Impl()); |
| } |
| |
| static bool HasName(const HTMLToken& token, const QualifiedName& name) { |
| return ThreadSafeMatch(token.GetName(), name); |
| } |
| |
| static bool FindAttributeWithName(const HTMLToken& token, |
| const QualifiedName& name, |
| wtf_size_t& index_of_matching_attribute) { |
| // Notice that we're careful not to ref the StringImpl here because we might |
| // be on a background thread. |
| const String& attr_name = name.NamespaceURI() == xlink_names::kNamespaceURI |
| ? "xlink:" + name.LocalName().GetString() |
| : name.LocalName().GetString(); |
| |
| for (wtf_size_t i = 0; i < token.Attributes().size(); ++i) { |
| if (EqualIgnoringNullity(token.Attributes().at(i).NameAsVector(), |
| attr_name)) { |
| index_of_matching_attribute = i; |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| static bool IsNameOfInlineEventHandler(const Vector<UChar, 32>& name) { |
| const wtf_size_t kLengthOfShortestInlineEventHandlerName = |
| 5; // To wit: oncut. |
| if (name.size() < kLengthOfShortestInlineEventHandlerName) |
| return false; |
| return name[0] == 'o' && name[1] == 'n'; |
| } |
| |
| static bool IsDangerousHTTPEquiv(const String& value) { |
| String equiv = value.StripWhiteSpace(); |
| return DeprecatedEqualIgnoringCase(equiv, "refresh") || |
| DeprecatedEqualIgnoringCase(equiv, "set-cookie"); |
| } |
| |
| static inline String Decode16BitUnicodeEscapeSequences(const String& string) { |
| // Note, the encoding is ignored since each %u-escape sequence represents a |
| // UTF-16 code unit. |
| return DecodeEscapeSequences<Unicode16BitEscapeSequence>(string, |
| UTF8Encoding()); |
| } |
| |
| static inline String DecodeStandardURLEscapeSequences( |
| const String& string, |
| const WTF::TextEncoding& encoding) { |
| // We use DecodeEscapeSequences() instead of DecodeURLEscapeSequences() |
| // (declared in weborigin/kurl.h) to avoid platform-specific URL decoding |
| // differences (e.g. KURLGoogle). |
| return DecodeEscapeSequences<URLEscapeSequence>(string, encoding); |
| } |
| |
| static String FullyDecodeString(const String& string, |
| const WTF::TextEncoding& encoding) { |
| wtf_size_t old_working_string_length; |
| String working_string = string; |
| do { |
| old_working_string_length = working_string.length(); |
| working_string = Decode16BitUnicodeEscapeSequences( |
| DecodeStandardURLEscapeSequences(working_string, encoding)); |
| } while (working_string.length() < old_working_string_length); |
| working_string.Replace('+', ' '); |
| return working_string; |
| } |
| |
// XSSAuditor's task is to determine how much of any given content came
// from a reflection vs. what occurs normally on the page. It must do
// this in the face of an attacker avoiding detection by splicing on page
// content in such a way as to remain syntactically valid. The next two
// functions apply heuristics to get the longest possible fragment in
// the face of such trickery.
| |
| static void TruncateForSrcLikeAttribute(String& decoded_snippet) { |
| // In HTTP URLs, characters in the query string (following the first ?), |
| // in the fragment (following the first #), or even in the path (typically |
| // following the third slash but subject to generous interpretation of a |
| // lack of leading slashes) may be merely ignored by an attacker's server |
| // when a remote script or script-like resource is requested. Hence these |
| // are places where organic page content may be spliced. |
| // |
| // In DATA URLS, the payload starts at the first comma, and the the first |
| // "/*", "//", or "<!--" may introduce a comment, which can then be used |
| // to splice page data harmlessly onto the end of the payload. |
| // |
| // Also, DATA URLs may use the same string literal tricks as with script |
| // content itself. In either case, content following this may come from the |
| // page and may be ignored when the script is executed. Also, any of these |
| // characters may now be represented by the (enlarged) set of html5 entities. |
| // |
| // For simplicity, we don't differentiate based on URL scheme, and stop at |
| // any of the following: |
| // - the first &, since it might be part of an entity for any of the |
| // subsequent punctuation. |
| // - the first # or ?, since the query and fragment can be ignored. |
| // - the third slash, since this typically starts the path, but account |
| // for a possible lack of leading slashes following the scheme). |
| // - the first slash, <, ', or " once a comma is seen, since we |
| // may now be in a data URL payload. |
| int slash_count = 0; |
| bool comma_seen = false; |
| bool colon_seen = false; |
| for (wtf_size_t current_length = 0, |
| remaining_length = decoded_snippet.length(); |
| remaining_length; ++current_length, --remaining_length) { |
| UChar current_char = decoded_snippet[current_length]; |
| if (current_char == ':' && !colon_seen) { |
| if (remaining_length > 1 && !IsSlash(decoded_snippet[current_length + 1])) |
| ++slash_count; |
| if (remaining_length > 2 && !IsSlash(decoded_snippet[current_length + 2])) |
| ++slash_count; |
| colon_seen = true; |
| } |
| if (current_char == '&' || current_char == '?' || current_char == '#' || |
| (IsSlash(current_char) && (comma_seen || ++slash_count > 2)) || |
| (current_char == '<' && comma_seen) || |
| (current_char == '\'' && comma_seen) || |
| (current_char == '"' && comma_seen)) { |
| decoded_snippet.Truncate(current_length); |
| return; |
| } |
| if (current_char == ',') |
| comma_seen = true; |
| } |
| } |
| |
| static void TruncateForScriptLikeAttribute(String& decoded_snippet) { |
| // Beware of trailing characters which came from the page itself, not the |
| // injected vector. Excluding the terminating character covers common cases |
| // where the page immediately ends the attribute, but doesn't cover more |
| // complex cases where there is other page data following the injection. |
| // |
| // Generally, these won't parse as javascript, so the injected vector |
| // typically excludes them from consideration via a single-line comment or |
| // by enclosing them in a string literal terminated later by the page's own |
| // closing punctuation. Since the snippet has not been parsed, the vector |
| // may also try to introduce these via entities. As a result, we'd like to |
| // stop before the first "//", the first <!--, the first entity, or the first |
| // quote not immediately following the first equals sign (taking whitespace |
| // into consideration). |
| // |
| // To keep things simpler, we don't try to distinguish between |
| // entity-introducing amperands vs. other uses, nor do we bother to check for |
| // a second slash for a comment, nor do we bother to check for !-- following a |
| // less-than sign. We stop instead on any ampersand slash, or less-than sign. |
| wtf_size_t position = 0; |
| if ((position = decoded_snippet.Find("=")) != kNotFound && |
| (position = decoded_snippet.Find(IsNotHTMLSpace<UChar>, position + 1)) != |
| kNotFound && |
| (position = decoded_snippet.Find( |
| IsTerminatingCharacter, |
| IsHTMLQuote(decoded_snippet[position]) ? position + 1 : position)) != |
| kNotFound) { |
| decoded_snippet.Truncate(position); |
| } |
| } |
| |
| static void TruncateForSemicolonSeparatedScriptLikeAttribute( |
| String& decoded_snippet) { |
| // Same as script-like attributes, but semicolons can introduce page data. |
| TruncateForScriptLikeAttribute(decoded_snippet); |
| wtf_size_t position = decoded_snippet.Find(";"); |
| if (position != kNotFound) |
| decoded_snippet.Truncate(position); |
| } |
| |
| static bool IsSemicolonSeparatedAttribute( |
| const HTMLToken::Attribute& attribute) { |
| return ThreadSafeMatch(attribute.NameAsVector(), svg_names::kValuesAttr); |
| } |
| |
| static bool IsSemicolonSeparatedValueContainingJavaScriptURL( |
| const String& value) { |
| Vector<String> value_list; |
| value.Split(';', value_list); |
| for (wtf_size_t i = 0; i < value_list.size(); ++i) { |
| String stripped = StripLeadingAndTrailingHTMLSpaces(value_list[i]); |
| if (ProtocolIsJavaScript(stripped)) |
| return true; |
| } |
| return false; |
| } |
| |
| XSSAuditor::XSSAuditor() |
| : is_enabled_(false), |
| xss_protection_(kFilterReflectedXSS), |
| did_send_valid_xss_protection_header_(false), |
| state_(kUninitialized), |
| script_tag_found_in_request_(false), |
| script_tag_nesting_level_(0), |
| encoding_(UTF8Encoding()) { |
| // Although tempting to call init() at this point, the various objects |
| // we want to reference might not all have been constructed yet. |
| } |
| |
| void XSSAuditor::InitForFragment() { |
| DCHECK(IsMainThread()); |
| DCHECK_EQ(state_, kUninitialized); |
| state_ = kFilteringTokens; |
| // When parsing a fragment, we don't enable the XSS auditor because it's |
| // too much overhead. |
| DCHECK(!is_enabled_); |
| } |
| |
| void XSSAuditor::Init(Document* document, |
| XSSAuditorDelegate* auditor_delegate) { |
| DCHECK(IsMainThread()); |
| if (state_ != kUninitialized) |
| return; |
| state_ = kFilteringTokens; |
| |
| if (Settings* settings = document->GetSettings()) |
| is_enabled_ = settings->GetXSSAuditorEnabled(); |
| |
| if (!is_enabled_) |
| return; |
| |
| document_url_ = document->Url(); |
| document_url_.RemoveFragmentIdentifier(); |
| document_url_ = document_url_.Copy(); // Make thread safe. |
| |
| // In theory, the Document could have detached from the LocalFrame after the |
| // XSSAuditor was constructed. |
| if (!document->GetFrame()) { |
| is_enabled_ = false; |
| return; |
| } |
| |
| if (document_url_.IsEmpty()) { |
| // The URL can be empty when opening a new browser window or calling |
| // window.open(""). |
| is_enabled_ = false; |
| return; |
| } |
| |
| if (document_url_.ProtocolIsData()) { |
| is_enabled_ = false; |
| return; |
| } |
| |
| if (document->Encoding().IsValid()) |
| encoding_ = document->Encoding(); |
| |
| if (DocumentLoader* document_loader = |
| document->GetFrame()->Loader().GetDocumentLoader()) { |
| const AtomicString& header_value = |
| document_loader->GetResponse().HttpHeaderField( |
| http_names::kXXSSProtection); |
| String error_details; |
| unsigned error_position = 0; |
| String report_url; |
| KURL xss_protection_report_url; |
| |
| ReflectedXSSDisposition xss_protection_header = ParseXSSProtectionHeader( |
| header_value, error_details, error_position, report_url); |
| |
| if (xss_protection_header == kAllowReflectedXSS) |
| UseCounter::Count(*document, WebFeature::kXSSAuditorDisabled); |
| else if (xss_protection_header == kFilterReflectedXSS) |
| UseCounter::Count(*document, WebFeature::kXSSAuditorEnabledFilter); |
| else if (xss_protection_header == kBlockReflectedXSS) |
| UseCounter::Count(*document, WebFeature::kXSSAuditorEnabledBlock); |
| else if (xss_protection_header == kReflectedXSSInvalid) |
| UseCounter::Count(*document, WebFeature::kXSSAuditorInvalid); |
| |
| did_send_valid_xss_protection_header_ = |
| xss_protection_header != kReflectedXSSUnset && |
| xss_protection_header != kReflectedXSSInvalid; |
| if ((xss_protection_header == kFilterReflectedXSS || |
| xss_protection_header == kBlockReflectedXSS) && |
| !report_url.IsEmpty()) { |
| xss_protection_report_url = document->CompleteURL(report_url); |
| if (MixedContentChecker::IsMixedContent(document->GetSecurityOrigin(), |
| xss_protection_report_url)) { |
| error_details = "insecure reporting URL for secure page"; |
| xss_protection_header = kReflectedXSSInvalid; |
| xss_protection_report_url = KURL(); |
| } |
| } |
| if (xss_protection_header == kReflectedXSSInvalid) { |
| document->AddConsoleMessage(ConsoleMessage::Create( |
| kSecurityMessageSource, kErrorMessageLevel, |
| "Error parsing header X-XSS-Protection: " + header_value + ": " + |
| error_details + " at character position " + |
| String::Format("%u", error_position) + |
| ". The default protections will be applied.")); |
| } |
| |
| xss_protection_ = xss_protection_header; |
| if (xss_protection_ == kReflectedXSSInvalid || |
| xss_protection_ == kReflectedXSSUnset) { |
| xss_protection_ = kBlockReflectedXSS; |
| } |
| |
| if (auditor_delegate) |
| auditor_delegate->SetReportURL(xss_protection_report_url.Copy()); |
| |
| EncodedFormData* http_body = document_loader->HttpBody(); |
| if (http_body && !http_body->IsEmpty()) |
| http_body_as_string_ = http_body->FlattenToString(); |
| } |
| |
| SetEncoding(encoding_); |
| } |
| |
| void XSSAuditor::SetEncoding(const WTF::TextEncoding& encoding) { |
| const wtf_size_t kMiniumLengthForSuffixTree = |
| 512; // FIXME: Tune this parameter. |
| const int kSuffixTreeDepth = 5; |
| |
| if (!encoding.IsValid()) |
| return; |
| |
| encoding_ = encoding; |
| |
| decoded_url_ = Canonicalize(document_url_.GetString(), kNoTruncation); |
| if (decoded_url_.Find(IsRequiredForInjection) == kNotFound) |
| decoded_url_ = String(); |
| |
| if (!http_body_as_string_.IsEmpty()) { |
| decoded_http_body_ = Canonicalize(http_body_as_string_, kNoTruncation); |
| http_body_as_string_ = String(); |
| if (decoded_http_body_.Find(IsRequiredForInjection) == kNotFound) |
| decoded_http_body_ = String(); |
| if (decoded_http_body_.length() >= kMiniumLengthForSuffixTree) { |
| decoded_http_body_suffix_tree_ = |
| std::make_unique<SuffixTree<ASCIICodebook>>(decoded_http_body_, |
| kSuffixTreeDepth); |
| } |
| } |
| |
| if (decoded_url_.IsEmpty() && decoded_http_body_.IsEmpty()) |
| is_enabled_ = false; |
| } |
| |
| std::unique_ptr<XSSInfo> XSSAuditor::FilterToken( |
| const FilterTokenRequest& request) { |
| DCHECK_NE(state_, kUninitialized); |
| if (!is_enabled_ || xss_protection_ == kAllowReflectedXSS) |
| return nullptr; |
| |
| bool did_block_script = false; |
| if (request.token.GetType() == HTMLToken::kStartTag) |
| did_block_script = FilterStartToken(request); |
| else if (script_tag_nesting_level_) { |
| if (request.token.GetType() == HTMLToken::kCharacter) |
| did_block_script = FilterCharacterToken(request); |
| else if (request.token.GetType() == HTMLToken::kEndTag) |
| FilterEndToken(request); |
| } |
| |
| if (did_block_script) { |
| bool did_block_entire_page = (xss_protection_ == kBlockReflectedXSS); |
| std::unique_ptr<XSSInfo> xss_info = |
| XSSInfo::Create(document_url_, did_block_entire_page, |
| did_send_valid_xss_protection_header_); |
| return xss_info; |
| } |
| return nullptr; |
| } |
| |
| bool XSSAuditor::FilterStartToken(const FilterTokenRequest& request) { |
| state_ = kFilteringTokens; |
| bool did_block_script = EraseDangerousAttributesIfInjected(request); |
| |
| if (HasName(request.token, kScriptTag)) { |
| did_block_script |= FilterScriptToken(request); |
| DCHECK(request.should_allow_cdata || !script_tag_nesting_level_); |
| script_tag_nesting_level_++; |
| } else if (HasName(request.token, kObjectTag)) |
| did_block_script |= FilterObjectToken(request); |
| else if (HasName(request.token, kParamTag)) |
| did_block_script |= FilterParamToken(request); |
| else if (HasName(request.token, kEmbedTag)) |
| did_block_script |= FilterEmbedToken(request); |
| else if (HasName(request.token, kIFrameTag) || |
| HasName(request.token, kFrameTag)) |
| did_block_script |= FilterFrameToken(request); |
| else if (HasName(request.token, kMetaTag)) |
| did_block_script |= FilterMetaToken(request); |
| else if (HasName(request.token, kBaseTag)) |
| did_block_script |= FilterBaseToken(request); |
| else if (HasName(request.token, kFormTag)) |
| did_block_script |= FilterFormToken(request); |
| else if (HasName(request.token, kInputTag)) |
| did_block_script |= FilterInputToken(request); |
| else if (HasName(request.token, kButtonTag)) |
| did_block_script |= FilterButtonToken(request); |
| else if (HasName(request.token, kLinkTag)) |
| did_block_script |= FilterLinkToken(request); |
| |
| return did_block_script; |
| } |
| |
| void XSSAuditor::FilterEndToken(const FilterTokenRequest& request) { |
| DCHECK(script_tag_nesting_level_); |
| state_ = kFilteringTokens; |
| if (HasName(request.token, kScriptTag)) { |
| script_tag_nesting_level_--; |
| DCHECK(request.should_allow_cdata || !script_tag_nesting_level_); |
| } |
| } |
| |
| bool XSSAuditor::FilterCharacterToken(const FilterTokenRequest& request) { |
| DCHECK(script_tag_nesting_level_); |
| DCHECK_NE(state_, kUninitialized); |
| if (state_ == kPermittingAdjacentCharacterTokens) |
| return false; |
| |
| if (state_ == kFilteringTokens && script_tag_found_in_request_) { |
| String snippet = CanonicalizedSnippetForJavaScript(request); |
| if (IsContainedInRequest(snippet)) |
| state_ = kSuppressingAdjacentCharacterTokens; |
| else if (!snippet.IsEmpty()) |
| state_ = kPermittingAdjacentCharacterTokens; |
| } |
| if (state_ == kSuppressingAdjacentCharacterTokens) { |
| request.token.EraseCharacters(); |
| // Technically, character tokens can't be empty. |
| request.token.AppendToCharacter(' '); |
| return true; |
| } |
| return false; |
| } |
| |
| bool XSSAuditor::FilterScriptToken(const FilterTokenRequest& request) { |
| DCHECK_EQ(request.token.GetType(), HTMLToken::kStartTag); |
| DCHECK(HasName(request.token, kScriptTag)); |
| |
| bool did_block_script = false; |
| script_tag_found_in_request_ = |
| IsContainedInRequest(CanonicalizedSnippetForTagName(request)); |
| if (script_tag_found_in_request_) { |
| did_block_script |= EraseAttributeIfInjected( |
| request, kSrcAttr, BlankURL().GetString(), kSrcLikeAttributeTruncation); |
| did_block_script |= EraseAttributeIfInjected(request, svg_names::kHrefAttr, |
| BlankURL().GetString(), |
| kSrcLikeAttributeTruncation); |
| did_block_script |= EraseAttributeIfInjected( |
| request, xlink_names::kHrefAttr, BlankURL().GetString(), |
| kSrcLikeAttributeTruncation); |
| } |
| return did_block_script; |
| } |
| |
| bool XSSAuditor::FilterObjectToken(const FilterTokenRequest& request) { |
| DCHECK_EQ(request.token.GetType(), HTMLToken::kStartTag); |
| DCHECK(HasName(request.token, kObjectTag)); |
| |
| bool did_block_script = false; |
| if (IsContainedInRequest(CanonicalizedSnippetForTagName(request))) { |
| did_block_script |= |
| EraseAttributeIfInjected(request, kDataAttr, BlankURL().GetString(), |
| kSrcLikeAttributeTruncation); |
| did_block_script |= EraseAttributeIfInjected(request, kTypeAttr); |
| did_block_script |= EraseAttributeIfInjected(request, kClassidAttr); |
| } |
| return did_block_script; |
| } |
| |
| bool XSSAuditor::FilterParamToken(const FilterTokenRequest& request) { |
| DCHECK_EQ(request.token.GetType(), HTMLToken::kStartTag); |
| DCHECK(HasName(request.token, kParamTag)); |
| |
| wtf_size_t index_of_name_attribute; |
| if (!FindAttributeWithName(request.token, kNameAttr, index_of_name_attribute)) |
| return false; |
| |
| const HTMLToken::Attribute& name_attribute = |
| request.token.Attributes().at(index_of_name_attribute); |
| if (!HTMLParamElement::IsURLParameter(name_attribute.Value())) |
| return false; |
| |
| return EraseAttributeIfInjected(request, kValueAttr, BlankURL().GetString(), |
| kSrcLikeAttributeTruncation); |
| } |
| |
| bool XSSAuditor::FilterEmbedToken(const FilterTokenRequest& request) { |
| DCHECK_EQ(request.token.GetType(), HTMLToken::kStartTag); |
| DCHECK(HasName(request.token, kEmbedTag)); |
| |
| bool did_block_script = false; |
| if (IsContainedInRequest(CanonicalizedSnippetForTagName(request))) { |
| did_block_script |= EraseAttributeIfInjected(request, kCodeAttr, String(), |
| kSrcLikeAttributeTruncation); |
| did_block_script |= EraseAttributeIfInjected( |
| request, kSrcAttr, BlankURL().GetString(), kSrcLikeAttributeTruncation); |
| did_block_script |= EraseAttributeIfInjected(request, kTypeAttr); |
| } |
| return did_block_script; |
| } |
| |
| bool XSSAuditor::FilterFrameToken(const FilterTokenRequest& request) { |
| DCHECK_EQ(request.token.GetType(), HTMLToken::kStartTag); |
| DCHECK(HasName(request.token, kIFrameTag) || |
| HasName(request.token, kFrameTag)); |
| |
| bool did_block_script = EraseAttributeIfInjected( |
| request, kSrcdocAttr, String(), kScriptLikeAttributeTruncation); |
| if (IsContainedInRequest(CanonicalizedSnippetForTagName(request))) |
| did_block_script |= EraseAttributeIfInjected(request, kSrcAttr, String(), |
| kSrcLikeAttributeTruncation); |
| |
| return did_block_script; |
| } |
| |
| bool XSSAuditor::FilterMetaToken(const FilterTokenRequest& request) { |
| DCHECK_EQ(request.token.GetType(), HTMLToken::kStartTag); |
| DCHECK(HasName(request.token, kMetaTag)); |
| |
| return EraseAttributeIfInjected(request, kHttpEquivAttr); |
| } |
| |
| bool XSSAuditor::FilterBaseToken(const FilterTokenRequest& request) { |
| DCHECK_EQ(request.token.GetType(), HTMLToken::kStartTag); |
| DCHECK(HasName(request.token, kBaseTag)); |
| |
| return EraseAttributeIfInjected(request, kHrefAttr, String(), |
| kSrcLikeAttributeTruncation); |
| } |
| |
| bool XSSAuditor::FilterFormToken(const FilterTokenRequest& request) { |
| DCHECK_EQ(request.token.GetType(), HTMLToken::kStartTag); |
| DCHECK(HasName(request.token, kFormTag)); |
| |
| return EraseAttributeIfInjected(request, kActionAttr, kURLWithUniqueOrigin, |
| kSrcLikeAttributeTruncation); |
| } |
| |
| bool XSSAuditor::FilterInputToken(const FilterTokenRequest& request) { |
| DCHECK_EQ(request.token.GetType(), HTMLToken::kStartTag); |
| DCHECK(HasName(request.token, kInputTag)); |
| |
| return EraseAttributeIfInjected(request, kFormactionAttr, |
| kURLWithUniqueOrigin, |
| kSrcLikeAttributeTruncation); |
| } |
| |
| bool XSSAuditor::FilterButtonToken(const FilterTokenRequest& request) { |
| DCHECK_EQ(request.token.GetType(), HTMLToken::kStartTag); |
| DCHECK(HasName(request.token, kButtonTag)); |
| |
| return EraseAttributeIfInjected(request, kFormactionAttr, |
| kURLWithUniqueOrigin, |
| kSrcLikeAttributeTruncation); |
| } |
| |
| bool XSSAuditor::FilterLinkToken(const FilterTokenRequest& request) { |
| DCHECK_EQ(request.token.GetType(), HTMLToken::kStartTag); |
| DCHECK(HasName(request.token, kLinkTag)); |
| |
| wtf_size_t index_of_attribute = 0; |
| if (!FindAttributeWithName(request.token, kRelAttr, index_of_attribute)) |
| return false; |
| |
| const HTMLToken::Attribute& attribute = |
| request.token.Attributes().at(index_of_attribute); |
| LinkRelAttribute parsed_attribute(attribute.Value()); |
| if (!parsed_attribute.IsImport()) |
| return false; |
| |
| return EraseAttributeIfInjected(request, kHrefAttr, kURLWithUniqueOrigin, |
| kSrcLikeAttributeTruncation, |
| kAllowSameOriginHref); |
| } |
| |
| bool XSSAuditor::EraseDangerousAttributesIfInjected( |
| const FilterTokenRequest& request) { |
| bool did_block_script = false; |
| for (wtf_size_t i = 0; i < request.token.Attributes().size(); ++i) { |
| bool erase_attribute = false; |
| bool value_contains_java_script_url = false; |
| const HTMLToken::Attribute& attribute = request.token.Attributes().at(i); |
| // FIXME: Don't create a new String for every attribute.value in the |
| // document. |
| if (IsNameOfInlineEventHandler(attribute.NameAsVector())) { |
| erase_attribute = IsContainedInRequest( |
| Canonicalize(SnippetFromAttribute(request, attribute), |
| kScriptLikeAttributeTruncation)); |
| } else if (IsSemicolonSeparatedAttribute(attribute)) { |
| if (IsSemicolonSeparatedValueContainingJavaScriptURL(attribute.Value())) { |
| value_contains_java_script_url = true; |
| erase_attribute = |
| IsContainedInRequest(Canonicalize( |
| NameFromAttribute(request, attribute), kNoTruncation)) && |
| IsContainedInRequest( |
| Canonicalize(SnippetFromAttribute(request, attribute), |
| kSemicolonSeparatedScriptLikeAttributeTruncation)); |
| } |
| } else if (ProtocolIsJavaScript( |
| StripLeadingAndTrailingHTMLSpaces(attribute.Value()))) { |
| value_contains_java_script_url = true; |
| erase_attribute = IsContainedInRequest( |
| Canonicalize(SnippetFromAttribute(request, attribute), |
| kScriptLikeAttributeTruncation)); |
| } |
| if (!erase_attribute) |
| continue; |
| request.token.EraseValueOfAttribute(i); |
| if (value_contains_java_script_url) |
| request.token.AppendToAttributeValue(i, kSafeJavaScriptURL); |
| did_block_script = true; |
| } |
| return did_block_script; |
| } |
| |
| bool XSSAuditor::EraseAttributeIfInjected(const FilterTokenRequest& request, |
| const QualifiedName& attribute_name, |
| const String& replacement_value, |
| TruncationKind treatment, |
| HrefRestriction restriction) { |
| wtf_size_t index_of_attribute = 0; |
| if (!FindAttributeWithName(request.token, attribute_name, index_of_attribute)) |
| return false; |
| |
| const HTMLToken::Attribute& attribute = |
| request.token.Attributes().at(index_of_attribute); |
| if (!IsContainedInRequest( |
| Canonicalize(SnippetFromAttribute(request, attribute), treatment))) |
| return false; |
| |
| if (ThreadSafeMatch(attribute_name, kSrcAttr) || |
| (restriction == kAllowSameOriginHref && |
| ThreadSafeMatch(attribute_name, kHrefAttr))) { |
| if (IsLikelySafeResource(attribute.Value())) |
| return false; |
| } else if (ThreadSafeMatch(attribute_name, kHttpEquivAttr)) { |
| if (!IsDangerousHTTPEquiv(attribute.Value())) |
| return false; |
| } |
| |
| request.token.EraseValueOfAttribute(index_of_attribute); |
| if (!replacement_value.IsEmpty()) |
| request.token.AppendToAttributeValue(index_of_attribute, replacement_value); |
| |
| return true; |
| } |
| |
| String XSSAuditor::CanonicalizedSnippetForTagName( |
| const FilterTokenRequest& request) { |
| String source = request.source_tracker.SourceForToken(request.token); |
| |
| // TODO(tsepez): fix HTMLSourceTracker not to include NULs. |
| // Beware that the source tracker may include leading NULs as part of |
| // the souce for the token. |
| unsigned start = 0; |
| for (start = 0; start < source.length() && source[start] == '\0'; ++start) |
| continue; |
| |
| // Grab a fixed number of characters equal to the length of the token's name |
| // plus one (to account for the "<"). |
| return Canonicalize( |
| source.Substring(start, request.token.GetName().size() + 1), |
| kNoTruncation); |
| } |
| |
| String XSSAuditor::NameFromAttribute(const FilterTokenRequest& request, |
| const HTMLToken::Attribute& attribute) { |
| // The range inlcudes the character which terminates the name. So, |
| // for an input of |name="value"|, the snippet is |name=|. |
| int start = attribute.NameRange().start - request.token.StartIndex(); |
| int end = attribute.ValueRange().start - request.token.StartIndex(); |
| return request.source_tracker.SourceForToken(request.token) |
| .Substring(start, end - start); |
| } |
| |
| String XSSAuditor::SnippetFromAttribute(const FilterTokenRequest& request, |
| const HTMLToken::Attribute& attribute) { |
| // The range doesn't include the character which terminates the value. So, |
| // for an input of |name="value"|, the snippet is |name="value|. For a space |
| // terminated unquoted input of |name=value |, the snippet is |name=value|. |
| // Beware of empty unquoted values at the end of a token, we need to make sure |
| // we don't clip off the equals-sign as there is no trailing space. |
| // FIXME: We should grab one character before the name also. |
| int name_start = attribute.NameRange().start - request.token.StartIndex(); |
| int value_start = attribute.ValueRange().start - request.token.StartIndex(); |
| int value_end = attribute.ValueRange().end - request.token.StartIndex(); |
| int length = value_end - name_start; |
| if (value_start == value_end) |
| length += 1; |
| return request.source_tracker.SourceForToken(request.token) |
| .Substring(name_start, length); |
| } |
| |
| String XSSAuditor::Canonicalize(String snippet, TruncationKind treatment) { |
| String decoded_snippet = FullyDecodeString(snippet, encoding_); |
| |
| if (treatment != kNoTruncation) { |
| if (decoded_snippet.length() > kMaximumFragmentLengthTarget) { |
| // Let the page influence the stopping point to avoid disclosing leading |
| // fragments. Stop when we hit whitespace, since that is unlikely to be |
| // part a leading fragment. |
| wtf_size_t position = kMaximumFragmentLengthTarget; |
| while (position < decoded_snippet.length() && |
| !IsHTMLSpace(decoded_snippet[position])) |
| ++position; |
| decoded_snippet.Truncate(position); |
| } |
| if (treatment == kSrcLikeAttributeTruncation) |
| TruncateForSrcLikeAttribute(decoded_snippet); |
| else if (treatment == kScriptLikeAttributeTruncation) |
| TruncateForScriptLikeAttribute(decoded_snippet); |
| else if (treatment == kSemicolonSeparatedScriptLikeAttributeTruncation) |
| TruncateForSemicolonSeparatedScriptLikeAttribute(decoded_snippet); |
| } |
| |
| return decoded_snippet.RemoveCharacters(&IsNonCanonicalCharacter); |
| } |
| |
| String XSSAuditor::CanonicalizedSnippetForJavaScript( |
| const FilterTokenRequest& request) { |
| String string = request.source_tracker.SourceForToken(request.token); |
| wtf_size_t start_position = 0; |
| wtf_size_t end_position = string.length(); |
| wtf_size_t found_position = kNotFound; |
| wtf_size_t last_non_space_position = kNotFound; |
| |
| // Skip over initial comments to find start of code. |
| while (start_position < end_position) { |
| while (start_position < end_position && |
| IsHTMLSpace<UChar>(string[start_position])) |
| start_position++; |
| |
| // Under SVG/XML rules, only HTML comment syntax matters and the parser |
| // returns these as a separate comment tokens. Having consumed whitespace, |
| // we need not look further for these. |
| if (request.should_allow_cdata) |
| break; |
| |
| // Under HTML rules, both the HTML and JS comment synatx matters, and the |
| // HTML comment ends at the end of the line, not with -->. |
| if (StartsHTMLOpenCommentAt(string, start_position) || |
| StartsSingleLineCommentAt(string, start_position)) { |
| while (start_position < end_position && |
| !IsJSNewline(string[start_position])) |
| start_position++; |
| } else if (StartsMultiLineCommentAt(string, start_position)) { |
| if (start_position + 2 < end_position && |
| (found_position = string.Find("*/", start_position + 2)) != kNotFound) |
| start_position = found_position + 2; |
| else |
| start_position = end_position; |
| } else |
| break; |
| } |
| |
| String result; |
| while (start_position < end_position && !result.length()) { |
| // Stop at next comment (using the same rules as above for SVG/XML vs HTML), |
| // when we encounter a comma, when we encounter a backtick, when we hit an |
| // opening <script> tag, when we encounter a HTML closing comment, or when |
| // we exceed the maximum length target. |
| // - The comma rule covers a common parameter concatenation case performed |
| // by some web servers. |
| // - The backtick rule covers the ECMA6 multi-line template string feature. |
| // - The HTML closing comment rule covers the generous interpretation in |
| // https://tc39.github.io/ecma262/#prod-annexB-HTMLCloseComment. |
| last_non_space_position = kNotFound; |
| for (found_position = start_position; found_position < end_position; |
| found_position++) { |
| if (StartsSingleLineCommentAt(string, found_position) || |
| StartsMultiLineCommentAt(string, found_position)) { |
| break; |
| } |
| if (!request.should_allow_cdata) { |
| if (StartsHTMLOpenCommentAt(string, found_position) || |
| StartsHTMLCloseCommentAt(string, found_position)) { |
| break; |
| } |
| } |
| if (string[found_position] == ',' || string[found_position] == '`') |
| break; |
| |
| if (last_non_space_position != kNotFound && |
| (StartsOpeningScriptTagAt(string, found_position) || |
| StartsClosingScriptTagAt(string, found_position))) { |
| found_position = last_non_space_position + 1; |
| break; |
| } |
| if (found_position > start_position + kMaximumFragmentLengthTarget) { |
| // After hitting the length target, we can only stop at a point where we |
| // know we are not in the middle of a %-escape sequence. For the sake of |
| // simplicity, approximate not stopping inside a (possibly multiply |
| // encoded) %-escape sequence by breaking on whitespace only. We should |
| // have enough text in these cases to avoid false positives. |
| if (IsHTMLSpace<UChar>(string[found_position])) |
| break; |
| } |
| if (!IsHTMLSpace<UChar>(string[found_position])) |
| last_non_space_position = found_position; |
| } |
| result = Canonicalize( |
| string.Substring(start_position, found_position - start_position), |
| kNoTruncation); |
| start_position = found_position + 1; |
| } |
| |
| return result; |
| } |
| |
| bool XSSAuditor::IsContainedInRequest(const String& decoded_snippet) { |
| if (decoded_snippet.IsEmpty()) |
| return false; |
| if (decoded_url_.FindIgnoringCase(decoded_snippet, 0) != kNotFound) |
| return true; |
| if (decoded_http_body_suffix_tree_ && |
| !decoded_http_body_suffix_tree_->MightContain(decoded_snippet)) |
| return false; |
| return decoded_http_body_.FindIgnoringCase(decoded_snippet, 0) != kNotFound; |
| } |
| |
| bool XSSAuditor::IsLikelySafeResource(const String& url) { |
| // Give empty URLs and about:blank a pass. Making a resourceURL from an |
| // empty string below will likely later fail the "no query args test" as |
| // it inherits the document's query args. |
| if (url.IsEmpty() || url == BlankURL().GetString()) |
| return true; |
| |
| // If the resource is loaded from the same host as the enclosing page, it's |
| // probably not an XSS attack, so we reduce false positives by allowing the |
| // request, ignoring scheme and port considerations. If the resource has a |
| // query string, we're more suspicious, however, because that's pretty rare |
| // and the attacker might be able to trick a server-side script into doing |
| // something dangerous with the query string. |
| if (document_url_.Host().IsEmpty()) |
| return false; |
| |
| KURL resource_url(document_url_, url); |
| return (document_url_.Host() == resource_url.Host() && |
| resource_url.Query().IsEmpty()); |
| } |
| |
| bool XSSAuditor::IsSafeToSendToAnotherThread() const { |
| return document_url_.IsSafeToSendToAnotherThread() && |
| decoded_url_.IsSafeToSendToAnotherThread() && |
| decoded_http_body_.IsSafeToSendToAnotherThread() && |
| http_body_as_string_.IsSafeToSendToAnotherThread(); |
| } |
| |
| } // namespace blink |