| // Copyright 2014 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/policy/core/browser/url_blacklist_manager.h" |
| |
| #include <stdint.h> |
| |
| #include <algorithm> |
| #include <limits> |
| #include <set> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/bind.h" |
| #include "base/files/file_path.h" |
| #include "base/location.h" |
| #include "base/logging.h" |
| #include "base/sequenced_task_runner.h" |
| #include "base/single_thread_task_runner.h" |
| #include "base/stl_util.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_util.h" |
| #include "base/task/post_task.h" |
| #include "base/task_runner_util.h" |
| #include "base/threading/sequenced_task_runner_handle.h" |
| #include "base/values.h" |
| #include "components/policy/core/browser/url_blacklist_policy_handler.h" |
| #include "components/policy/core/common/policy_pref_names.h" |
| #include "components/pref_registry/pref_registry_syncable.h" |
| #include "components/prefs/pref_service.h" |
| #include "components/url_formatter/url_fixer.h" |
| #include "net/base/filename_util.h" |
| #include "net/base/net_errors.h" |
| #include "url/third_party/mozilla/url_parse.h" |
| #include "url/url_constants.h" |
| #include "url/url_util.h" |
| |
| using url_matcher::URLMatcher; |
| using url_matcher::URLMatcherCondition; |
| using url_matcher::URLMatcherConditionFactory; |
| using url_matcher::URLMatcherConditionSet; |
| using url_matcher::URLMatcherPortFilter; |
| using url_matcher::URLMatcherSchemeFilter; |
| using url_matcher::URLQueryElementMatcherCondition; |
| |
| namespace policy { |
| |
| namespace { |
| |
| // List of schemes of URLs that should not be blocked by the "*" wildcard in |
| // the blacklist. Note that URLs with these schemes can still be blocked with |
| // a more specific filter e.g. "chrome-extension://*". |
| // The schemes are hardcoded here to avoid dependencies on //extensions and |
| // //chrome. |
| const char* kBypassBlacklistWildcardForSchemes[] = { |
| // For internal extension URLs e.g. the Bookmark Manager and the File |
| // Manager on Chrome OS. |
| "chrome-extension", |
| |
| // NTP on Android. |
| "chrome-native", |
| |
| // NTP on other platforms. |
| "chrome-search", |
| }; |
| |
| // Maximum filters per policy. Filters over this index are ignored. |
| const size_t kMaxFiltersPerPolicy = 1000; |
| |
| // Returns a blacklist based on the given |block| and |allow| pattern lists. |
| std::unique_ptr<URLBlacklist> BuildBlacklist(const base::ListValue* block, |
| const base::ListValue* allow) { |
| auto blacklist = std::make_unique<URLBlacklist>(); |
| blacklist->Block(block); |
| blacklist->Allow(allow); |
| return blacklist; |
| } |
| |
| // Tokenise the parameter |query| and add appropriate query element matcher |
| // conditions to the |query_conditions|. |
| void ProcessQueryToConditions( |
| url_matcher::URLMatcherConditionFactory* condition_factory, |
| const std::string& query, |
| bool allow, |
| std::set<URLQueryElementMatcherCondition>* query_conditions) { |
| url::Component query_left = url::MakeRange(0, query.length()); |
| url::Component key; |
| url::Component value; |
| // Depending on the filter type being black-list or white-list, the matcher |
| // choose any or every match. The idea is a URL should be black-listed if |
| // there is any occurrence of the key value pair. It should be white-listed |
| // only if every occurrence of the key is followed by the value. This avoids |
| // situations such as a user appending a white-listed video parameter in the |
| // end of the query and watching a video of their choice (the last parameter |
| // is ignored by some web servers like youtube's). |
| URLQueryElementMatcherCondition::Type match_type = |
| allow ? URLQueryElementMatcherCondition::MATCH_ALL |
| : URLQueryElementMatcherCondition::MATCH_ANY; |
| |
| while (ExtractQueryKeyValue(query.data(), &query_left, &key, &value)) { |
| URLQueryElementMatcherCondition::QueryElementType query_element_type = |
| value.len ? URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY_VALUE |
| : URLQueryElementMatcherCondition::ELEMENT_TYPE_KEY; |
| URLQueryElementMatcherCondition::QueryValueMatchType query_value_match_type; |
| if (!value.len && key.len && query[key.end() - 1] == '*') { |
| --key.len; |
| query_value_match_type = |
| URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX; |
| } else if (value.len && query[value.end() - 1] == '*') { |
| --value.len; |
| query_value_match_type = |
| URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_PREFIX; |
| } else { |
| query_value_match_type = |
| URLQueryElementMatcherCondition::QUERY_VALUE_MATCH_EXACT; |
| } |
| query_conditions->insert( |
| URLQueryElementMatcherCondition(query.substr(key.begin, key.len), |
| query.substr(value.begin, value.len), |
| query_value_match_type, |
| query_element_type, |
| match_type, |
| condition_factory)); |
| } |
| } |
| |
| bool BypassBlacklistWildcardForURL(const GURL& url) { |
| const std::string& scheme = url.scheme(); |
| for (size_t i = 0; i < base::size(kBypassBlacklistWildcardForSchemes); ++i) { |
| if (scheme == kBypassBlacklistWildcardForSchemes[i]) |
| return true; |
| } |
| return false; |
| } |
| |
| } // namespace |
| |
| struct URLBlacklist::FilterComponents { |
| FilterComponents() : port(0), match_subdomains(true), allow(true) {} |
| ~FilterComponents() = default; |
| FilterComponents(const FilterComponents&) = delete; |
| FilterComponents(FilterComponents&&) = default; |
| FilterComponents& operator=(const FilterComponents&) = delete; |
| FilterComponents& operator=(FilterComponents&&) = default; |
| |
| // Returns true if |this| represents the "*" filter in the blacklist. |
| bool IsBlacklistWildcard() const { |
| return !allow && host.empty() && scheme.empty() && path.empty() && |
| query.empty() && port == 0 && number_of_key_value_pairs == 0 && |
| match_subdomains; |
| } |
| |
| std::string scheme; |
| std::string host; |
| uint16_t port; |
| std::string path; |
| std::string query; |
| int number_of_key_value_pairs; |
| bool match_subdomains; |
| bool allow; |
| }; |
| |
| URLBlacklist::URLBlacklist() : id_(0), url_matcher_(new URLMatcher) {} |
| |
| URLBlacklist::~URLBlacklist() {} |
| |
| void URLBlacklist::AddFilters(bool allow, const base::ListValue* list) { |
| URLMatcherConditionSet::Vector all_conditions; |
| size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize()); |
| std::string pattern; |
| scoped_refptr<URLMatcherConditionSet> condition_set; |
| for (size_t i = 0; i < size; ++i) { |
| bool success = list->GetString(i, &pattern); |
| DCHECK(success); |
| FilterComponents components; |
| components.allow = allow; |
| if (!FilterToComponents(pattern, |
| &components.scheme, |
| &components.host, |
| &components.match_subdomains, |
| &components.port, |
| &components.path, |
| &components.query)) { |
| LOG(ERROR) << "Invalid pattern " << pattern; |
| continue; |
| } |
| |
| condition_set = CreateConditionSet( |
| url_matcher_.get(), ++id_, components.scheme, components.host, |
| components.match_subdomains, components.port, components.path, |
| components.query, allow); |
| components.number_of_key_value_pairs = |
| condition_set->query_conditions().size(); |
| all_conditions.push_back(std::move(condition_set)); |
| filters_[id_] = std::move(components); |
| } |
| url_matcher_->AddConditionSets(all_conditions); |
| } |
| |
| void URLBlacklist::Block(const base::ListValue* filters) { |
| AddFilters(false, filters); |
| } |
| |
| void URLBlacklist::Allow(const base::ListValue* filters) { |
| AddFilters(true, filters); |
| } |
| |
| bool URLBlacklist::IsURLBlocked(const GURL& url) const { |
| return URLBlacklist::GetURLBlacklistState(url) == |
| URLBlacklist::URLBlacklistState::URL_IN_BLACKLIST; |
| } |
| |
| URLBlacklist::URLBlacklistState URLBlacklist::GetURLBlacklistState( |
| const GURL& url) const { |
| std::set<URLMatcherConditionSet::ID> matching_ids = |
| url_matcher_->MatchURL(url); |
| |
| const FilterComponents* max = nullptr; |
| for (auto id = matching_ids.begin(); id != matching_ids.end(); ++id) { |
| auto it = filters_.find(*id); |
| DCHECK(it != filters_.end()); |
| const FilterComponents& filter = it->second; |
| if (!max || FilterTakesPrecedence(filter, *max)) |
| max = &filter; |
| } |
| |
| // Default neutral. |
| if (!max) |
| return URLBlacklist::URLBlacklistState::URL_NEUTRAL_STATE; |
| |
| // Some of the internal Chrome URLs are not affected by the "*" in the |
| // blacklist. Note that the "*" is the lowest priority filter possible, so |
| // any higher priority filter will be applied first. |
| if (max->IsBlacklistWildcard() && BypassBlacklistWildcardForURL(url)) |
| return URLBlacklist::URLBlacklistState::URL_IN_WHITELIST; |
| |
| return max->allow ? |
| URLBlacklist::URLBlacklistState::URL_IN_WHITELIST : |
| URLBlacklist::URLBlacklistState::URL_IN_BLACKLIST; |
| } |
| |
| size_t URLBlacklist::Size() const { |
| return filters_.size(); |
| } |
| |
| // static |
| bool URLBlacklist::FilterToComponents(const std::string& filter, |
| std::string* scheme, |
| std::string* host, |
| bool* match_subdomains, |
| uint16_t* port, |
| std::string* path, |
| std::string* query) { |
| DCHECK(scheme); |
| DCHECK(host); |
| DCHECK(match_subdomains); |
| DCHECK(port); |
| DCHECK(path); |
| DCHECK(query); |
| url::Parsed parsed; |
| const std::string lc_filter = base::ToLowerASCII(filter); |
| const std::string url_scheme = url_formatter::SegmentURL(filter, &parsed); |
| |
| // Check if it's a scheme wildcard pattern. We support both versions |
| // (scheme:* and scheme://*) the later being consistent with old filter |
| // definitions. |
| if (lc_filter == url_scheme + ":*" || lc_filter == url_scheme + "://*") { |
| scheme->assign(url_scheme); |
| host->clear(); |
| *match_subdomains = true; |
| *port = 0; |
| path->clear(); |
| query->clear(); |
| return true; |
| } |
| |
| if (url_scheme == url::kFileScheme) { |
| base::FilePath file_path; |
| if (!net::FileURLToFilePath(GURL(filter), &file_path)) |
| return false; |
| |
| *scheme = url::kFileScheme; |
| host->clear(); |
| *match_subdomains = true; |
| *port = 0; |
| *path = file_path.AsUTF8Unsafe(); |
| #if defined(FILE_PATH_USES_WIN_SEPARATORS) |
| // Separators have to be canonicalized on Windows. |
| std::replace(path->begin(), path->end(), '\\', '/'); |
| *path = "/" + *path; |
| #endif |
| return true; |
| } |
| |
| // According to documentation host can't be empty. |
| if (!parsed.host.is_nonempty()) |
| return false; |
| |
| if (parsed.scheme.is_nonempty()) |
| scheme->assign(url_scheme); |
| else |
| scheme->clear(); |
| |
| host->assign(filter, parsed.host.begin, parsed.host.len); |
| *host = base::ToLowerASCII(*host); |
| // Special '*' host, matches all hosts. |
| if (*host == "*") { |
| host->clear(); |
| *match_subdomains = true; |
| } else if (host->at(0) == '.') { |
| // A leading dot in the pattern syntax means that we don't want to match |
| // subdomains. |
| host->erase(0, 1); |
| *match_subdomains = false; |
| } else { |
| url::RawCanonOutputT<char> output; |
| url::CanonHostInfo host_info; |
| url::CanonicalizeHostVerbose(filter.c_str(), parsed.host, &output, |
| &host_info); |
| if (host_info.family == url::CanonHostInfo::NEUTRAL) { |
| // We want to match subdomains. Add a dot in front to make sure we only |
| // match at domain component boundaries. |
| *host = "." + *host; |
| *match_subdomains = true; |
| } else { |
| *match_subdomains = false; |
| } |
| } |
| |
| if (parsed.port.is_nonempty()) { |
| int int_port; |
| if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len), |
| &int_port)) { |
| return false; |
| } |
| if (int_port <= 0 || int_port > std::numeric_limits<uint16_t>::max()) |
| return false; |
| *port = int_port; |
| } else { |
| // Match any port. |
| *port = 0; |
| } |
| |
| if (parsed.path.is_nonempty()) |
| path->assign(filter, parsed.path.begin, parsed.path.len); |
| else |
| path->clear(); |
| |
| if (parsed.query.is_nonempty()) |
| query->assign(filter, parsed.query.begin, parsed.query.len); |
| else |
| query->clear(); |
| |
| return true; |
| } |
| |
| // static |
| scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet( |
| URLMatcher* url_matcher, |
| int id, |
| const std::string& scheme, |
| const std::string& host, |
| bool match_subdomains, |
| uint16_t port, |
| const std::string& path, |
| const std::string& query, |
| bool allow) { |
| URLMatcherConditionFactory* condition_factory = |
| url_matcher->condition_factory(); |
| std::set<URLMatcherCondition> conditions; |
| conditions.insert(match_subdomains ? |
| condition_factory->CreateHostSuffixPathPrefixCondition(host, path) : |
| condition_factory->CreateHostEqualsPathPrefixCondition(host, path)); |
| |
| std::set<URLQueryElementMatcherCondition> query_conditions; |
| if (!query.empty()) { |
| ProcessQueryToConditions( |
| condition_factory, query, allow, &query_conditions); |
| } |
| |
| std::unique_ptr<URLMatcherSchemeFilter> scheme_filter; |
| if (!scheme.empty()) |
| scheme_filter.reset(new URLMatcherSchemeFilter(scheme)); |
| |
| std::unique_ptr<URLMatcherPortFilter> port_filter; |
| if (port != 0) { |
| std::vector<URLMatcherPortFilter::Range> ranges; |
| ranges.push_back(URLMatcherPortFilter::CreateRange(port)); |
| port_filter.reset(new URLMatcherPortFilter(ranges)); |
| } |
| |
| return base::MakeRefCounted<URLMatcherConditionSet>( |
| id, conditions, query_conditions, std::move(scheme_filter), |
| std::move(port_filter)); |
| } |
| |
| // static |
| bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs, |
| const FilterComponents& rhs) { |
| // The "*" wildcard is the lowest priority filter. |
| if (rhs.IsBlacklistWildcard()) |
| return true; |
| |
| if (lhs.match_subdomains && !rhs.match_subdomains) |
| return false; |
| if (!lhs.match_subdomains && rhs.match_subdomains) |
| return true; |
| |
| size_t host_length = lhs.host.length(); |
| size_t other_host_length = rhs.host.length(); |
| if (host_length != other_host_length) |
| return host_length > other_host_length; |
| |
| size_t path_length = lhs.path.length(); |
| size_t other_path_length = rhs.path.length(); |
| if (path_length != other_path_length) |
| return path_length > other_path_length; |
| |
| if (lhs.number_of_key_value_pairs != rhs.number_of_key_value_pairs) |
| return lhs.number_of_key_value_pairs > rhs.number_of_key_value_pairs; |
| |
| if (lhs.allow && !rhs.allow) |
| return true; |
| |
| return false; |
| } |
| |
| URLBlacklistManager::URLBlacklistManager(PrefService* pref_service) |
| : pref_service_(pref_service), |
| blacklist_(new URLBlacklist), |
| ui_weak_ptr_factory_(this) { |
| // This class assumes that it is created on the same thread that |
| // |pref_service_| lives on. |
| ui_task_runner_ = base::SequencedTaskRunnerHandle::Get(); |
| background_task_runner_ = base::CreateSequencedTaskRunnerWithTraits( |
| {base::TaskPriority::BEST_EFFORT}); |
| |
| pref_change_registrar_.Init(pref_service_); |
| base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate, |
| base::Unretained(this)); |
| pref_change_registrar_.Add(policy_prefs::kUrlBlacklist, callback); |
| pref_change_registrar_.Add(policy_prefs::kUrlWhitelist, callback); |
| |
| // Start enforcing the policies without a delay when they are present at |
| // startup. |
| if (pref_service_->HasPrefPath(policy_prefs::kUrlBlacklist) || |
| pref_service_->HasPrefPath(policy_prefs::kUrlWhitelist)) { |
| SetBlacklist( |
| BuildBlacklist(pref_service_->GetList(policy_prefs::kUrlBlacklist), |
| pref_service_->GetList(policy_prefs::kUrlWhitelist))); |
| } |
| } |
| |
| URLBlacklistManager::~URLBlacklistManager() { |
| DCHECK(ui_task_runner_->RunsTasksInCurrentSequence()); |
| pref_change_registrar_.RemoveAll(); |
| } |
| |
| void URLBlacklistManager::ScheduleUpdate() { |
| DCHECK(ui_task_runner_->RunsTasksInCurrentSequence()); |
| // Cancel pending updates, if any. This can happen if two preferences that |
| // change the blacklist are updated in one message loop cycle. In those cases, |
| // only rebuild the blacklist after all the preference updates are processed. |
| ui_weak_ptr_factory_.InvalidateWeakPtrs(); |
| ui_task_runner_->PostTask(FROM_HERE, |
| base::BindOnce(&URLBlacklistManager::Update, |
| ui_weak_ptr_factory_.GetWeakPtr())); |
| } |
| |
| void URLBlacklistManager::Update() { |
| DCHECK(ui_task_runner_->RunsTasksInCurrentSequence()); |
| |
| // The URLBlacklist is built in the background. Once it's ready, it is passed |
| // to the URLBlacklistManager back on ui_task_runner_. |
| base::PostTaskAndReplyWithResult( |
| background_task_runner_.get(), FROM_HERE, |
| base::BindOnce( |
| &BuildBlacklist, |
| base::Owned( |
| pref_service_->GetList(policy_prefs::kUrlBlacklist)->DeepCopy()), |
| base::Owned( |
| pref_service_->GetList(policy_prefs::kUrlWhitelist)->DeepCopy())), |
| base::BindOnce(&URLBlacklistManager::SetBlacklist, |
| ui_weak_ptr_factory_.GetWeakPtr())); |
| } |
| |
| void URLBlacklistManager::SetBlacklist( |
| std::unique_ptr<URLBlacklist> blacklist) { |
| DCHECK(ui_task_runner_->RunsTasksInCurrentSequence()); |
| blacklist_ = std::move(blacklist); |
| } |
| |
| bool URLBlacklistManager::IsURLBlocked(const GURL& url) const { |
| DCHECK(ui_task_runner_->RunsTasksInCurrentSequence()); |
| // Ignore blob scheme for two reasons: |
| // 1) PlzNavigate uses it to deliver the response to the renderer. |
| // 2) A whitelisted page can use blob URLs internally. |
| return !url.SchemeIs(url::kBlobScheme) && blacklist_->IsURLBlocked(url); |
| } |
| |
| URLBlacklist::URLBlacklistState URLBlacklistManager::GetURLBlacklistState( |
| const GURL& url) const { |
| DCHECK(ui_task_runner_->RunsTasksInCurrentSequence()); |
| return blacklist_->GetURLBlacklistState(url); |
| } |
| |
| // static |
| void URLBlacklistManager::RegisterProfilePrefs( |
| user_prefs::PrefRegistrySyncable* registry) { |
| registry->RegisterListPref(policy_prefs::kUrlBlacklist); |
| registry->RegisterListPref(policy_prefs::kUrlWhitelist); |
| registry->RegisterIntegerPref( |
| policy_prefs::kSafeSitesFilterBehavior, |
| static_cast<int>(SafeSitesFilterBehavior::kSafeSitesFilterDisabled)); |
| } |
| |
| } // namespace policy |