blob: 44076d8e8cce29eb292cf6f721cc8be02a2a498e [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "ios/chrome/browser/reading_list/url_downloader.h"
#include <string>
#include <vector>
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/memory/ptr_util.h"
#include "base/path_service.h"
#include "base/stl_util.h"
#include "base/task/post_task.h"
#include "components/reading_list/core/offline_url_utils.h"
#include "ios/chrome/browser/chrome_paths.h"
#include "ios/chrome/browser/dom_distiller/distiller_viewer.h"
#include "ios/chrome/browser/reading_list/reading_list_distiller_page.h"
#include "ios/chrome/browser/reading_list/reading_list_distiller_page_factory.h"
#include "net/base/escape.h"
#include "net/base/load_flags.h"
#include "net/http/http_response_headers.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
#include "services/network/public/cpp/simple_url_loader.h"
#include "url/gurl.h"
namespace {
// This script disables context menu on img elements.
// The pages are stored locally and long pressing on them will trigger a context
// menu on the file:// URL which cannot be opened. Disable the context menu.
const char kDisableImageContextMenuScript[] =
"<script>"
"document.addEventListener('DOMContentLoaded', function (event) {"
" var imgMenuDisabler = document.createElement('style');"
" imgMenuDisabler.innerHTML = 'img { -webkit-touch-callout: none; }';"
" document.head.appendChild(imgMenuDisabler);"
"}, false);"
"</script>";
} // namespace
// URLDownloader
URLDownloader::URLDownloader(
dom_distiller::DistillerFactory* distiller_factory,
reading_list::ReadingListDistillerPageFactory* distiller_page_factory,
PrefService* prefs,
base::FilePath chrome_profile_path,
scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
const DownloadCompletion& download_completion,
const SuccessCompletion& delete_completion)
: distiller_page_factory_(distiller_page_factory),
distiller_factory_(distiller_factory),
pref_service_(prefs),
download_completion_(download_completion),
delete_completion_(delete_completion),
working_(false),
base_directory_(chrome_profile_path),
mime_type_(),
url_loader_factory_(std::move(url_loader_factory)),
task_runner_(base::CreateSequencedTaskRunnerWithTraits(
{base::MayBlock(), base::TaskPriority::BEST_EFFORT,
base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN})),
task_tracker_() {}
URLDownloader::~URLDownloader() {
task_tracker_.TryCancelAll();
}
void URLDownloader::OfflinePathExists(const base::FilePath& path,
base::Callback<void(bool)> callback) {
task_tracker_.PostTaskAndReplyWithResult(task_runner_.get(), FROM_HERE,
base::Bind(&base::PathExists, path),
callback);
}
void URLDownloader::RemoveOfflineURL(const GURL& url) {
// Remove all download tasks for this url as it would be pointless work.
CancelDownloadOfflineURL(url);
tasks_.push_back(std::make_pair(DELETE, url));
HandleNextTask();
}
void URLDownloader::DownloadOfflineURL(const GURL& url) {
if (!base::ContainsValue(tasks_, std::make_pair(DOWNLOAD, url))) {
tasks_.push_back(std::make_pair(DOWNLOAD, url));
HandleNextTask();
}
}
void URLDownloader::CancelDownloadOfflineURL(const GURL& url) {
tasks_.erase(
std::remove(tasks_.begin(), tasks_.end(), std::make_pair(DOWNLOAD, url)),
tasks_.end());
}
void URLDownloader::DownloadCompletionHandler(
const GURL& url,
const std::string& title,
const base::FilePath& offline_path,
SuccessState success) {
DCHECK(working_);
auto post_delete = base::Bind(
[](URLDownloader* _this, const GURL& url, const std::string& title,
const base::FilePath& offline_path, SuccessState success) {
_this->download_completion_.Run(url, _this->distilled_url_, success,
offline_path, _this->saved_size_,
title);
_this->distiller_.reset();
_this->working_ = false;
_this->HandleNextTask();
},
base::Unretained(this), url, title, offline_path, success);
// If downloading failed, clean up any partial download.
if (success == ERROR) {
base::FilePath directory_path =
reading_list::OfflineURLDirectoryAbsolutePath(base_directory_, url);
task_tracker_.PostTaskAndReply(
task_runner_.get(), FROM_HERE,
base::Bind(
[](const base::FilePath& offline_directory_path) {
base::DeleteFile(offline_directory_path, true);
},
directory_path),
post_delete);
} else {
post_delete.Run();
}
}
void URLDownloader::DeleteCompletionHandler(const GURL& url, bool success) {
DCHECK(working_);
delete_completion_.Run(url, success);
working_ = false;
HandleNextTask();
}
void URLDownloader::HandleNextTask() {
if (working_ || tasks_.empty()) {
return;
}
working_ = true;
Task task = tasks_.front();
tasks_.pop_front();
GURL url = task.second;
base::FilePath directory_path =
reading_list::OfflineURLDirectoryAbsolutePath(base_directory_, url);
if (task.first == DELETE) {
task_tracker_.PostTaskAndReplyWithResult(
task_runner_.get(), FROM_HERE,
base::Bind(&base::DeleteFile, directory_path, true),
base::Bind(&URLDownloader::DeleteCompletionHandler,
base::Unretained(this), url));
} else if (task.first == DOWNLOAD) {
DCHECK(!distiller_);
OfflinePathExists(directory_path, base::Bind(&URLDownloader::DownloadURL,
base::Unretained(this), url));
}
}
void URLDownloader::DownloadURL(const GURL& url, bool offline_url_exists) {
if (offline_url_exists) {
DownloadCompletionHandler(url, std::string(), base::FilePath(),
DOWNLOAD_EXISTS);
return;
}
original_url_ = url;
distilled_url_ = url;
saved_size_ = 0;
std::unique_ptr<reading_list::ReadingListDistillerPage>
reading_list_distiller_page =
distiller_page_factory_->CreateReadingListDistillerPage(url, this);
distiller_.reset(new dom_distiller::DistillerViewer(
distiller_factory_, std::move(reading_list_distiller_page), pref_service_,
url,
base::Bind(&URLDownloader::DistillerCallback, base::Unretained(this))));
}
void URLDownloader::DistilledPageRedirectedToURL(const GURL& page_url,
const GURL& redirected_url) {
DCHECK(original_url_ == page_url);
distilled_url_ = redirected_url;
}
void URLDownloader::DistilledPageHasMimeType(const GURL& original_url,
const std::string& mime_type) {
DCHECK(original_url_ == original_url);
mime_type_ = mime_type;
}
void URLDownloader::OnURLLoadComplete(const GURL& original_url,
base::FilePath response_path) {
// At the moment, only pdf files are downloaded using URLFetcher.
DCHECK(mime_type_ == "application/pdf");
base::FilePath path = reading_list::OfflinePagePath(
original_url_, reading_list::OFFLINE_TYPE_PDF);
std::string mime_type;
if (url_loader_->ResponseInfo()) {
mime_type = url_loader_->ResponseInfo()->mime_type;
}
if (response_path.empty() || mime_type != mime_type_) {
return DownloadCompletionHandler(original_url_, "", path, ERROR);
}
task_tracker_.PostTaskAndReplyWithResult(
task_runner_.get(), FROM_HERE,
base::Bind(&URLDownloader::SavePDFFile, base::Unretained(this),
response_path),
base::Bind(&URLDownloader::DownloadCompletionHandler,
base::Unretained(this), original_url, "", path));
url_loader_.reset();
}
void URLDownloader::CancelTask() {
task_tracker_.TryCancelAll();
distiller_.reset();
}
void URLDownloader::FetchPDFFile() {
const GURL& pdf_url =
distilled_url_.is_valid() ? distilled_url_ : original_url_;
auto resource_request = std::make_unique<network::ResourceRequest>();
resource_request->url = pdf_url;
resource_request->load_flags = net::LOAD_SKIP_CACHE_VALIDATION;
url_loader_ = network::SimpleURLLoader::Create(std::move(resource_request),
NO_TRAFFIC_ANNOTATION_YET);
url_loader_->DownloadToTempFile(
url_loader_factory_.get(),
base::BindOnce(&URLDownloader::OnURLLoadComplete, base::Unretained(this),
pdf_url));
}
URLDownloader::SuccessState URLDownloader::SavePDFFile(
const base::FilePath& temporary_path) {
if (CreateOfflineURLDirectory(original_url_)) {
base::FilePath path = reading_list::OfflinePagePath(
original_url_, reading_list::OFFLINE_TYPE_PDF);
base::FilePath absolute_path =
reading_list::OfflineURLAbsolutePathFromRelativePath(base_directory_,
path);
if (base::Move(temporary_path, absolute_path)) {
int64_t pdf_file_size;
base::GetFileSize(absolute_path, &pdf_file_size);
saved_size_ += pdf_file_size;
return DOWNLOAD_SUCCESS;
} else {
return ERROR;
}
}
return ERROR;
}
void URLDownloader::DistillerCallback(
const GURL& page_url,
const std::string& html,
const std::vector<dom_distiller::DistillerViewerInterface::ImageInfo>&
images,
const std::string& title) {
if (html.empty()) {
// The page may not be HTML. Check the mime-type to see if another handler
// can save offline content.
if (mime_type_ == "application/pdf") {
// PDF handler just downloads the PDF file.
FetchPDFFile();
return;
}
// This content cannot be processed, return an error value to the client.
DownloadCompletionHandler(page_url, std::string(), base::FilePath(), ERROR);
return;
}
std::vector<dom_distiller::DistillerViewer::ImageInfo> images_block = images;
std::string block_html = html;
task_tracker_.PostTaskAndReplyWithResult(
task_runner_.get(), FROM_HERE,
base::Bind(&URLDownloader::SaveDistilledHTML, base::Unretained(this),
page_url, images_block, block_html),
base::Bind(&URLDownloader::DownloadCompletionHandler,
base::Unretained(this), page_url, title,
reading_list::OfflinePagePath(
page_url, reading_list::OFFLINE_TYPE_HTML)));
}
URLDownloader::SuccessState URLDownloader::SaveDistilledHTML(
const GURL& url,
const std::vector<dom_distiller::DistillerViewerInterface::ImageInfo>&
images,
const std::string& html) {
if (CreateOfflineURLDirectory(url)) {
return SaveHTMLForURL(SaveAndReplaceImagesInHTML(url, html, images), url)
? DOWNLOAD_SUCCESS
: ERROR;
}
return ERROR;
}
bool URLDownloader::CreateOfflineURLDirectory(const GURL& url) {
base::FilePath directory_path =
reading_list::OfflineURLDirectoryAbsolutePath(base_directory_, url);
if (!DirectoryExists(directory_path)) {
return CreateDirectoryAndGetError(directory_path, nil);
}
return true;
}
bool URLDownloader::SaveImage(const GURL& url,
const GURL& image_url,
const std::string& data,
std::string* image_name) {
std::string image_hash = base::MD5String(image_url.spec());
*image_name = image_hash;
base::FilePath directory_path =
reading_list::OfflineURLDirectoryAbsolutePath(base_directory_, url);
base::FilePath path = directory_path.Append(image_hash);
if (!base::PathExists(path)) {
int written = base::WriteFile(path, data.c_str(), data.length());
if (written <= 0) {
return false;
}
saved_size_ += written;
return true;
}
return true;
}
std::string URLDownloader::SaveAndReplaceImagesInHTML(
const GURL& url,
const std::string& html,
const std::vector<dom_distiller::DistillerViewerInterface::ImageInfo>&
images) {
std::string mutable_html = html;
bool local_images_found = false;
for (size_t i = 0; i < images.size(); i++) {
if (images[i].url.SchemeIs(url::kDataScheme)) {
// Data URI, the data part of the image is empty, no need to store it.
continue;
}
std::string local_image_name;
// Mixed content is HTTP images on HTTPS pages.
bool image_is_mixed_content = distilled_url_.SchemeIsCryptographic() &&
!images[i].url.SchemeIsCryptographic();
// Only save images if it is not mixed content and image data is valid.
if (!image_is_mixed_content && images[i].url.is_valid() &&
!images[i].data.empty()) {
if (!SaveImage(url, images[i].url, images[i].data, &local_image_name)) {
return std::string();
}
}
std::string image_url = net::EscapeForHTML(images[i].url.spec());
size_t image_url_size = image_url.size();
size_t pos = mutable_html.find(image_url, 0);
while (pos != std::string::npos) {
local_images_found = true;
mutable_html.replace(pos, image_url_size, local_image_name);
pos = mutable_html.find(image_url, pos + local_image_name.size());
}
}
if (local_images_found) {
mutable_html += kDisableImageContextMenuScript;
}
return mutable_html;
}
bool URLDownloader::SaveHTMLForURL(std::string html, const GURL& url) {
if (html.empty()) {
return false;
}
base::FilePath path = reading_list::OfflineURLAbsolutePathFromRelativePath(
base_directory_,
reading_list::OfflinePagePath(url, reading_list::OFFLINE_TYPE_HTML));
int written = base::WriteFile(path, html.c_str(), html.length());
if (written <= 0) {
return false;
}
saved_size_ += written;
return true;
}