// blob: 85946eddd0c63c0a31637bfda22ebca9f325042e [file] [log] [blame]
/*
* Copyright (C) 2013 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "core/loader/TextResourceDecoderBuilder.h"
#include "core/dom/DOMImplementation.h"
#include "core/dom/Document.h"
#include "core/frame/LocalFrame.h"
#include "core/frame/Settings.h"
#include "platform/weborigin/SecurityOrigin.h"
#include <memory>
namespace blink {
// Returns true when |parentFrame| is non-null and the security origin of its
// document can access the security origin of |frame|'s document.
// |frame| is dereferenced whenever |parentFrame| is non-null, so callers must
// pass a non-null |frame| in that case.
static inline bool canReferToParentFrameEncoding(const LocalFrame* frame, const LocalFrame* parentFrame)
{
    if (!parentFrame)
        return false;
    return parentFrame->document()->getSecurityOrigin()->canAccess(frame->document()->getSecurityOrigin());
}
namespace {
// One row of the domain-to-encoding table below.
struct LegacyEncoding {
    const char* domain; // Last label of the host name (e.g. "jp"), without a dot.
    const char* encoding; // Encoding label handed to WTF::TextEncoding.
};

// Legacy default encodings keyed by the last label of a document's host name.
// getEncodingFromDomain() scans this table linearly and compares |domain|
// against the final dot-separated token of the URL host.
//
// windows-1256 is the Arabic code page and windows-1257 is the Baltic one.
// The Baltic country domains (ee, lt, lv) therefore map to windows-1257 and
// the Arabic-script country domains (eg, ir) map to windows-1256; these five
// rows previously had the two code pages swapped.
//
// NOTE(review): "xa" and "xb" are not assigned country-code domains;
// presumably they are pseudo-locale/test domains. Their values are kept
// as-is -- confirm whether "xb" should follow the same 1256/1257 correction.
static const LegacyEncoding encodings[] = {
    { "au", "windows-1252" },
    { "az", "ISO-8859-9" },
    { "bd", "windows-1252" },
    { "bg", "windows-1251" },
    { "br", "windows-1252" },
    { "ca", "windows-1252" },
    { "ch", "windows-1252" },
    { "cn", "GBK" },
    { "cz", "windows-1250" },
    { "de", "windows-1252" },
    { "dk", "windows-1252" },
    { "ee", "windows-1257" },
    { "eg", "windows-1256" },
    { "et", "windows-1252" },
    { "fi", "windows-1252" },
    { "fr", "windows-1252" },
    { "gb", "windows-1252" },
    { "gr", "ISO-8859-7" },
    { "hk", "Big5" },
    { "hr", "windows-1250" },
    { "hu", "ISO-8859-2" },
    { "il", "windows-1255" },
    { "ir", "windows-1256" },
    { "is", "windows-1252" },
    { "it", "windows-1252" },
    { "jp", "Shift_JIS" },
    { "kr", "windows-949" },
    { "lt", "windows-1257" },
    { "lv", "windows-1257" },
    { "mk", "windows-1251" },
    { "nl", "windows-1252" },
    { "no", "windows-1252" },
    { "pl", "ISO-8859-2" },
    { "pt", "windows-1252" },
    { "ro", "ISO-8859-2" },
    { "rs", "windows-1251" },
    { "ru", "windows-1251" },
    { "se", "windows-1252" },
    { "si", "ISO-8859-2" },
    { "sk", "windows-1250" },
    { "th", "windows-874" },
    { "tr", "ISO-8859-9" },
    { "tw", "Big5" },
    { "tz", "windows-1252" },
    { "ua", "windows-1251" },
    { "us", "windows-1252" },
    { "vn", "windows-1258" },
    { "xa", "windows-1252" },
    { "xb", "windows-1257" }
};
// Looks up a legacy encoding for |url| based on the last dot-separated token
// of its host. Returns the TextEncoding built from the matching table row's
// encoding name, or a default-constructed TextEncoding when the host yields
// no tokens or its last token is not listed in |encodings|.
static const WTF::TextEncoding getEncodingFromDomain(const KURL& url)
{
    Vector<String> hostLabels;
    url.host().split(".", hostLabels);
    if (hostLabels.isEmpty())
        return WTF::TextEncoding();

    const String& lastLabel = hostLabels.last();
    for (const LegacyEncoding& entry : encodings) {
        if (lastLabel == entry.domain)
            return WTF::TextEncoding(entry.encoding);
    }
    return WTF::TextEncoding();
}
} // namespace
// Stores the MIME type and the encoding label used later by buildFor().
// |mimeType| is passed to TextResourceDecoder::create(); |encoding|, when
// non-empty, is applied to the decoder as EncodingFromHTTPHeader.
TextResourceDecoderBuilder::TextResourceDecoderBuilder(const AtomicString& mimeType, const AtomicString& encoding)
: m_mimeType(mimeType)
, m_encoding(encoding)
{
}
// Nothing to release explicitly; members clean up after themselves.
TextResourceDecoderBuilder::~TextResourceDecoderBuilder() = default;
// Creates the decoder for |document|.
// The default encoding is the one guessed from the document URL's domain when
// that guess is valid. Otherwise, if the document has a frame with settings,
// the settings' default text encoding name is used. Without frame settings the
// (possibly invalid) domain guess is passed through and the two-argument
// TextResourceDecoder::create() overload is used.
inline std::unique_ptr<TextResourceDecoder> TextResourceDecoderBuilder::createDecoderInstance(Document* document)
{
    const WTF::TextEncoding encodingFromDomain = getEncodingFromDomain(document->url());

    LocalFrame* frame = document->frame();
    Settings* settings = frame ? frame->settings() : nullptr;
    if (!settings)
        return TextResourceDecoder::create(m_mimeType, encodingFromDomain);

    // Disable autodetection for XML to honor the default encoding (UTF-8) for unlabelled documents.
    const bool allowAutodetection = !DOMImplementation::isXMLMIMEType(m_mimeType);
    return TextResourceDecoder::create(m_mimeType, encodingFromDomain.isValid() ? encodingFromDomain : settings->defaultTextEncodingName(), allowAutodetection);
}
// Applies encoding information to |decoder| for |document|:
//  1. A non-empty m_encoding is set as EncodingFromHTTPHeader.
//  2. If the document's frame has a local parent frame whose origin may be
//     consulted (see canReferToParentFrameEncoding), the parent document's
//     encoding becomes the hint encoding when it was detected heuristically,
//     and becomes the decoder's encoding (EncodingFromParentFrame) when
//     m_encoding is empty.
inline void TextResourceDecoderBuilder::setupEncoding(TextResourceDecoder* decoder, Document* document)
{
    LocalFrame* frame = document->frame();
    LocalFrame* parentFrame = nullptr;
    if (frame) {
        auto* parent = frame->tree().parent();
        if (parent && parent->isLocalFrame())
            parentFrame = toLocalFrame(parent);
    }

    const bool hasExplicitEncoding = !m_encoding.isEmpty();
    if (hasExplicitEncoding)
        decoder->setEncoding(m_encoding.getString(), TextResourceDecoder::EncodingFromHTTPHeader);

    // Set the hint encoding to the parent frame encoding only if
    // the parent and the current frames share the security origin.
    // We impose this condition because somebody can make a child frame
    // containing a carefully crafted html/javascript in one encoding
    // that can be mistaken for hintEncoding (or related encoding) by
    // an auto detector. When interpreted in the latter, it could be
    // an attack vector.
    // FIXME: This might be too cautious for non-7bit-encodings and
    // we may consider relaxing this later after testing.
    if (!frame || !canReferToParentFrameEncoding(frame, parentFrame))
        return;

    Document* parentDocument = parentFrame->document();
    if (parentDocument->encodingWasDetectedHeuristically())
        decoder->setHintEncoding(parentDocument->encoding());
    if (!hasExplicitEncoding)
        decoder->setEncoding(parentDocument->encoding(), TextResourceDecoder::EncodingFromParentFrame);
}
// Builds a decoder for |document|: creates the decoder instance, then applies
// the header/parent-frame encoding information to it before handing ownership
// to the caller.
std::unique_ptr<TextResourceDecoder> TextResourceDecoderBuilder::buildFor(Document* document)
{
    auto decoder = createDecoderInstance(document);
    setupEncoding(decoder.get(), document);
    return decoder;
}
// Resets the stored encoding label to nullAtom. m_mimeType is left untouched.
// NOTE(review): presumably this makes m_encoding.isEmpty() true so later
// buildFor() calls skip the EncodingFromHTTPHeader step -- confirm.
void TextResourceDecoderBuilder::clear()
{
m_encoding = nullAtom;
}
} // namespace blink