/*
 * Copyright (C) 2013 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "core/loader/TextResourceDecoderBuilder.h"

#include "core/dom/DOMImplementation.h"
#include "core/dom/Document.h"
#include "core/frame/LocalFrame.h"
#include "core/frame/Settings.h"
#include "platform/weborigin/SecurityOrigin.h"
#include <memory>

namespace blink {

// Returns true when |parentFrame| is non-null and the security origin of its
// document is allowed to access the security origin of |frame|'s document.
// |frame| is dereferenced whenever |parentFrame| is non-null, so it must be
// non-null in that case.
static inline bool canReferToParentFrameEncoding(
    const LocalFrame* frame,
    const LocalFrame* parentFrame) {
  // Without a (local) parent there is no encoding to refer to.
  if (!parentFrame)
    return false;

  auto* parentOrigin = parentFrame->document()->getSecurityOrigin();
  auto* frameOrigin = frame->document()->getSecurityOrigin();
  return parentOrigin->canAccess(frameOrigin);
}

namespace {

// One row of the top-level-domain -> legacy default encoding table.
struct LegacyEncoding {
  // Last label of a host name (e.g. "jp"), without the leading dot.
  const char* domain;
  // Name of the encoding used as the default for hosts under |domain|.
  const char* encoding;
};

// Default legacy encodings keyed by top-level domain. Entries are kept in
// alphabetical order of |domain|; lookup is a linear scan.
//
// Arabic-script regions ("eg", "ir") use windows-1256 and Baltic regions
// ("ee", "lt", "lv") use windows-1257; these two code pages must not be
// swapped, otherwise unlabelled pages from those domains decode as mojibake.
//
// NOTE(review): "xa" and "xb" are not country-code TLDs and their origin is
// not visible in this file -- confirm the intended encodings for them.
static const LegacyEncoding encodings[] = {
    {"au", "windows-1252"}, {"az", "ISO-8859-9"},   {"bd", "windows-1252"},
    {"bg", "windows-1251"}, {"br", "windows-1252"}, {"ca", "windows-1252"},
    {"ch", "windows-1252"}, {"cn", "GBK"},          {"cz", "windows-1250"},
    {"de", "windows-1252"}, {"dk", "windows-1252"}, {"ee", "windows-1257"},
    {"eg", "windows-1256"}, {"et", "windows-1252"}, {"fi", "windows-1252"},
    {"fr", "windows-1252"}, {"gb", "windows-1252"}, {"gr", "ISO-8859-7"},
    {"hk", "Big5"},         {"hr", "windows-1250"}, {"hu", "ISO-8859-2"},
    {"il", "windows-1255"}, {"ir", "windows-1256"}, {"is", "windows-1252"},
    {"it", "windows-1252"}, {"jp", "Shift_JIS"},    {"kr", "windows-949"},
    {"lt", "windows-1257"}, {"lv", "windows-1257"}, {"mk", "windows-1251"},
    {"nl", "windows-1252"}, {"no", "windows-1252"}, {"pl", "ISO-8859-2"},
    {"pt", "windows-1252"}, {"ro", "ISO-8859-2"},   {"rs", "windows-1251"},
    {"ru", "windows-1251"}, {"se", "windows-1252"}, {"si", "ISO-8859-2"},
    {"sk", "windows-1250"}, {"th", "windows-874"},  {"tr", "ISO-8859-9"},
    {"tw", "Big5"},         {"tz", "windows-1252"}, {"ua", "windows-1251"},
    {"us", "windows-1252"}, {"vn", "windows-1258"}, {"xa", "windows-1252"},
    {"xb", "windows-1257"}};

// Looks up the legacy default encoding for |url| based on the last
// dot-separated label of its host. Returns a default-constructed
// WTF::TextEncoding when the host yields no labels or the last label has no
// entry in |encodings|; callers distinguish that case with isValid().
static const WTF::TextEncoding getEncodingFromDomain(const KURL& url) {
  Vector<String> hostLabels;
  url.host().split(".", hostLabels);
  if (hostLabels.isEmpty())
    return WTF::TextEncoding();

  const auto topLevelDomain = hostLabels.last();
  for (const LegacyEncoding& entry : encodings) {
    if (topLevelDomain == entry.domain)
      return WTF::TextEncoding(entry.encoding);
  }
  return WTF::TextEncoding();
}

}  // namespace

// Stores the MIME type and the explicitly supplied encoding name for later
// use by buildFor(). |encoding| may be empty; setupEncoding() checks
// m_encoding.isEmpty() before applying it to a decoder.
TextResourceDecoderBuilder::TextResourceDecoderBuilder(
    const AtomicString& mimeType,
    const AtomicString& encoding)
    : m_mimeType(mimeType), m_encoding(encoding) {}

// Nothing to release beyond the members' own destructors.
TextResourceDecoderBuilder::~TextResourceDecoderBuilder() = default;

// Creates a decoder for |document| using the builder's MIME type.
//
// The default encoding is the one derived from the top-level domain of the
// document's URL when that lookup yields a valid encoding. Otherwise, if the
// document has a frame with settings, the settings' default text encoding
// name is used. When no frame or settings are available the (possibly
// invalid) domain-derived encoding is passed through as-is and the
// two-argument create() overload is used.
inline std::unique_ptr<TextResourceDecoder>
TextResourceDecoderBuilder::createDecoderInstance(Document* document) {
  const WTF::TextEncoding domainEncoding =
      getEncodingFromDomain(document->url());

  LocalFrame* frame = document->frame();
  Settings* settings = frame ? frame->settings() : nullptr;
  if (!settings)
    return TextResourceDecoder::create(m_mimeType, domainEncoding);

  const WTF::TextEncoding defaultEncoding =
      domainEncoding.isValid() ? domainEncoding
                               : settings->defaultTextEncodingName();
  // Disable autodetection for XML to honor the default encoding (UTF-8) for
  // unlabelled documents.
  const bool useEncodingDetector =
      !DOMImplementation::isXMLMIMEType(m_mimeType);
  return TextResourceDecoder::create(m_mimeType, defaultEncoding,
                                     useEncodingDetector);
}

// Applies encoding information to |decoder| for |document|:
//  1. If the builder was given a non-empty encoding, it is set on the decoder
//     with source EncodingFromHTTPHeader.
//  2. If the document's frame has a local parent frame whose origin may access
//     this frame's origin, the parent document's encoding is used as a hint
//     (only when it was detected heuristically) and, when the builder has no
//     encoding of its own, as the decoder's encoding with source
//     EncodingFromParentFrame.
inline void TextResourceDecoderBuilder::setupEncoding(
    TextResourceDecoder* decoder,
    Document* document) {
  LocalFrame* frame = document->frame();

  // Only a local parent frame can be consulted; anything else leaves
  // |parentFrame| null.
  LocalFrame* parentFrame = nullptr;
  if (frame) {
    auto* parent = frame->tree().parent();
    if (parent && parent->isLocalFrame())
      parentFrame = toLocalFrame(parent);
  }

  const bool hasExplicitEncoding = !m_encoding.isEmpty();
  if (hasExplicitEncoding) {
    decoder->setEncoding(m_encoding.getString(),
                         TextResourceDecoder::EncodingFromHTTPHeader);
  }

  // Set the hint encoding to the parent frame encoding only if the parent and
  // the current frames share the security origin. We impose this condition
  // because somebody can make a child frame containing a carefully crafted
  // html/javascript in one encoding that can be mistaken for hintEncoding (or
  // related encoding) by an auto detector. When interpreted in the latter, it
  // could be an attack vector.
  // FIXME: This might be too cautious for non-7bit-encodings and we may
  // consider relaxing this later after testing.
  if (!frame || !canReferToParentFrameEncoding(frame, parentFrame))
    return;

  Document* parentDocument = parentFrame->document();
  if (parentDocument->encodingWasDetectedHeuristically())
    decoder->setHintEncoding(parentDocument->encoding());

  if (!hasExplicitEncoding) {
    decoder->setEncoding(parentDocument->encoding(),
                         TextResourceDecoder::EncodingFromParentFrame);
  }
}

// Builds a decoder for |document|: first creates the decoder instance with its
// default encoding, then applies the explicit and parent-frame encoding
// information to it. Ownership of the decoder is returned to the caller.
std::unique_ptr<TextResourceDecoder> TextResourceDecoderBuilder::buildFor(
    Document* document) {
  auto newDecoder = createDecoderInstance(document);
  setupEncoding(newDecoder.get(), document);
  return newDecoder;
}

// Drops the explicitly supplied encoding by resetting it to nullAtom. The MIME
// type is left untouched.
void TextResourceDecoderBuilder::clear() {
  m_encoding = nullAtom;
}

}  // namespace blink
