// Copyright (c) 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <algorithm>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "base/base_paths.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/i18n/icu_util.h"
#include "base/path_service.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "third_party/icu/source/common/unicode/unistr.h"
#include "third_party/icu/source/common/unicode/utypes.h"
#include "third_party/icu/source/i18n/unicode/uspoof.h"

std::string GetSkeleton(const std::string& domain,
                        const USpoofChecker* spoof_checker) {
  UErrorCode status = U_ZERO_ERROR;
  icu::UnicodeString ustr_skeleton;
  uspoof_getSkeletonUnicodeString(spoof_checker, 0 /* not used */,
                                  icu::UnicodeString::fromUTF8(domain),
                                  ustr_skeleton, &status);
  std::string skeleton;
  return U_SUCCESS(status) ? ustr_skeleton.toUTF8String(skeleton) : skeleton;
}

// Returns the path of |basename| inside components/url_formatter/top_domains
// below the directory that PathService reports for base::DIR_SOURCE_ROOT.
// Note: the return value of PathService::Get is not checked here.
base::FilePath GetPath(base::StringPiece basename) {
  base::FilePath source_root;
  base::PathService::Get(base::DIR_SOURCE_ROOT, &source_root);

  const base::FilePath top_domains_dir =
      source_root.Append(FILE_PATH_LITERAL("components"))
          .Append(FILE_PATH_LITERAL("url_formatter"))
          .Append(FILE_PATH_LITERAL("top_domains"));
  return top_domains_dir.AppendASCII(basename);
}

bool WriteToFile(const std::string& content, base::StringPiece basename) {
  base::FilePath path = GetPath(basename);
  size_t size = content.size();
  bool succeeded =
      static_cast<size_t>(base::WriteFile(path, content.data(), size)) == size;
  if (!succeeded)
    std::cerr << "Failed to write to " << path.AsUTF8Unsafe() << '\n';
  return succeeded;
}

int main(int argc, const char** argv) {
  if (argc != 1) {
    std::cerr << "Generates the list of top domain skeletons to use as input to"
                 "\nbase/dafsa/make_dafsa.py.\nUsage: "
              << argv[0] << '\n';
    return 1;
  }

  base::i18n::InitializeICU();
  base::FilePath input_file = GetPath("alexa_domains.list");
  std::string input_content;
  if (!base::ReadFileToString(input_file, &input_content)) {
    std::cerr << "Failed to read the input file " << input_file.AsUTF8Unsafe()
              << '\n';
    return 1;
  }

  UErrorCode status = U_ZERO_ERROR;
  USpoofChecker* spoof_checker = uspoof_open(&status);
  if (U_FAILURE(status)) {
    std::cerr << "Failed to create an ICU uspoof_checker due to "
              << u_errorName(status) << ".\n";
    return 1;
  }

  std::stringstream input(input_content);
  std::string output =
      R"(// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is generated by components/url_formatter/make_top_domain_gperf.cc
// DO NOT MANUALLY EDIT!

// Each entry is the skeleton of a top domain for the confusability check
// in components/url_formatter/url_formatter.cc.
%%
)";

  std::string domain;
  size_t max_labels = 0;
  std::string domain_with_max_labels;
  while (std::getline(input, domain)) {
    if (domain[0] == '#')
      continue;
    std::string skeleton = GetSkeleton(domain, spoof_checker);
    if (skeleton.empty()) {
      std::cerr << "Failed to generate the skeleton of " << domain << '\n';
      output += "// " + domain + '\n';
    } else {
      output += skeleton + ", 1\n";
    }
    std::vector<base::StringPiece> labels = base::SplitStringPiece(
        domain, ".", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
    if (labels.size() > max_labels) {
      domain_with_max_labels = domain;
      max_labels = labels.size();
    }
  }

  output += "%%\n";

  if (!WriteToFile(output, "alexa_skeletons.gperf"))
    return 1;

  std::cout << "The first domain with the largest number of labels is "
            << domain_with_max_labels << " and has " << max_labels
            << " labels.\n";

  return 0;
}
