// cedar -- C++ implementation of Efficiently-updatable Double ARray trie
//  $Id: bench_static.cc 1915 2017-06-06 04:54:35Z ynaga $
// Copyright (c) 2013-2015 Naoki Yoshinaga <ynaga@tkl.iis.u-tokyo.ac.jp>
#ifdef __APPLE__
#include <mach/mach.h>
#endif
#include <fcntl.h>
#include <sys/time.h>
#include <unistd.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <string>
#include <vector>
#ifdef USE_PREFIX_TRIE
#include <cedarpp.h>
#else
#include <cedar.h>
#endif
#if defined (USE_DARTS_CLONE)
#include <darts-clone-0.32g.h>
#elif defined (USE_DARTS_CLONE_OLD)
#include <darts-clone-0.32e5.h>
#else
#include <darts.h>
#endif
#include <tx/tx.hpp>
#include <ux/ux.hpp>
#include <marisa.h>

// Size in bytes of the stack buffer that each build () overload reads key
// data into (one byte of it is kept free for a sentinel separator).
static const size_t BUFFER_SIZE = 1 << 16;
// Short aliases for the trie implementations under test; bench <T> () is
// instantiated with these from main ().
#if   defined (USE_CEDAR_UNORDERED)
// NOTE(review): the extra template arguments presumably select cedar's
// unordered variant -- confirm against the cedar header in use.
typedef cedar::da <int, -1, -2, false>  cedar_t;
#else
typedef cedar::da <int>                 cedar_t;
#endif
// Darts::DoubleArray comes from darts.h or one of the darts-clone headers,
// depending on which header was included above.
typedef Darts::DoubleArray              darts_t;
typedef tx_tool::tx                     tx_t;
typedef ux::Trie                        ux_t;
typedef marisa::Trie                    marisa_t;

// Read the whole of `file` into a freshly allocated buffer.
//   file: path of the file to load
//   data: receives a new[]-allocated buffer; the caller releases it with
//         delete []
//   returns the number of bytes stored in data
// Prints a message to stderr and exits with status 1 when the file cannot be
// opened, measured, or read.
size_t read_data (const char* file, char*& data) {
  const int fd = ::open (file, O_RDONLY);
  if (fd < 0)
    { std::fprintf (stderr, "no such file: %s\n", file); std::exit (1); }
  // lseek () reports failure (e.g., on a pipe) as -1; casting that to size_t
  // unchecked would request a gigantic allocation
  const off_t length = ::lseek (fd, 0L, SEEK_END);
  if (length < 0 || ::lseek (fd, 0L, SEEK_SET) < 0)
    { std::fprintf (stderr, "cannot seek: %s\n", file); std::exit (1); }
  const size_t size = static_cast <size_t> (length);
  data = new char[size];
  // read () may deliver fewer bytes than requested, so keep reading until the
  // buffer is full or the file ends
  size_t filled = 0;
  while (filled < size) {
    const ssize_t got = ::read (fd, data + filled, size - filled);
    if (got < 0)
      { std::fprintf (stderr, "cannot read: %s\n", file); std::exit (1); }
    if (got == 0) break; // file became shorter after it was measured
    filled += static_cast <size_t> (got);
  }
  ::close (fd);
  return filled;
}

#ifdef USE_BINARY_DATA
#define KEY_SEP '\0'
// Return the position of the first NUL byte at or after p.
// The scan is unbounded: the caller must guarantee that a NUL byte exists.
inline char* find_sep (char* p) {
  for (; *p != '\0'; ++p) {}
  return p;
}
#else
#define KEY_SEP '\n'
// Return the position of the first newline at or after p, after overwriting
// that newline with '\0' so that the key in front of it is NUL-terminated.
// The scan is unbounded: the caller must guarantee that a newline exists.
inline char* find_sep (char* p) {
  for (; *p != '\n'; ++p) {}
  *p = '\0';
  return p;
}
#endif

// Allocate a value-initialized T with new and return it; the counterpart that
// releases it is destroy ().
template <typename T>
inline T* create () {
  T* const instance = new T ();
  return instance;
}

// Release an object with delete; passing a null pointer is a no-op.
template <typename T>
inline void destroy (T* t) {
  delete t;
}

// darts/darts-clone
// Read KEY_SEP-delimited keys from fd, build the trie from all of them in one
// call, and save it to index.
//   t:     trie to fill; must provide build (count, keys, lengths, values)
//          and save (path)
//   fd:    descriptor of the key file, read from its current position
//   n:     incremented once per key; each key is registered with the value n
//          holds right after its increment
//   index: path the built trie is saved to
// A final key that is not followed by KEY_SEP is discarded.  A read error or
// a key that does not fit into the buffer ends the program with status 1.
template <typename T>
void build (T* t, int fd, int& n, const char* index) {
  std::vector <const char*> key;
  std::vector <size_t>      len;
  std::vector <int>         val;
  char data[BUFFER_SIZE];
  char* start (data), *end (data);
  char* const tail = data + BUFFER_SIZE - 1; // last byte is kept for the sentinel
  for (;;) {
    // a buffer filled by one unterminated key would make read () return 0,
    // which is indistinguishable from the end of the file
    if (end == tail) {
      std::fprintf (stderr, "key too long (%lu bytes or more)\n",
                    static_cast <unsigned long> (BUFFER_SIZE - 1));
      std::exit (1);
    }
    const ssize_t got = ::read (fd, end, tail - end);
    // read () returns -1 on failure; adding that to end would put the
    // sentinel in front of the unread region
    if (got < 0)
      { std::fprintf (stderr, "failed to read keys\n"); std::exit (1); }
    if (got == 0) break; // end of file
    char* const tail_ = end + got;
    *tail_ = KEY_SEP; // sentinel: find_sep () stops here at the latest
    for (; (end = find_sep (end)) != tail_; start = ++end) {
      // the buffer is reused, so each key needs its own copy
      key.push_back (::strdup (start));
      len.push_back (end - start);
      val.push_back (++n);
    }
    // carry the unterminated remainder over to the front of the buffer
    const size_t rest = static_cast <size_t> (tail_ - start);
    std::memmove (data, start, rest);
    start = data; end = data + rest;
  }
  // operator[] must not be applied to an empty vector
  const bool none = key.empty ();
  t->build (key.size (), none ? 0 : &key[0], none ? 0 : &len[0], none ? 0 : &val[0]);
  t->save (index);
  for (size_t i = 0; i < key.size (); ++i)
    ::free (const_cast <char*> (key[i]));
}

// cedar
// Read KEY_SEP-delimited keys from fd, insert them into the trie one by one,
// and save the trie to index.
//   t:     trie to update
//   fd:    descriptor of the key file, read from its current position
//   n:     incremented once per key; each key is stored with the value n
//          holds right after its increment
//   index: path the built trie is saved to
// A final key that is not followed by KEY_SEP is discarded.  A read error or
// a key that does not fit into the buffer ends the program with status 1.
template <>
void build (cedar_t* t, int fd, int& n, const char* index) {
  char data[BUFFER_SIZE];
  char* start (data), *end (data);
  char* const tail = data + BUFFER_SIZE - 1; // last byte is kept for the sentinel
  for (;;) {
    // a buffer filled by one unterminated key would make read () return 0,
    // which is indistinguishable from the end of the file
    if (end == tail) {
      std::fprintf (stderr, "key too long (%lu bytes or more)\n",
                    static_cast <unsigned long> (BUFFER_SIZE - 1));
      std::exit (1);
    }
    const ssize_t got = ::read (fd, end, tail - end);
    // read () returns -1 on failure; adding that to end would put the
    // sentinel in front of the unread region
    if (got < 0)
      { std::fprintf (stderr, "failed to read keys\n"); std::exit (1); }
    if (got == 0) break; // end of file
    char* const tail_ = end + got;
    *tail_ = KEY_SEP; // sentinel: find_sep () stops here at the latest
    for (; (end = find_sep (end)) != tail_; start = ++end)
      t->update (start, end - start) = ++n;
    // carry the unterminated remainder over to the front of the buffer
    const size_t rest = static_cast <size_t> (tail_ - start);
    std::memmove (data, start, rest);
    start = data; end = data + rest;
  }
  t->save (index);
}

// tx
// Read KEY_SEP-delimited keys from fd and hand all of them, together with the
// output path, to the trie's build ().
//   t:     trie to build
//   fd:    descriptor of the key file, read from its current position
//   n:     incremented once per key
//   index: path passed to build () as the output file
// A final key that is not followed by KEY_SEP is discarded.  A read error or
// a key that does not fit into the buffer ends the program with status 1.
template <>
void build (tx_t* t, int fd, int& n, const char* index) {
  std::vector <std::string> str;
  char data[BUFFER_SIZE];
  char* start (data), *end (data);
  char* const tail = data + BUFFER_SIZE - 1; // last byte is kept for the sentinel
  for (;;) {
    // a buffer filled by one unterminated key would make read () return 0,
    // which is indistinguishable from the end of the file
    if (end == tail) {
      std::fprintf (stderr, "key too long (%lu bytes or more)\n",
                    static_cast <unsigned long> (BUFFER_SIZE - 1));
      std::exit (1);
    }
    const ssize_t got = ::read (fd, end, tail - end);
    // read () returns -1 on failure; adding that to end would put the
    // sentinel in front of the unread region
    if (got < 0)
      { std::fprintf (stderr, "failed to read keys\n"); std::exit (1); }
    if (got == 0) break; // end of file
    char* const tail_ = end + got;
    *tail_ = KEY_SEP; // sentinel: find_sep () stops here at the latest
    for (; (end = find_sep (end)) != tail_; start = ++end) {
      str.push_back (start);
      ++n;
    }
    // carry the unterminated remainder over to the front of the buffer
    const size_t rest = static_cast <size_t> (tail_ - start);
    std::memmove (data, start, rest);
    start = data; end = data + rest;
  }
  t->build (str, index);
}

// ux
// Read KEY_SEP-delimited keys from fd, build the trie from all of them in one
// call, and save it to index.
//   t:     trie to build
//   fd:    descriptor of the key file, read from its current position
//   n:     incremented once per key
//   index: path the built trie is saved to
// A final key that is not followed by KEY_SEP is discarded.  A read error or
// a key that does not fit into the buffer ends the program with status 1.
template <>
void build (ux_t* t, int fd, int& n, const char* index) {
  std::vector <std::string> str;
  char data[BUFFER_SIZE];
  char* start (data), *end (data);
  char* const tail = data + BUFFER_SIZE - 1; // last byte is kept for the sentinel
  for (;;) {
    // a buffer filled by one unterminated key would make read () return 0,
    // which is indistinguishable from the end of the file
    if (end == tail) {
      std::fprintf (stderr, "key too long (%lu bytes or more)\n",
                    static_cast <unsigned long> (BUFFER_SIZE - 1));
      std::exit (1);
    }
    const ssize_t got = ::read (fd, end, tail - end);
    // read () returns -1 on failure; adding that to end would put the
    // sentinel in front of the unread region
    if (got < 0)
      { std::fprintf (stderr, "failed to read keys\n"); std::exit (1); }
    if (got == 0) break; // end of file
    char* const tail_ = end + got;
    *tail_ = KEY_SEP; // sentinel: find_sep () stops here at the latest
    for (; (end = find_sep (end)) != tail_; start = ++end) {
      str.push_back (start);
      ++n;
    }
    // carry the unterminated remainder over to the front of the buffer
    const size_t rest = static_cast <size_t> (tail_ - start);
    std::memmove (data, start, rest);
    start = data; end = data + rest;
  }
  t->build (str);
  t->save (index);
}

// marisa
// Read KEY_SEP-delimited keys from fd into a keyset, build the trie from the
// keyset, and save it to index.
//   t:     trie to build
//   fd:    descriptor of the key file, read from its current position
//   n:     incremented once per key
//   index: path the built trie is saved to
// A final key that is not followed by KEY_SEP is discarded.  A read error or
// a key that does not fit into the buffer ends the program with status 1.
template <>
void build (marisa_t* t, int fd, int& n, const char* index) {
  marisa::Keyset keyset;
  char data[BUFFER_SIZE];
  char* start (data), *end (data);
  char* const tail = data + BUFFER_SIZE - 1; // last byte is kept for the sentinel
  for (;;) {
    // a buffer filled by one unterminated key would make read () return 0,
    // which is indistinguishable from the end of the file
    if (end == tail) {
      std::fprintf (stderr, "key too long (%lu bytes or more)\n",
                    static_cast <unsigned long> (BUFFER_SIZE - 1));
      std::exit (1);
    }
    const ssize_t got = ::read (fd, end, tail - end);
    // read () returns -1 on failure; adding that to end would put the
    // sentinel in front of the unread region
    if (got < 0)
      { std::fprintf (stderr, "failed to read keys\n"); std::exit (1); }
    if (got == 0) break; // end of file
    char* const tail_ = end + got;
    *tail_ = KEY_SEP; // sentinel: find_sep () stops here at the latest
    for (; (end = find_sep (end)) != tail_; start = ++end) {
      keyset.push_back (start);
      ++n;
    }
    // carry the unterminated remainder over to the front of the buffer
    const size_t rest = static_cast <size_t> (tail_ - start);
    std::memmove (data, start, rest);
    start = data; end = data + rest;
  }
  t->build (keyset);
  t->save (index);
}

// cedar/darts/darts-clone
// Report whether the len bytes at key are registered in t, i.e. whether
// exactMatchSearch <int> () yields a non-negative value for them.
template <typename T>
inline bool lookup_key (const T& t, const char* key, size_t len) {
  const int value = t.template exactMatchSearch <int> (key, len);
  return value >= 0;
}

// tx
// Report whether the len bytes at key are registered in t: prefixSearch ()
// must find something and the length it reports must cover the whole key.
template <>
inline bool lookup_key (const tx_t& t, const char* key, size_t len) {
  size_t matched = 0;
  if (t.prefixSearch (key, len, matched) == tx_tool::tx::NOTFOUND)
    return false;
  return matched == len;
}

// ux
// Report whether the len bytes at key are registered in t: prefixSearch ()
// must find something and the length it reports must cover the whole key.
template <>
inline bool lookup_key (const ux_t& t, const char* key, size_t len) {
  size_t matched = 0;
  if (t.prefixSearch (key, len, matched) == ux::NOTFOUND)
    return false;
  return matched == len;
}

// marisa
// Report whether the len bytes at key are registered in t, using the result
// of t.lookup () on an agent whose query is set to the key.
template <>
inline bool lookup_key (const marisa_t& t, const char* key, size_t len) {
  static marisa::Agent agent; // function-local static: shared by all calls
  agent.set_query (key, len);
  const bool found = t.lookup (agent);
  return found;
}

// lookup
// Look up every separator-delimited key stored in the size bytes at data.
//   t:    trie that is queried through lookup_key ()
//   data: key buffer; every separator found is overwritten with '\0'
//   size: number of bytes in data
//   n_:   incremented for every key that lookup_key () reports as found
//   n:    incremented for every key
// The scan is bounded by size: a final key that lacks its trailing separator
// is looked up with the bytes that remain instead of being scanned past the
// end of the buffer.
template <typename T>
void lookup (const T& t, char* data, size_t size, int& n_, int& n) {
#ifdef USE_BINARY_DATA
  const char sep = '\0'; // same value as KEY_SEP
#else
  const char sep = '\n'; // same value as KEY_SEP
#endif
  char* const tail = data + size;
  for (char* start = data; start != tail; ) {
    char* const hit = static_cast <char*>
      (std::memchr (start, sep, static_cast <size_t> (tail - start)));
    char* const end = hit ? hit : tail; // no separator left: key runs to the end
    if (hit) *hit = '\0';               // terminate the key in place
    if (lookup_key (t, start, end - start))
      ++n_;
    ++n;
    start = hit ? hit + 1 : tail;
  }
}

// cedar/darts/darts-clone
// Load the trie stored at index into t through its open () member.
// open () is expected to return 0 on success; any other status ends the
// program with status 1 rather than letting an empty trie be benchmarked.
template <typename T>
void read_trie (T* t, const char* index) {
  if (t->open (index) != 0)
    { std::fprintf (stderr, "cannot open index: %s\n", index); std::exit (1); }
}

// tx
// Load the trie stored at index into t through its read () member.
// read () is expected to return 0 on success; any other status ends the
// program with status 1 rather than letting an empty trie be benchmarked.
template <>
void read_trie (tx_t* t, const char* index) {
  if (t->read (index) != 0)
    { std::fprintf (stderr, "cannot open index: %s\n", index); std::exit (1); }
}

// ux
// Load the trie stored at index into t through its load () member.
// load () is expected to return 0 on success; any other status ends the
// program with status 1 rather than letting an empty trie be benchmarked.
template <>
void read_trie (ux_t* t, const char* index) {
  if (t->load (index) != 0)
    { std::fprintf (stderr, "cannot open index: %s\n", index); std::exit (1); }
}

// marisa-trie
// Load the trie stored at index into t through its load () member.
template <>
void read_trie (marisa_t* t, const char* index) {
  marisa_t& trie = *t;
  trie.load (index);
}

// Return the size in bytes of the file at index, measured by seeking to its
// end.  Prints a message to stderr and exits with status 1 when the file
// cannot be opened or measured.
size_t get_size (const char* index) {
  const int fd = ::open (index, O_RDONLY);
  if (fd < 0)
    { std::fprintf (stderr, "no such file: %s\n", index); std::exit (1); }
  const off_t length = ::lseek (fd, 0L, SEEK_END);
  ::close (fd);
  // lseek () reports failure as -1, which must not be cast to a huge size_t
  if (length < 0)
    { std::fprintf (stderr, "cannot seek: %s\n", index); std::exit (1); }
  return static_cast <size_t> (length);
}

// Run one benchmark for trie type T and report the results on stderr.
//   keys:    key file to build the trie from, or "-" to skip building
//   index:   file the trie is saved to (build) or loaded from (search)
//   queries: query file to look up, or "-" to skip searching; only consulted
//            when keys is "-"
//   label:   name printed in the banner line
// Exits with status 1 when the key file cannot be opened.
template <typename T>
void bench (const char* keys, const char* index, const char* queries, const char* label) {
  std::fprintf (stderr, "---- %-25s --------------------------\n", label);
  //
  T* t = create <T> ();
  struct timeval st, et;
  if (std::strcmp (keys, "-") != 0) {
    const int fd = ::open (keys, O_RDONLY);
    if (fd < 0)
      { std::fprintf (stderr, "no such file: %s\n", keys); std::exit (1); }
    // build trie
    int n = 0;
    ::gettimeofday (&st, NULL);
    build (t, fd, n, index);
    ::gettimeofday (&et, NULL);
    const double elapsed = (et.tv_sec - st.tv_sec) + (et.tv_usec - st.tv_usec) * 1e-6;
    // no per-key figure can be computed when no key was read
    std::fprintf (stderr, "%-20s %.2f sec (%.2f nsec per key)\n",
                  "Time to insert:", elapsed, n ? elapsed * 1e9 / n : 0.0);
    std::fprintf (stderr, "%-20s %d\n", "Words:", n);
    ::close (fd);
    // trie size
    const size_t rss = get_size (index);
    // size_t does not match %ld; print it as unsigned long
    std::fprintf (stderr, "%-20s %.2f MiB (%lu bytes)\n",
                  "Trie size:", rss / 1048576.0, static_cast <unsigned long> (rss));
  } else if (std::strcmp (queries, "-") != 0) {
    // load data
    char* data = 0;
    const size_t size = read_data (queries, data);
    // search
    read_trie (t, index);
    int n (0), n_ (0);
    ::gettimeofday (&st, NULL);
    lookup (*t, data, size, n_, n);
    ::gettimeofday (&et, NULL);
    const double elapsed = (et.tv_sec - st.tv_sec) + (et.tv_usec - st.tv_usec) * 1e-6;
    // no per-key figure can be computed when no query was read
    std::fprintf (stderr, "%-20s %.2f sec (%.2f nsec per key)\n",
                  "Time to search:", elapsed, n ? elapsed * 1e9 / n : 0.0);
    std::fprintf (stderr, "%-20s %d\n", "Words:", n);
    std::fprintf (stderr, "%-20s %d\n", "Found:", n_);
    delete [] data;
  }
  destroy (t);
}

// Entry point: <program> keys index queries
// Runs bench () once for every trie implementation selected at compile time
// through a USE_* macro, passing the three arguments through unchanged.
int main (int argc, char** argv) {
  if (argc < 4) {
    std::fprintf (stderr, "Usage: %s keys index queries\n", argv[0]);
    std::exit (1);
  }
  // cedar; the label reflects the trie variant chosen at compile time
#if defined (USE_CEDAR)
#  if   defined (USE_PREFIX_TRIE)
  bench <cedar_t> (argv[1], argv[2], argv[3], "cedar (prefix)");
#  elif defined (USE_REDUCED_TRIE)
  bench <cedar_t> (argv[1], argv[2], argv[3], "cedar (reduced)");
#  else
  bench <cedar_t> (argv[1], argv[2], argv[3], "cedar");
#  endif
#endif
  // cedar, unordered
#if defined (USE_CEDAR_UNORDERED)
#  if   defined (USE_PREFIX_TRIE)
  bench <cedar_t> (argv[1], argv[2], argv[3], "cedar unordered (prefix)");
#  elif defined (USE_REDUCED_TRIE)
  bench <cedar_t> (argv[1], argv[2], argv[3], "cedar unordered (reduced)");
#  else
  bench <cedar_t> (argv[1], argv[2], argv[3], "cedar unordered");
#  endif
#endif
  // darts and darts-clone share the darts_t alias
#if defined (USE_DARTS)
  bench <darts_t> (argv[1], argv[2], argv[3], "darts");
#endif
#if defined (USE_DARTS_CLONE)
  bench <darts_t> (argv[1], argv[2], argv[3], "darts-clone");
#endif
#if defined (USE_DARTS_CLONE_OLD)
  bench <darts_t> (argv[1], argv[2], argv[3], "darts-clone_old");
#endif
  // succinct tries
#if defined (USE_TX)
  bench <tx_t> (argv[1], argv[2], argv[3], "tx");
#endif
#if defined (USE_UX)
  bench <ux_t> (argv[1], argv[2], argv[3], "ux");
#endif
#if defined (USE_MARISA)
  bench <marisa_t> (argv[1], argv[2], argv[3], "marisa-trie");
#endif
}
/*
  g++ -DUSE_CEDAR -DHAVE_CONFIG_H -I. -I.. -I$HOME/local/include -O2 -g bench_static.cc -o bench_static_marisa -L$HOME/local/lib -ltx -lux -lmarisa -ltrie
  g++ -DUSE_CEDAR -DHAVE_CONFIG_H -DUSE_BINARY_DATA -I. -I.. -I$HOME/local/include -O2 -g bench_static.cc -o bench_static_marisa_bin -L$HOME/local/lib -ltx -lux -lmarisa -ltrie
*/
