// cedar -- C++ implementation of Efficiently-updatable Double ARray trie
//  $Id: bench.cc 1918 2018-05-16 08:56:47Z ynaga $
// Copyright (c) 2013-2015 Naoki Yoshinaga <ynaga@tkl.iis.u-tokyo.ac.jp>
#include <unistd.h>
#include <fcntl.h>
#include <sys/time.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cstddef> // for ternary search tree
#include <vector>
#include <string>
#include <numeric>
// C++ containers
#include <unordered_map>
// cedar variants
#ifdef USE_PREFIX_TRIE
#include <cedarpp.h>
#else
#include <cedar.h>
#endif
// Google's containers and variants
#include <sparsepp/spp.h>
#include <google/dense_hash_map>
// Cache-conscious containers
#ifdef USE_JUDY
#include <Judy.h>
#endif
#include <city.h>
#ifdef USE_HAT
#include <htrie_map.h>
#endif
#ifdef USE_AHASH
#include <array_map.h>
#endif
#ifdef USE_HHASH
#include <hopscotch_map.h>
#endif
#if defined (USE_POPLARPP) || defined (USE_POPLARCG)
#include <poplar.hpp>
#endif

// Suffix appended to the hash/trie labels passed to bench () in main ():
// "t" when the tuned (const char* keyed, CityHash-based) typedefs are
// selected with USE_TUNED, empty otherwise.
#ifdef USE_TUNED
#define HASH_SUFFIX "t"
#else
#define HASH_SUFFIX ""

#endif

// taken from https://gist.github.com/Tessil/72e11891fc155f5b2eb53de22cbc4053
// Hash functor built on CityHash64.  Three call forms are offered:
//   (key, key_size)  - explicit-length byte range
//   (const char*)    - NUL-terminated C string, hashed in place so that
//                      pointer-keyed tables do not have to materialize a
//                      temporary std::string for every hash computation
//   (std::string)    - owning string
// All three produce the same value for the same bytes.
struct str_hash {
    std::size_t operator () (const char* key, std::size_t key_size) const
    { return CityHash64 (key, key_size); }
    std::size_t operator () (const char* key) const
    { return CityHash64 (key, std::strlen (key)); }
    std::size_t operator () (const std::string& key) const
    { return CityHash64 (key.c_str (), key.size ()); }
};

// check const char* equality when size is unavailable
struct str_equal {
    // Returns true iff the NUL-terminated strings p and q hold the same bytes.
    // Both pointers must be non-null.
    bool operator()(const char* p, const char* q) const
    { return std::strcmp (p, q) == 0; }
};

// size in bytes of the stack buffer that insert () fills from the key file
static const size_t BUFFER_SIZE = 1 << 16;
// cedar trie storing int values.  With USE_CEDAR_UNORDERED the template
// arguments are spelled out as <int, -1, -2, false>; presumably the trailing
// `false` disables ordered node placement -- confirm against cedar.h.
#if   defined (USE_CEDAR_UNORDERED)
typedef cedar::da <int, -1, -2, false> cedar_t;
#else
typedef cedar::da <int> cedar_t;
#endif

// Judy array handle; used below through the JSLI / JSLG / JSLFA macros
#ifdef USE_JUDY
typedef Pvoid_t judy_t;
#endif

// poplar map with int values; USE_POPLARPP and USE_POPLARCG pick the
// MapPP or MapCG variant under the same poplar_t name, so they are
// expected to be defined one at a time
#ifdef USE_POPLARPP
typedef poplar::MapPP <int> poplar_t;
#endif
#ifdef USE_POPLARCG
typedef poplar::MapCG <int> poplar_t;
#endif

// Hash-table and trie typedefs under test.
//  - USE_TUNED:  tables are keyed by const char* and use str_hash /
//                str_equal; array_map and htrie_map take str_hash only.
//  - otherwise:  tables are keyed by std::string with each library's
//                default hash and equality.
#ifdef USE_TUNED
typedef std::unordered_map <const char*, int, str_hash, str_equal> hash_t;
typedef google::dense_hash_map <const char*, int, str_hash, str_equal> gdhash_t;
typedef spp::sparse_hash_map <const char*, int, str_hash, str_equal> shash_t;
#ifdef USE_AHASH
typedef tsl::array_map <char, int, str_hash> ahash_t;
#endif
#ifdef USE_HAT
typedef tsl::htrie_map <char, int, str_hash> hat_t;
#endif
#ifdef USE_HHASH
typedef tsl::hopscotch_map <const char*, int, str_hash, str_equal> hhash_t;
#endif
#else
typedef std::unordered_map <std::string, int> hash_t;
typedef google::dense_hash_map <std::string, int> gdhash_t;
typedef spp::sparse_hash_map <std::string, int> shash_t;
#ifdef USE_AHASH
typedef tsl::array_map <char, int> ahash_t;
#endif
#ifdef USE_HAT
typedef tsl::htrie_map <char, int> hat_t;
#endif
#ifdef USE_HHASH
typedef tsl::hopscotch_map <std::string, int> hhash_t;
#endif
#endif


// Loads the whole of `file` into a freshly allocated buffer.
//   file : path of the file to load
//   data : out-parameter; receives a new[]-allocated buffer that the caller
//          releases with delete []
//   returns the number of bytes actually stored in `data`
// Prints a message to stderr and exits with status 1 when the file cannot
// be opened, its size cannot be determined, or a read fails.
size_t read_data (const char* file, char*& data) {
  int fd = ::open (file, O_RDONLY);
  if (fd < 0)
    { std::fprintf (stderr, "no such file: %s\n", file); std::exit (1); }
  // lseek () reports failure as -1; catch it before it is turned into a
  // huge unsigned allocation size
  const off_t length = ::lseek (fd, 0L, SEEK_END);
  if (length < 0 || ::lseek (fd, 0L, SEEK_SET) < 0)
    { std::fprintf (stderr, "cannot seek: %s\n", file); std::exit (1); }
  const size_t size = static_cast <size_t> (length);
  data = new char[size];
  // read () may deliver fewer bytes than requested, so loop until the buffer
  // is full or the file ends early
  size_t done = 0;
  while (done < size) {
    const ssize_t got = ::read (fd, data + done, size - done);
    if (got < 0)
      { std::fprintf (stderr, "cannot read: %s\n", file); std::exit (1); }
    if (got == 0) break; // file shrank after it was measured
    done += static_cast <size_t> (got);
  }
  ::close (fd);
  return done;
}

// KEY_SEP is the byte that terminates each key in the input; find_sep ()
// walks forward from p to the next KEY_SEP and returns its address.  The
// scan has no upper bound, so a separator must exist at or after p.
#ifdef USE_BINARY_DATA
#define KEY_SEP '\0'
// binary input: keys are already NUL-terminated, the buffer is left untouched
inline char* find_sep (char* p) {
  for (; *p != '\0'; ++p) {}
  return p;
}
#else
#define KEY_SEP '\n'
// text input: the newline that is found is overwritten with NUL so that the
// key preceding it can be used as a C string
inline char* find_sep (char* p) {
  for (; *p != '\n'; ++p) {}
  *p = '\0';
  return p;
}
#endif

// Heap-allocates a value-initialized T and hands the pointer to the caller.
template <typename T>
inline T* create () {
  T* const fresh = new T ();
  return fresh;
}

// gdhash_t variant of create (): besides allocating the table it registers
// "" as the table's empty-key marker via set_empty_key ().
template <>
inline gdhash_t* create () {
  gdhash_t* const table = new gdhash_t;
  table->set_empty_key ("");
  return table;
}

// Default teardown: deletes the object that t points to.
template <typename T>
inline void destroy (T* t) {
  delete t;
}

#ifdef USE_TUNED
#ifdef USE_HHASH
// Tuned hopscotch map teardown.  The keys are C strings duplicated with
// ::strndup () in insert_key (), i.e. malloc-family memory, so each one is
// released with std::free () before the table itself is deleted.
// This must be an explicit specialization (template <>) of destroy (T*):
// a separate template with an undeducible parameter would never be picked
// by the destroy (t) call in bench () and the keys would leak.
template <>
inline void destroy (hhash_t* t) {
  for (hhash_t::const_iterator it = t->begin (); it != t->end (); ++it)
    std::free (const_cast <char*> (it->first));
  delete t;
}
#endif

// main () enables the dense_hash_map benchmark with USE_GOOGLE_DHASH, so
// accept that macro as well as the older USE_GDHASH spelling; otherwise this
// teardown is never compiled for the table that is actually benchmarked.
#if defined (USE_GDHASH) || defined (USE_GOOGLE_DHASH)
// Tuned dense_hash_map teardown: keys come from ::strndup () in
// insert_key (), so they are released with std::free () before the table.
template <>
inline void destroy (gdhash_t* t) {
  for (gdhash_t::const_iterator it = t->begin (); it != t->end (); ++it)
    std::free (const_cast <char*> (it->first));
  delete t;
}
#endif

#ifdef USE_SHASH
// Tuned sparse_hash_map teardown: keys come from ::strndup () in
// insert_key () (malloc-family memory), so they are released with
// std::free () rather than delete [] before the table is deleted.
template <>
inline void destroy (shash_t* t) {
  for (shash_t::const_iterator it = t->begin (); it != t->end (); ++it)
    std::free (const_cast <char*> (it->first));
  delete t;
}
#endif

#ifdef USE_HASH
// Tuned std::unordered_map teardown: keys come from ::strndup () in
// insert_key () (malloc-family memory), so they are released with
// std::free () rather than delete [] before the table is deleted.
template <>
inline void destroy (hash_t* t) {
  for (hash_t::const_iterator it = t->begin (); it != t->end (); ++it)
    std::free (const_cast <char*> (it->first));
  delete t;
}
#endif
#endif

#ifdef USE_JUDY
// Judy teardown: JSLFA is applied to the array handle stored in *t, then the
// heap cell that held the handle is deleted.  `bytes` is only the macro's
// result slot; its value is not used here.
template <>
inline void destroy (judy_t* t) {
  Word_t bytes = 0;
  JSLFA (bytes, *t);
  delete t;
}
#endif

#ifdef USE_TUNED
// Generic insert for tables keyed by const char* (tuned build).
//   t   : table to insert into
//   key : start of the key bytes
//   len : number of key bytes
//   n   : value to associate with the key
// The table stores a pointer, so the key is duplicated with ::strndup ().
// When the key is already present the table keeps its existing entry and
// does not take the new copy, which is therefore freed here instead of leaked.
template <typename T> //  hash
inline void insert_key (T* t, const char* key, size_t len, int n) {
  char* const copy = ::strndup (key, len);
  if (! t->insert ({copy, n}).second)
    std::free (copy);
}
// Generic membership test for tables keyed by const char* (tuned build).
// `key` is handed to find () as a C string; `len` is accepted for signature
// parity with the other lookup_key variants and is not consulted.
template <typename T>  // hash
inline bool lookup_key (const T& t, const char* key, size_t len) {
  const bool present = (t.find (key) != t.end ());
  return present;
}
#else
// Generic insert for tables keyed by std::string (untuned build): builds a
// std::string from the first `len` bytes at `key` and inserts it with value
// `n`.  The result of insert () is not inspected.
template <typename T> //  hash
inline void insert_key (T* t, const char* key, size_t len, int n) {
  const std::string owned (key, len);
  t->insert (typename T::value_type (owned, n));
}
// Generic membership test for tables keyed by std::string (untuned build):
// builds a std::string from the first `len` bytes at `key` and reports
// whether find () locates it.
template <typename T>  // hash
inline bool lookup_key (const T& t, const char* key, size_t len) {
  const std::string probe (key, len);
  return t.find (probe) != t.end ();
}
#endif

#if defined (USE_POPLARPP) || defined (USE_POPLARCG)
// poplar: update () yields a pointer to the value slot of the key, which is
// then overwritten with n
template <>
inline void insert_key (poplar_t* t, const char* key, size_t len, int n) {
  *t->update (key, len) = n;
}
// poplar: a key counts as present when find () returns a non-null pointer
template <>
inline bool lookup_key (const poplar_t& t, const char* key, size_t len) {
  const bool present = (t.find (key, len) != nullptr);
  return present;
}
#endif

// hat-trie: the *_ks entry points take the key as pointer + length
#ifdef USE_HAT
// stores n under the len-byte key; the result of insert_ks () is ignored
template <>
inline void insert_key (hat_t* t, const char* key, const size_t len, int n) {
  t->insert_ks (key, len, n);
}
// true when find_ks () lands on an element rather than end ()
template <>
inline bool lookup_key (const hat_t& t, const char* key, const size_t len) {
  const bool present = (t.find_ks (key, len) != t.end ());
  return present;
}
#endif

// array-hash: the *_ks entry points take the key as pointer + length
#ifdef USE_AHASH
// stores n under the len-byte key; the result of insert_ks () is ignored
template <>
inline void insert_key (ahash_t* t, const char* key, const size_t len, int n) {
  t->insert_ks (key, len, n);
}
// true when find_ks () lands on an element rather than end ()
template <>
inline bool lookup_key (const ahash_t& t, const char* key, const size_t len) {
  const bool present = (t.find_ks (key, len) != t.end ());
  return present;
}
#endif

// cedar
// assigns n to the value slot that update () returns for the len-byte key
template <>
inline void insert_key (cedar_t* t, const char* key, const size_t len, int n) {
  t->update (key, len) = n;
}
// a key counts as found when exactMatchSearch <int> () returns a
// non-negative value
template <>
inline bool lookup_key (const cedar_t& t, const char* key, const size_t len) {
  const int value = t.exactMatchSearch <int> (key, len);
  return value >= 0;
}

// judy array: JSLI / JSLG receive the key as a C string, so `len` is not
// consulted by either function
#ifdef USE_JUDY
// JSLI leaves a pointer to the value slot of the key in PValue; the slot is
// then set to n
template <>
inline void insert_key (judy_t* t, const char* key, const size_t len, int n) {
  Word_t* PValue = 0;
  JSLI (PValue, *t, key);
  *PValue = n;
}
// JSLG fills PValue; the key is reported as present when it is non-null
template <>
inline bool lookup_key (const judy_t& t, const char* key, size_t len) {
  PWord_t PValue = 0;
  JSLG (PValue, t, key);
  return PValue;
}
#endif

// Streams keys from fd into the container through a fixed stack buffer.
//   t  : container that receives the keys via insert_key ()
//   fd : open file descriptor positioned at the start of the key data
//   n  : running key counter; pre-incremented for every key, and the
//        incremented value is stored as that key's value
// Keys are delimited by KEY_SEP.  Bytes after the last separator are carried
// to the front of the buffer and completed by the next read; a trailing key
// with no separator before end of file is not inserted.  A single key that
// does not fit into the buffer leaves no room to read and ends loading early.
// A failing read () prints a message to stderr and exits with status 1.
template <typename T>
void insert (T* t, int fd, int& n) {
  char data[BUFFER_SIZE];
  // one byte is held back at the end of the buffer for the sentinel separator
  char* start (data), *end (data), *tail (data + BUFFER_SIZE - 1);
  for (;;) {
    const ssize_t got = ::read (fd, end, tail - end);
    // read () signals failure with -1; using that as a length would move the
    // scan window before the data just examined and run out of bounds
    if (got < 0)
      { std::fprintf (stderr, "read error while loading keys\n"); std::exit (1); }
    if (got == 0) break; // end of file, or no room left in the buffer
    char* const tail_ = end + got;
    // the sentinel at tail_ stops find_sep () at the end of the valid bytes
    for (*tail_ = KEY_SEP; (end = find_sep (end)) != tail_; start = ++end)
      insert_key (t, start, end - start, ++n);
    // move the incomplete key (if any) to the front and resume after it
    std::memmove (data, start, tail_ - start);
    end = data + (tail_ - start); start = data;
  }
}

// lookup
// Walks the query buffer key by key and probes the container for each one.
//   t    : container to query through lookup_key ()
//   data : query bytes; find_sep () may rewrite separators in place
//   size : number of bytes in data
//   n_   : incremented for every key that lookup_key () reports as present
//   n    : incremented for every key examined
// NOTE: find_sep () scans without a bound, so the final key in data is
// expected to be followed by a separator inside the buffer.
template <typename T>
void lookup (const T& t, char* data, size_t size, int& n_, int& n) {
  char* const stop = data + size;
  char* cursor = data;
  while (cursor != stop) {
    char* const head = cursor;
    cursor = find_sep (cursor);
    const bool hit = lookup_key (t, head, cursor - head);
    if (hit)
      ++n_;
    ++n;
    ++cursor; // step over the separator
  }
}

// Runs one benchmark for container type T and reports the results on stderr.
//   keys    : path of the key file that is inserted into the container
//   queries : path of the query file, or "-" to skip the search phase
//   label   : name printed in the banner line
// Both phases are timed with gettimeofday ().  Exits with status 1 when the
// key file cannot be opened.
template <typename T>
void bench (const char* keys, const char* queries, const char* label) {
  std::fprintf (stderr, "---- %-25s --------------------------\n", label);
  T* const table = create <T> ();
  struct timeval began, ended;
  // phase 1: build the container from the key file
  {
    const int key_fd = ::open (keys, O_RDONLY);
    if (key_fd < 0) {
      std::fprintf (stderr, "no such file: %s\n", keys);
      std::exit (1);
    }
    int inserted = 0;
    ::gettimeofday (&began, NULL);
    insert (table, key_fd, inserted);
    ::gettimeofday (&ended, NULL);
    const double secs
      = (ended.tv_sec - began.tv_sec) + (ended.tv_usec - began.tv_usec) * 1e-6;
    std::fprintf (stderr, "%-20s %.2f sec (%.2f nsec per key)\n",
                  "Time to insert:", secs, secs * 1e9 / inserted);
    std::fprintf (stderr, "%-20s %d\n", "Words:", inserted);
    ::close (key_fd);
  }
  // phase 2: replay the query file unless the caller passed "-"
  const bool has_queries = std::strcmp (queries, "-") != 0;
  if (has_queries) {
    char* query_buf = 0;
    const size_t query_bytes = read_data (queries, query_buf);
    int total (0), found (0);
    ::gettimeofday (&began, NULL);
    lookup (*table, query_buf, query_bytes, found, total);
    ::gettimeofday (&ended, NULL);
    const double secs
      = (ended.tv_sec - began.tv_sec) + (ended.tv_usec - began.tv_usec) * 1e-6;
    std::fprintf (stderr, "%-20s %.2f sec (%.2f nsec per key)\n",
                  "Time to search:", secs, secs * 1e9 / total);
    std::fprintf (stderr, "%-20s %d\n", "Words:", total);
    std::fprintf (stderr, "%-20s %d\n", "Found:", found);
    delete [] query_buf;
  }
  destroy (table);
}

// Entry point.  Expects two arguments, a key file and a query file, and
// prints a usage line and exits with status 1 when fewer are given.  bench ()
// is then run once for every container enabled at compile time through the
// USE_* macros; the hash-based containers get HASH_SUFFIX appended to their
// label.
int main (int argc, char** argv) {
  if (argc < 3)
    { std::fprintf (stderr, "Usage: %s keys queries\n", argv[0]); std::exit (1); }
  //
  // cedar: the label reflects the trie flavour chosen with USE_PREFIX_TRIE /
  // USE_REDUCED_TRIE
#ifdef USE_CEDAR
#if   defined (USE_PREFIX_TRIE)
  bench <cedar_t>   (argv[1], argv[2], "cedar (prefix)");
#elif defined (USE_REDUCED_TRIE)
  bench <cedar_t>   (argv[1], argv[2], "cedar (reduced)");
#else
  bench <cedar_t>   (argv[1], argv[2], "cedar");
#endif
#endif
  // cedar with the USE_CEDAR_UNORDERED typedef of cedar_t
#ifdef USE_CEDAR_UNORDERED
#if   defined (USE_PREFIX_TRIE)
  bench <cedar_t>   (argv[1], argv[2], "cedar unordered (prefix)");
#elif defined (USE_REDUCED_TRIE)
  bench <cedar_t>   (argv[1], argv[2], "cedar unordered (reduced)");
#else
  bench <cedar_t>   (argv[1], argv[2], "cedar unordered");
#endif
#endif
#ifdef USE_JUDY
  bench <judy_t>    (argv[1], argv[2], "judy");
#endif
#ifdef USE_HAT
  bench <hat_t>     (argv[1], argv[2], "hat" HASH_SUFFIX);
#endif
#ifdef USE_AHASH
  bench <ahash_t>   (argv[1], argv[2], "ahash" HASH_SUFFIX);
#endif
#ifdef USE_HHASH
  bench <hhash_t>   (argv[1], argv[2], "hhash" HASH_SUFFIX);
#endif
#ifdef USE_SHASH
  bench <shash_t>   (argv[1], argv[2],  "shash" HASH_SUFFIX);
#endif
  // note: the dense_hash_map benchmark is switched on by USE_GOOGLE_DHASH
#ifdef USE_GOOGLE_DHASH
  bench <gdhash_t>  (argv[1], argv[2], "gdhash" HASH_SUFFIX);
#endif
#ifdef USE_HASH
  bench <hash_t>    (argv[1], argv[2], "hash" HASH_SUFFIX);
#endif
#ifdef USE_POPLARPP
  bench <poplar_t> (argv[1], argv[2], "poplarpp");
#endif
#ifdef USE_POPLARCG
  bench <poplar_t> (argv[1], argv[2], "poplarcg");
#endif
}
/*
  gcc -Wall -O2 -g -c jsw_atree.c
  gcc -Wall -O2 -g -c tst.c
  gcc -Wall -O2 -g -std=c99 -c critbit.c
  g++ -DUSE_CEDAR -DHAVE_CONFIG_H -fpermissive -std=c++11 -I. -I.. -I$HOME/local/include -O2 -g critbit.o bench.cc -o bench -L$HOME/local/lib -lhat-trie -lJudy -ltrie -ldict
  g++ -DUSE_CEDAR -DHAVE_CONFIG_H -DUSE_BINARY_DATA -fpermissive -std=c++11 -I. -I$HOME/local/include -O2 -g critbit.o bench.cc -o bench_bin -L$HOME/local/lib -lhat-trie -lJudy -ltrie -ldict
*/
