#include <sstream>
#include <iostream>
#include <fstream>
#include <vector>
#include <tr1/unordered_map>

#include <boost/functional/hash.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/program_options.hpp>
#include <boost/program_options/variables_map.hpp>

#include "sampler.h"
#include "filelib.h"
#include "stringlib.h"
#include "weights.h"
#include "scorer.h"
#include "inside_outside.h"
#include "hg_io.h"
#include "kbest.h"
#include "viterbi.h"

// This is Figure 4 (Algorithm Sampler) from Hopkins&May (2011)

using namespace std;
namespace po = boost::program_options;

struct ApproxVectorHasher {
  static const size_t MASK = 0xFFFFFFFFull;
  union UType {
    double f;   // leave as double
    size_t i;
  };
  static inline double round(const double x) {
    UType t;
    t.f = x;
    size_t r = t.i & MASK;
    if ((r << 1) > MASK)
      t.i += MASK - r + 1;
    else
      t.i &= (1ull - MASK);
    return t.f;
  }
  size_t operator()(const SparseVector<weight_t>& x) const {
    size_t h = 0x573915839;
    for (SparseVector<weight_t>::const_iterator it = x.begin(); it != x.end(); ++it) {
      UType t;
      t.f = it->second;
      if (t.f) {
        size_t z = (t.i >> 32);
        boost::hash_combine(h, it->first);
        boost::hash_combine(h, z);
      }
    }
    return h;
  }
};

struct ApproxVectorEquals {
  bool operator()(const SparseVector<weight_t>& a, const SparseVector<weight_t>& b) const {
    SparseVector<weight_t>::const_iterator bit = b.begin();
    for (SparseVector<weight_t>::const_iterator ait = a.begin(); ait != a.end(); ++ait) {
      if (bit == b.end() ||
          ait->first != bit->first ||
          ApproxVectorHasher::round(ait->second) != ApproxVectorHasher::round(bit->second))
        return false;
      ++bit;
    }
    if (bit != b.end()) return false;
    return true;
  }
};

boost::shared_ptr<MT19937> rng;

void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
  po::options_description opts("Configuration options");
  opts.add_options()
        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation (tokenized text)")
        ("weights,w",po::value<string>(), "[REQD] Weights files from current iterations")
        ("kbest_repository,K",po::value<string>()->default_value("./kbest"),"K-best list repository (directory)")
        ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)")
        ("source,s",po::value<string>()->default_value(""), "Source file (ignored, except for AER)")
        ("loss_function,l",po::value<string>()->default_value("ibm_bleu"), "Loss function being optimized")
        ("kbest_size,k",po::value<unsigned>()->default_value(1500u), "Top k-hypotheses to extract")
        ("candidate_pairs,G", po::value<unsigned>()->default_value(5000u), "Number of pairs to sample per hypothesis (Gamma)")
        ("best_pairs,X", po::value<unsigned>()->default_value(50u), "Number of pairs, ranked by magnitude of objective delta, to retain (Xi)")
        ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)")
        ("help,h", "Help");
  po::options_description dcmdline_options;
  dcmdline_options.add(opts);
  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
  bool flag = false;
  if (!conf->count("reference")) {
    cerr << "Please specify one or more references using -r <REF.TXT>\n";
    flag = true;
  }
  if (!conf->count("weights")) {
    cerr << "Please specify weights using -w <WEIGHTS.TXT>\n";
    flag = true;
  }
  if (flag || conf->count("help")) {
    cerr << dcmdline_options << endl;
    exit(1);
  }
}

struct HypInfo {
  HypInfo() : g_(-100.0f) {}
  HypInfo(const vector<WordID>& h, const SparseVector<weight_t>& feats) : hyp(h), g_(-100.0f), x(feats) {}

  // lazy evaluation
  double g(const SentenceScorer& scorer) const {
    if (g_ == -100.0f)
      g_ = scorer.ScoreCandidate(hyp)->ComputeScore();
    return g_;
  }
  vector<WordID> hyp;
  mutable float g_;
  SparseVector<weight_t> x;
};

struct HypInfoCompare {
  bool operator()(const HypInfo& a, const HypInfo& b) const {
    ApproxVectorEquals comp;
    return (a.hyp == b.hyp && comp(a.x,b.x));
  }
};

struct HypInfoHasher {
  size_t operator()(const HypInfo& x) const {
    boost::hash<vector<WordID> > hhasher;
    ApproxVectorHasher vhasher;
    size_t ha = hhasher(x.hyp);
    boost::hash_combine(ha, vhasher(x.x));
    return ha;
  }
};

void WriteKBest(const string& file, const vector<HypInfo>& kbest) {
  WriteFile wf(file);
  ostream& out = *wf.stream();
  out.precision(10);
  for (int i = 0; i < kbest.size(); ++i) {
    out << TD::GetString(kbest[i].hyp) << endl;
    out << kbest[i].x << endl;
  }
}

void ParseSparseVector(string& line, size_t cur, SparseVector<weight_t>* out) {
  SparseVector<weight_t>& x = *out;
  size_t last_start = cur;
  size_t last_comma = string::npos;
  while(cur <= line.size()) {
    if (line[cur] == ' ' || cur == line.size()) {
      if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) {
        cerr << "[ERROR] " << line << endl << "  position = " << cur << endl;
        exit(1);
      }
      const int fid = FD::Convert(line.substr(last_start, last_comma - last_start));
      if (cur < line.size()) line[cur] = 0;
      const double val = strtod(&line[last_comma + 1], NULL);
      x.set_value(fid, val);

      last_comma = string::npos;
      last_start = cur+1;
    } else {
      if (line[cur] == '=')
        last_comma = cur;
    }
    ++cur;
  }
}

void ReadKBest(const string& file, vector<HypInfo>* kbest) {
  cerr << "Reading from " << file << endl;
  ReadFile rf(file);
  istream& in = *rf.stream();
  string cand;
  string feats;
  while(getline(in, cand)) {
    getline(in, feats);
    assert(in);
    kbest->push_back(HypInfo());
    TD::ConvertSentence(cand, &kbest->back().hyp);
    ParseSparseVector(feats, 0, &kbest->back().x);
  }
  cerr << "  read " << kbest->size() << " hypotheses\n";
}

void Dedup(vector<HypInfo>* h) {
  cerr << "Dedup in=" << h->size();
  tr1::unordered_set<HypInfo, HypInfoHasher, HypInfoCompare> u;
  while(h->size() > 0) {
    u.insert(h->back());
    h->pop_back();
  }
  tr1::unordered_set<HypInfo, HypInfoHasher, HypInfoCompare>::iterator it = u.begin();
  while (it != u.end()) {
    h->push_back(*it);
    it = u.erase(it);
  }
  cerr << "  out=" << h->size() << endl;
}

struct ThresholdAlpha {
  explicit ThresholdAlpha(double t = 0.05) : threshold(t) {}
  double operator()(double mag) const {
    if (mag < threshold) return 0.0; else return 1.0;
  }
  const double threshold;
};

struct TrainingInstance {
  TrainingInstance(const SparseVector<weight_t>& feats, bool positive, float diff) : x(feats), y(positive), gdiff(diff) {}
  SparseVector<weight_t> x;
#undef DEBUGGING_PRO
#ifdef DEBUGGING_PRO
  vector<WordID> a;
  vector<WordID> b;
#endif
  bool y;
  float gdiff;
};
#ifdef DEBUGGING_PRO
ostream& operator<<(ostream& os, const TrainingInstance& d) {
  return os << d.gdiff << " y=" << d.y << "\tA:" << TD::GetString(d.a) << "\n\tB: " << TD::GetString(d.b) << "\n\tX: " << d.x;
}
#endif

struct DiffOrder {
  bool operator()(const TrainingInstance& a, const TrainingInstance& b) const {
    return a.gdiff > b.gdiff;
  }
};

void Sample(const unsigned gamma, const unsigned xi, const vector<HypInfo>& J_i, const SentenceScorer& scorer, const bool invert_score, vector<TrainingInstance>* pv) {
  vector<TrainingInstance> v1, v2;
  float avg_diff = 0;
  for (unsigned i = 0; i < gamma; ++i) {
    const size_t a = rng->inclusive(0, J_i.size() - 1)();
    const size_t b = rng->inclusive(0, J_i.size() - 1)();
    if (a == b) continue;
    float ga = J_i[a].g(scorer);
    float gb = J_i[b].g(scorer);
    bool positive = gb < ga;
    if (invert_score) positive = !positive;
    const float gdiff = fabs(ga - gb);
    if (!gdiff) continue;
    avg_diff += gdiff;
    SparseVector<weight_t> xdiff = (J_i[a].x - J_i[b].x).erase_zeros();
    if (xdiff.empty()) {
      cerr << "Empty diff:\n  " << TD::GetString(J_i[a].hyp) << endl << "x=" << J_i[a].x << endl;
      cerr << "  " << TD::GetString(J_i[b].hyp) << endl << "x=" << J_i[b].x << endl;
      continue;
    }
    v1.push_back(TrainingInstance(xdiff, positive, gdiff));
#ifdef DEBUGGING_PRO
    v1.back().a = J_i[a].hyp;
    v1.back().b = J_i[b].hyp;
    cerr << "N: " << v1.back() << endl;
#endif
  }
  avg_diff /= v1.size();

  for (unsigned i = 0; i < v1.size(); ++i) {
    double p = 1.0 / (1.0 + exp(-avg_diff - v1[i].gdiff));
    // cerr << "avg_diff=" << avg_diff << "  gdiff=" << v1[i].gdiff << "  p=" << p << endl;
    if (rng->next() < p) v2.push_back(v1[i]);
  }
  vector<TrainingInstance>::iterator mid = v2.begin() + xi;
  if (xi > v2.size()) mid = v2.end();
  partial_sort(v2.begin(), mid, v2.end(), DiffOrder());
  copy(v2.begin(), mid, back_inserter(*pv));
#ifdef DEBUGGING_PRO
  if (v2.size() >= 5) {
    for (int i =0; i < (mid - v2.begin()); ++i) {
      cerr << v2[i] << endl;
    }
    cerr << pv->back() << endl;
  }
#endif
}

int main(int argc, char** argv) {
  po::variables_map conf;
  InitCommandLine(argc, argv, &conf);
  if (conf.count("random_seed"))
    rng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
  else
    rng.reset(new MT19937);
  const string loss_function = conf["loss_function"].as<string>();

  ScoreType type = ScoreTypeFromString(loss_function);
  DocScorer ds(type, conf["reference"].as<vector<string> >(), conf["source"].as<string>());
  cerr << "Loaded " << ds.size() << " references for scoring with " << loss_function << endl;
  Hypergraph hg;
  string last_file;
  ReadFile in_read(conf["input"].as<string>());
  istream &in=*in_read.stream();
  const unsigned kbest_size = conf["kbest_size"].as<unsigned>();
  const unsigned gamma = conf["candidate_pairs"].as<unsigned>();
  const unsigned xi = conf["best_pairs"].as<unsigned>();
  string weightsf = conf["weights"].as<string>();
  vector<weight_t> weights;
  Weights::InitFromFile(weightsf, &weights);
  string kbest_repo = conf["kbest_repository"].as<string>();
  MkDirP(kbest_repo);
  while(in) {
    vector<TrainingInstance> v;
    string line;
    getline(in, line);
    if (line.empty()) continue;
    istringstream is(line);
    int sent_id;
    string file;
    // path-to-file (JSON) sent_id
    is >> file >> sent_id;
    ReadFile rf(file);
    ostringstream os;
    vector<HypInfo> J_i;
    os << kbest_repo << "/kbest." << sent_id << ".txt.gz";
    const string kbest_file = os.str();
    if (FileExists(kbest_file))
      ReadKBest(kbest_file, &J_i);
    HypergraphIO::ReadFromJSON(rf.stream(), &hg);
    hg.Reweight(weights);
    KBest::KBestDerivations<vector<WordID>, ESentenceTraversal> kbest(hg, kbest_size);

    for (int i = 0; i < kbest_size; ++i) {
      const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal>::Derivation* d =
        kbest.LazyKthBest(hg.nodes_.size() - 1, i);
      if (!d) break;
      J_i.push_back(HypInfo(d->yield, d->feature_values));
    }
    Dedup(&J_i);
    WriteKBest(kbest_file, J_i);

    Sample(gamma, xi, J_i, *ds[sent_id], (type == TER), &v);
    for (unsigned i = 0; i < v.size(); ++i) {
      const TrainingInstance& vi = v[i];
      cout << vi.y << "\t" << vi.x << endl;
      cout << (!vi.y) << "\t" << (vi.x * -1.0) << endl;
    }
  }
  return 0;
}