/*
 * Featurize a grammar in striped format
 */
#include <iostream>
#include <sstream>
#include <string>
#include <map>
#include <vector>
#include <utility>
#include <cstdlib>
#include <tr1/unordered_map>

#include "lex_trans_tbl.h"
#include "sparse_vector.h"
#include "sentence_pair.h"
#include "extract.h"
#include "fdict.h"
#include "tdict.h"
#include "filelib.h"
#include "striped_grammar.h"

#include <boost/tuple/tuple.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/functional/hash.hpp>
#include <boost/program_options.hpp>
#include <boost/program_options/variables_map.hpp>

using namespace std;
using namespace std::tr1;
using boost::shared_ptr;
namespace po = boost::program_options;

static string aligned_corpus;
static const size_t MAX_LINE_LENGTH = 64000000;

// Data structures for indexing and counting rules
//typedef boost::tuple< WordID, vector<WordID>, vector<WordID> > RuleTuple;
// Hashable/comparable key identifying a grammar rule: LHS label plus the
// source and target RHS symbol sequences.
// NOTE(review): m_dirty and the commented-out code in hash_value() are the
// remains of a disabled hash-caching scheme; the flag is written by the
// non-const accessors but never read, and the hash is recomputed per call.
struct RuleTuple {
  RuleTuple(const WordID& lhs, const vector<WordID>& s, const vector<WordID>& t)
  : m_lhs(lhs), m_source(s), m_target(t) {
    hash_value();  // result discarded (caching disabled — see NOTE above)
    m_dirty = false;
  }

  // Combine lhs, source and target into a single hash via boost::hash_combine.
  size_t hash_value() const {
//    if (m_dirty) {
      size_t hash = 0;
      boost::hash_combine(hash, m_lhs);
      boost::hash_combine(hash, m_source);
      boost::hash_combine(hash, m_target);
//    }
//    m_dirty = false;
    return hash;
  }

  bool operator==(RuleTuple const& b) const
  { return m_lhs == b.m_lhs && m_source == b.m_source && m_target == b.m_target; }

  // Mutable accessors mark the tuple dirty (vestigial — see NOTE above).
  WordID& lhs() { m_dirty=true; return m_lhs; }
  vector<WordID>& source() { m_dirty=true; return m_source; }
  vector<WordID>& target() { m_dirty=true; return m_target; }
  const WordID& lhs() const { return m_lhs; }
  const vector<WordID>& source() const { return m_source; }
  const vector<WordID>& target() const { return m_target; }

//  mutable size_t m_hash;
private:
  WordID m_lhs;
  vector<WordID> m_source, m_target;
  mutable bool m_dirty;
};
// Boost-compatible free-function hash hook so RuleTuple can key unordered containers.
std::size_t hash_value(RuleTuple const& b) { return b.hash_value(); }
// Strict weak ordering over rules: lexicographic on (lhs, source, target).
bool operator<(RuleTuple const& l, RuleTuple const& r) {
  if (l.lhs() != r.lhs()) return l.lhs() < r.lhs();
  if (l.source() != r.source()) return l.source() < r.source();
  return l.target() < r.target();
}

// Debug rendering: "(lhs--><src words ||| trg words>)".  The lhs is printed
// as its raw numeric ID; RHS tokens are converted back via TD::Convert.
ostream& operator<<(ostream& o, RuleTuple const& r) {
  o << "(" << r.lhs() << "-->" << "<";
  const vector<WordID>& src = r.source();
  for (size_t i = 0; i < src.size(); ++i)
    o << TD::Convert(src[i]) << " ";
  o << "|||";
  const vector<WordID>& trg = r.target();
  for (size_t i = 0; i < trg.size(); ++i)
    o << " " << TD::Convert(trg[i]);
  o << ">)";
  return o;
}

// Simple key -> integer count accumulator backed by a std::map.
template <typename Key>
struct FreqCount {
  //typedef unordered_map<Key, int, boost::hash<Key> > Counts;
  typedef std::map<Key, int> Counts;
  Counts counts;

  // Add c to the count for r (creating the entry if absent); returns the
  // updated count.
  int inc(const Key& r, int c=1) {
    std::pair<typename Counts::iterator,bool> itb
      = counts.insert(std::make_pair(r,c));
    if (!itb.second)
      itb.first->second += c;
    return itb.first->second;
  }

  // Add c to r's count only if r is already present; returns the updated
  // count, or 0 if r was absent.
  // BUG FIX: the original unconditionally returned it->second, dereferencing
  // counts.end() (undefined behavior) whenever the key was missing.
  int inc_if_exists(const Key& r, int c=1) {
    typename Counts::iterator it = counts.find(r);
    if (it == counts.end())
      return 0;
    it->second += c;
    return it->second;
  }

  // Current count for r, or 0 if never seen.
  int count(const Key& r) const {
    typename Counts::const_iterator it = counts.find(r);
    if (it == counts.end()) return 0;
    return it->second;
  }

  int operator()(const Key& r) const { return count(r); }
};
typedef FreqCount<RuleTuple> RuleFreqCount;

class FeatureExtractor;
class FERegistry;
// Abstract factory: one concrete factory per FeatureExtractor subclass,
// stored in the FERegistry keyed by feature name.
struct FEFactoryBase {
  virtual ~FEFactoryBase() {}
  virtual boost::shared_ptr<FeatureExtractor> Create() const = 0;
};


class FERegistry {
  friend class FEFactoryBase;
 public:
  FERegistry() {}
  boost::shared_ptr<FeatureExtractor> Create(const std::string& ffname) const {
    map<string, boost::shared_ptr<FEFactoryBase> >::const_iterator it = reg_.find(ffname);
    boost::shared_ptr<FeatureExtractor> res;
    if (it == reg_.end()) {
      cerr << "I don't know how to create feature " << ffname << endl;
    } else {
      res = it->second->Create();
    }
    return res;
  }
  void DisplayList(ostream* out) const {
    bool first = true;
    for (map<string, boost::shared_ptr<FEFactoryBase> >::const_iterator it = reg_.begin();
        it != reg_.end(); ++it) {
      if (first) {first=false;} else {*out << ' ';}
      *out << it->first;
    }
  }

  void Register(const std::string& ffname, FEFactoryBase* factory) {
    if (reg_.find(ffname) != reg_.end()) {
      cerr << "Duplicate registration of FeatureExtractor with name " << ffname << "!\n";
      exit(1);
    }
    reg_[ffname].reset(factory);
  }

 private:
  std::map<std::string, boost::shared_ptr<FEFactoryBase> > reg_;
};

// Concrete factory: default-constructs a new FE on every Create() call.
template<class FE>
class FEFactory : public FEFactoryBase {
  boost::shared_ptr<FeatureExtractor> Create() const {
    return boost::shared_ptr<FeatureExtractor>(new FE);
  }
};

void InitCommandLine(const FERegistry& r, int argc, char** argv, po::variables_map* conf) {
  po::options_description opts("Configuration options");
  ostringstream feats;
  feats << "[multiple] Features to extract (";
  r.DisplayList(&feats);
  feats << ")";
  opts.add_options()
        ("filtered_grammar,g", po::value<string>(), "Grammar to add features to")
        ("list_features,L", "List extractable features")
        ("feature,f", po::value<vector<string> >()->composing(), feats.str().c_str())
        ("aligned_corpus,c", po::value<string>(), "Aligned corpus (single line format)")
        ("help,h", "Print this help message and exit");
  po::options_description clo("Command line options");
  po::options_description dcmdline_options;
  dcmdline_options.add(opts);

  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
  po::notify(*conf);

  if (conf->count("help") || conf->count("aligned_corpus")==0 || conf->count("feature") == 0) {
    cerr << "\nUsage: featurize_grammar -g FILTERED-GRAMMAR.gz -c ALIGNED_CORPUS.fr-en-al -f Feat1 -f Feat2 ... < UNFILTERED-GRAMMAR\n";
    cerr << dcmdline_options << endl;
    exit(1);
  }
}

// Shared debug switch for the alignment / lexical-probability code below.
static const bool DEBUG = false;

// Accumulate lexical cooccurrence counts from one aligned sentence pair
// (buf holds a single line in the aligned-corpus format).  Every aligned
// (f, e) word pair increments word_translation and both marginal totals;
// words left unaligned on either side are counted against the special
// NULL token instead.
void LexTranslationTable::createTTable(const char* buf){
  AnnotatedParallelSentence sent;
  sent.ParseInputLine(buf);

  //iterate over the alignment to compute aligned words

  for(int i =0;i<sent.aligned.width();i++)
    {
      for (int j=0;j<sent.aligned.height();j++)
        {
          if (DEBUG) cerr << sent.aligned(i,j) << " ";
          if( sent.aligned(i,j))
            {
              if (DEBUG) cerr << TD::Convert(sent.f[i])  << " aligned to " << TD::Convert(sent.e[j]);
              ++word_translation[pair<WordID,WordID> (sent.f[i], sent.e[j])];
              ++total_foreign[sent.f[i]];
              ++total_english[sent.e[j]];
            }
        }
      if (DEBUG)  cerr << endl;
    }
  if (DEBUG) cerr << endl;

  const WordID NULL_ = TD::Convert("NULL");
  //handle unaligned words - align them to null
  for (int j =0; j < sent.e_len; j++) {
    if (sent.e_aligned[j]) continue;
    ++word_translation[pair<WordID,WordID> (NULL_, sent.e[j])];
    ++total_foreign[NULL_];
    ++total_english[sent.e[j]];
  }

  // symmetric case: unaligned foreign words also pair with NULL
  for (int i =0; i < sent.f_len; i++) {
    if (sent.f_aligned[i]) continue;
    ++word_translation[pair<WordID,WordID> (sent.f[i], NULL_)];
    ++total_english[NULL_];
    ++total_foreign[sent.f[i]];
  }
}

inline float safenlog(float v) {
  if (v == 1.0f) return 0.0f;
  float res = -log(v);
  if (res > 100.0f) res = 100.0f;
  return res;
}

// True iff f lies in (0.999, 1.001), i.e. f is approximately one.
// NOTE(review): despite the name, this tests closeness to 1, not 0; callers
// apply it to counts, where count == 1 means log(count) == 0 (a singleton).
static bool IsZero(float f) { return (0.999 < f) && (f < 1.001); }

// Interface for one feature family.  The driver (see main) makes three
// passes over the grammars and invokes these hooks in order:
//   1. ObserveFilteredRule   — per rule in the filtered grammar
//   2. ObserveUnfilteredRule — per rule occurrence in the unfiltered grammar
//   3. ExtractFeatures       — per unique filtered rule, to emit values
struct FeatureExtractor {
  // create any keys necessary
  virtual void ObserveFilteredRule(const WordID /* lhs */,
                                   const vector<WordID>& /* src */,
                                   const vector<WordID>& /* trg */) {}

  // compute statistics over keys, the same lhs-src-trg tuple may be seen
  // more than once
  virtual void ObserveUnfilteredRule(const WordID /* lhs */,
                                     const vector<WordID>& /* src */,
                                     const vector<WordID>& /* trg */,
                                     const RuleStatistics& /* info */) {}

  // compute features, a unique lhs-src-trg tuple will be seen exactly once
  virtual void ExtractFeatures(const WordID lhs,
                               const vector<WordID>& src,
                               const vector<WordID>& trg,
                               const RuleStatistics& info,
                               SparseVector<float>* result) const = 0;

  virtual ~FeatureExtractor() {}
};

// Fires LogRuleCount = log of this rule's corpus count (the CFE statistic),
// plus a SingletonRule indicator when that count is (approximately) 1.
struct LogRuleCount : public FeatureExtractor {
  LogRuleCount() :
    fid_(FD::Convert("LogRuleCount")),
    sfid_(FD::Convert("SingletonRule")),
    kCFE(FD::Convert("CFE")) {}
  virtual void ExtractFeatures(const WordID lhs,
                               const vector<WordID>& src,
                               const vector<WordID>& trg,
                               const RuleStatistics& info,
                               SparseVector<float>* result) const {
    (void) lhs; (void) src; (void) trg;  // only the rule's count is used
    result->set_value(fid_, log(info.counts.get(kCFE)));
    // IsZero(x) is true when x ~= 1, i.e. log(x) ~= 0: a singleton rule.
    if (IsZero(info.counts.get(kCFE)))
      result->set_value(sfid_, 1);
  }
  const int fid_;
  const int sfid_;
  const int kCFE;
};

// Constant feature: fires 1 on every rule, so its learned weight acts as a
// uniform per-rule penalty (or bonus).
struct RulePenalty : public FeatureExtractor {
  RulePenalty() : fid_(FD::Convert("RulePenalty")) {}

  virtual void ExtractFeatures(const WordID /*lhs*/,
                               const vector<WordID>& /*src*/,
                               const vector<WordID>& /*trg*/,
                               const RuleStatistics& /*info*/,
                               SparseVector<float>* result) const {
    result->set_value(fid_, 1);
  }

  const int fid_;
};

// The negative log of the condition rule probs
// ignoring the identities of the  non-terminals.
// i.e. the prob Hiero would assign.
// Also extracts Labelled features.
struct XFeatures: public FeatureExtractor {
  XFeatures() :
    fid_xfe(FD::Convert("XFE")),
    fid_xef(FD::Convert("XEF")),
    fid_labelledfe(FD::Convert("LabelledFE")),
    fid_labelledef(FD::Convert("LabelledEF")),
    fid_xesingleton(FD::Convert("XE_Singleton")),
    fid_xfsingleton(FD::Convert("XF_Singleton")),
    kCFE(FD::Convert("CFE")) {}

  // Pass 1 (filtered grammar): seed zero-count entries for every rule /
  // source / target key, so pass 2 only accumulates into surviving keys.
  virtual void ObserveFilteredRule(const WordID /*lhs*/,
                                   const vector<WordID>& src,
                                   const vector<WordID>& trg) {
    RuleTuple r(-1, src, trg);  // lhs fixed to -1: labels ignored here
    map_rule(r);
    rule_counts.inc(r, 0);
    source_counts.inc(r.source(), 0);
    target_counts.inc(r.target(), 0);
  }

  // Pass 2 (unfiltered grammar): add this occurrence's CFE count to the
  // keys created in pass 1 (inc_if_exists is a no-op for unseen keys).
  virtual void ObserveUnfilteredRule(const WordID /*lhs*/,
                                     const vector<WordID>& src,
                                     const vector<WordID>& trg,
                                     const RuleStatistics& info) {
    RuleTuple r(-1, src, trg);
    map_rule(r);
    const int count = info.counts.get(kCFE);
    assert(count > 0);
    rule_counts.inc_if_exists(r, count);
    source_counts.inc_if_exists(r.source(), count);
    target_counts.inc_if_exists(r.target(), count);
  }

  // Pass 3: emit the label-free conditionals
  //   XFE = log count(trg) - log count(rule)   (~ -log p(src|trg))
  //   XEF = log count(src) - log count(rule)   (~ -log p(trg|src))
  // and Labelled variants using this rule's own CFE count as numerator.
  virtual void ExtractFeatures(const WordID /*lhs*/,
                               const vector<WordID>& src,
                               const vector<WordID>& trg,
                               const RuleStatistics& info,
                               SparseVector<float>* result) const {
    RuleTuple r(-1, src, trg);
    map_rule(r);
    double l_r_freq = log(rule_counts(r));

    const int t_c = target_counts(r.target());
    assert(t_c > 0);
    result->set_value(fid_xfe, log(t_c) - l_r_freq);
    result->set_value(fid_labelledfe, log(t_c) - log(info.counts.get(kCFE)));
//    if (t_c == 1)
//      result->set_value(fid_xesingleton, 1.0);

    const int s_c = source_counts(r.source());
    assert(s_c > 0);
    result->set_value(fid_xef, log(s_c) - l_r_freq);
    result->set_value(fid_labelledef, log(s_c) - log(info.counts.get(kCFE)));
//    if (s_c == 1)
//      result->set_value(fid_xfsingleton, 1.0);
  }

  // Canonicalize nonterminal slots: collect the target side's nonterminal
  // entries (values <= 0) and overwrite the source side's nonterminal
  // entries with them, in order, so reordered variants share one key.
  // Assumes both sides carry the same number of nonterminals —
  // indexes.at(i++) would throw std::out_of_range otherwise (TODO confirm).
  void map_rule(RuleTuple& r) const {
    vector<WordID> indexes; int i=0;
    for (vector<WordID>::iterator it = r.target().begin(); it != r.target().end(); ++it) {
      if (*it <= 0)
        indexes.push_back(*it);
    }
    for (vector<WordID>::iterator it = r.source().begin(); it != r.source().end(); ++it) {
      if (*it <= 0)
        *it = indexes.at(i++);
    }
  }

  const int fid_xfe, fid_xef;
  const int fid_labelledfe, fid_labelledef;
  const int fid_xesingleton, fid_xfsingleton;
  const int kCFE;
  RuleFreqCount rule_counts;
  FreqCount< vector<WordID> > source_counts, target_counts;
};


struct LabelledRuleConditionals: public FeatureExtractor {
  LabelledRuleConditionals() :
    fid_fe(FD::Convert("LabelledFE")),
    fid_ef(FD::Convert("LabelledEF")),
    kCFE(FD::Convert("CFE")) {}
  virtual void ObserveFilteredRule(const WordID lhs,
                                   const vector<WordID>& src,
                                   const vector<WordID>& trg) {
    RuleTuple r(lhs, src, trg);
    rule_counts.inc(r, 0);
    source_counts.inc(r.source(), 0);

    target_counts.inc(r.target(), 0);
  }

  virtual void ObserveUnfilteredRule(const WordID lhs,
                                     const vector<WordID>& src,
                                     const vector<WordID>& trg,
                                     const RuleStatistics& info) {
    RuleTuple r(lhs, src, trg);
    rule_counts.inc_if_exists(r, info.counts.get(kCFE));
    source_counts.inc_if_exists(r.source(), info.counts.get(kCFE));

    target_counts.inc_if_exists(r.target(), info.counts.get(kCFE));
  }

  virtual void ExtractFeatures(const WordID lhs,
                               const vector<WordID>& src,
                               const vector<WordID>& trg,
                               const RuleStatistics& /*info*/,
                               SparseVector<float>* result) const {
    RuleTuple r(lhs, src, trg);
    double l_r_freq = log(rule_counts(r));
    result->set_value(fid_fe, log(target_counts(r.target())) - l_r_freq);
    result->set_value(fid_ef, log(source_counts(r.source())) - l_r_freq);
  }

  const int fid_fe, fid_ef;
  const int kCFE;
  RuleFreqCount rule_counts;
  FreqCount< vector<WordID> > source_counts, target_counts;
};

// Negative log of the LHS marginal probability:
// log(total count mass) - log(count mass under this lhs).
struct LHSProb: public FeatureExtractor {
  LHSProb() : fid_(FD::Convert("LHSProb")), kCFE(FD::Convert("CFE")), total_count(0) {}

  // Accumulate per-LHS and overall CFE count mass from the unfiltered grammar.
  virtual void ObserveUnfilteredRule(const WordID lhs,
                                     const vector<WordID>& /*src*/,
                                     const vector<WordID>& /*trg*/,
                                     const RuleStatistics& info) {
    const int c = info.counts.get(kCFE);
    lhs_counts.inc(lhs, c);
    total_count += c;
  }

  virtual void ExtractFeatures(const WordID lhs,
                               const vector<WordID>& /*src*/,
                               const vector<WordID>& /*trg*/,
                               const RuleStatistics& /*info*/,
                               SparseVector<float>* result) const {
    const double lhs_log_prob = log(total_count) - log(lhs_counts(lhs));
    result->set_value(fid_, lhs_log_prob);
  }

  const int fid_;
  const int kCFE;
  int total_count;
  FreqCount<WordID> lhs_counts;
};

// Proper rule generative probability: p( s,t | lhs)
// Emitted as a negative log: log count(lhs) - log count(rule).
struct GenerativeProb: public FeatureExtractor {
  GenerativeProb() :
    fid_(FD::Convert("GenerativeProb")),
    kCFE(FD::Convert("CFE")) {}

  // Accumulate the total CFE count mass observed under each LHS label.
  virtual void ObserveUnfilteredRule(const WordID lhs,
                                     const vector<WordID>& /*src*/,
                                     const vector<WordID>& /*trg*/,
                                     const RuleStatistics& info)
  { lhs_counts.inc(lhs, info.counts.get(kCFE)); }

  virtual void ExtractFeatures(const WordID lhs,
                               const vector<WordID>& /*src*/,
                               const vector<WordID>& /*trg*/,
                               const RuleStatistics& info,
                               SparseVector<float>* result) const {
    // -log p(s,t|lhs) = log count(lhs) - log count(lhs,s,t)
    double log_prob = log(lhs_counts(lhs)) - log(info.counts.get(kCFE));
    result->set_value(fid_, log_prob);
  }

  const int fid_;
  const int kCFE;
  FreqCount<WordID> lhs_counts;
};

// remove terminals from the rules before estimating the conditional prob
// NOTE(review): map_rule below rewrites entries with *it <= 0, which is the
// encoding used for NONTERMINAL slots elsewhere in this file (see
// XFeatures::map_rule).  So this appears to collapse nonterminal labels to
// a generic -1 (keeping terminals) rather than removing terminals as the
// comment above claims — confirm against the grammar symbol encoding.
struct LabellingShape: public FeatureExtractor {
  LabellingShape() : fid_(FD::Convert("LabellingShape")), kCFE(FD::Convert("CFE")) {}

  // Pass 1: seed zero counts for each (mapped) rule and source-side key.
  virtual void ObserveFilteredRule(const WordID /*lhs*/,
                                   const vector<WordID>& src,
                                   const vector<WordID>& trg) {
    RuleTuple r(-1, src, trg);
    map_rule(r);
    rule_counts.inc(r, 0);
    source_counts.inc(r.source(), 0);
  }

  // Pass 2: accumulate CFE counts into keys created in pass 1.
  virtual void ObserveUnfilteredRule(const WordID /*lhs*/,
                                     const vector<WordID>& src,
                                     const vector<WordID>& trg,
                                     const RuleStatistics& info) {
    RuleTuple r(-1, src, trg);
    map_rule(r);
    rule_counts.inc_if_exists(r, info.counts.get(kCFE));
    source_counts.inc_if_exists(r.source(), info.counts.get(kCFE));
  }

  // Pass 3: emit log count(source) - log count(rule) over the mapped keys.
  virtual void ExtractFeatures(const WordID /*lhs*/,
                               const vector<WordID>& src,
                               const vector<WordID>& trg,
                               const RuleStatistics& /*info*/,
                               SparseVector<float>* result) const {
    RuleTuple r(-1, src, trg);
    map_rule(r);
    double l_r_freq = log(rule_counts(r));
    result->set_value(fid_, log(source_counts(r.source())) - l_r_freq);
  }

  // Replace every entry <= 0 with a generic -1 (see NOTE above regarding
  // the terminal vs nonterminal encoding).
  void map_rule(RuleTuple& r) const {
    for (vector<WordID>::iterator it = r.target().begin(); it != r.target().end(); ++it)
      if (*it <= 0) *it = -1;
    for (vector<WordID>::iterator it = r.source().begin(); it != r.source().end(); ++it)
      if (*it <= 0) *it = -1;
  }

  const int fid_, kCFE;
  RuleFreqCount rule_counts;
  FreqCount< vector<WordID> > source_counts;
};


// this extracts the lexical translation prob features
// in BOTH directions.
struct LexProbExtractor : public FeatureExtractor {
  // Builds the lexical translation table once, up front, by streaming the
  // aligned corpus (path taken from the global aligned_corpus, set in main).
  LexProbExtractor() :
      e2f_(FD::Convert("LexE2F")), f2e_(FD::Convert("LexF2E")) {
    ReadFile rf(aligned_corpus);
    //create lexical translation table
    cerr << "Computing lexical translation probabilities from " << aligned_corpus << "..." << endl;
    char* buf = new char[MAX_LINE_LENGTH];
    istream& alignment = *rf.stream();
    while(alignment) {
      alignment.getline(buf, MAX_LINE_LENGTH);
      if (buf[0] == 0) continue;  // skip blank lines (and the final failed read)
      table.createTTable(buf);
    }
    delete[] buf;
  }

  // Computes per-rule lexical translation scores in both directions from
  // the rule's word alignments, emitting them as clamped negative logs
  // (safenlog).  Unaligned words are scored against the NULL token.
  virtual void ExtractFeatures(const WordID /*lhs*/,
                               const vector<WordID>& src,
                               const vector<WordID>& trg,
                               const RuleStatistics& info,
                               SparseVector<float>* result) const {
    // Per-word accumulators: (number of links, sum of link probabilities).
    map <WordID, pair<int, float> > foreign_aligned;
    map <WordID, pair<int, float> > english_aligned;

    //Loop over all the alignment points to compute lexical translation probability
    const vector< pair<short,short> >& al = info.aligns;
    vector< pair<short,short> >::const_iterator ita;
    for (ita = al.begin(); ita != al.end(); ++ita) {
            if (DEBUG) {
              cerr << "\nA:" << ita->first << "," << ita->second << "::";
              cerr <<  TD::Convert(src[ita->first]) << "-" << TD::Convert(trg[ita->second]);
            }

            //Lookup this alignment probability in the table
            int temp = table.word_translation[pair<WordID,WordID> (src[ita->first],trg[ita->second])];
            float f2e=0, e2f=0;
            if ( table.total_foreign[src[ita->first]] != 0)
              f2e = (float) temp / table.total_foreign[src[ita->first]];
            if ( table.total_english[trg[ita->second]] !=0 )
              e2f = (float) temp / table.total_english[trg[ita->second]];
            if (DEBUG) printf (" %d %E %E\n", temp, f2e, e2f);

            //local counts to keep track of which things haven't been aligned, to later compute their null alignment
            if (foreign_aligned.count(src[ita->first])) {
              foreign_aligned[ src[ita->first] ].first++;
              foreign_aligned[ src[ita->first] ].second += e2f;
            } else {
              foreign_aligned[ src[ita->first] ] = pair<int,float> (1,e2f);
            }

            if (english_aligned.count( trg[ ita->second] )) {
               english_aligned[ trg[ ita->second] ].first++;
               english_aligned[ trg[ ita->second] ].second += f2e;
            } else {
               english_aligned[ trg[ ita->second] ] = pair<int,float> (1,f2e);
            }
          }

          float final_lex_f2e=1, final_lex_e2f=1;
          static const WordID NULL_ = TD::Convert("NULL");

          //compute lexical weight P(F|E) and include unaligned foreign words
           for(int i=0;i<src.size(); i++) {
               if (!table.total_foreign.count(src[i])) continue;      //if we dont have it in the translation table, we won't know its lexical weight

               if (foreign_aligned.count(src[i]))
                 {
                   // average over this word's alignment links
                   pair<int, float> temp_lex_prob = foreign_aligned[src[i]];
                   final_lex_e2f *= temp_lex_prob.second / temp_lex_prob.first;
                 }
               else //dealing with null alignment
                 {
                   int temp_count = table.word_translation[pair<WordID,WordID> (src[i],NULL_)];
                   float temp_e2f = (float) temp_count / table.total_english[NULL_];
                   final_lex_e2f *= temp_e2f;
                 }

             }

           //compute P(E|F) unaligned english words
           for(int j=0; j< trg.size(); j++) {
               if (!table.total_english.count(trg[j])) continue;

               if (english_aligned.count(trg[j]))
                 {
                   // average over this word's alignment links
                   pair<int, float> temp_lex_prob = english_aligned[trg[j]];
                   final_lex_f2e *= temp_lex_prob.second / temp_lex_prob.first;
                 }
               else //dealing with null
                 {
                   int temp_count = table.word_translation[pair<WordID,WordID> (NULL_,trg[j])];
                   float temp_f2e = (float) temp_count / table.total_foreign[NULL_];
                   final_lex_f2e *= temp_f2e;
                 }
           }
     result->set_value(e2f_, safenlog(final_lex_e2f));
     result->set_value(f2e_, safenlog(final_lex_f2e));
  }
  const int e2f_, f2e_;
  // mutable: ExtractFeatures is const, but the operator[] lookups above
  // default-insert missing entries into the table's maps.
  mutable LexTranslationTable table;
};

struct Featurizer {
  Featurizer(const vector<boost::shared_ptr<FeatureExtractor> >& ex) : extractors(ex) {
  }
  void Callback1(WordID lhs, const vector<WordID>& src, const ID2RuleStatistics& trgs) {
    for (ID2RuleStatistics::const_iterator it = trgs.begin(); it != trgs.end(); ++it) {
      for (int i = 0; i < extractors.size(); ++i)
        extractors[i]->ObserveFilteredRule(lhs, src, it->first);
    }
  }
  void Callback2(WordID lhs, const vector<WordID>& src, const ID2RuleStatistics& trgs) {
    for (ID2RuleStatistics::const_iterator it = trgs.begin(); it != trgs.end(); ++it) {
      for (int i = 0; i < extractors.size(); ++i)
        extractors[i]->ObserveUnfilteredRule(lhs, src, it->first, it->second);
    }
  }
  void Callback3(WordID lhs, const vector<WordID>& src, const ID2RuleStatistics& trgs) {
    for (ID2RuleStatistics::const_iterator it = trgs.begin(); it != trgs.end(); ++it) {
      SparseVector<float> feats;
      for (int i = 0; i < extractors.size(); ++i)
        extractors[i]->ExtractFeatures(lhs, src, it->first, it->second, &feats);
      cout << '[' << TD::Convert(-lhs) << "] ||| ";
      WriteNamed(src, &cout);
      cout << " ||| ";
      WriteAnonymous(it->first, &cout);
      cout << " ||| ";
      print(cout,feats,"=");
      cout << endl;
    }
  }
 private:
  vector<boost::shared_ptr<FeatureExtractor> > extractors;
};

// C-style trampoline for pass 1: StripedGrammarLexer hands back the
// Featurizer through the void* extra slot.
void cb1(WordID lhs, const vector<WordID>& src_rhs, const ID2RuleStatistics& rules, void* extra) {
  static_cast<Featurizer*>(extra)->Callback1(lhs, src_rhs, rules);
}

// Trampoline for pass 2 (unfiltered grammar statistics).
void cb2(WordID lhs, const vector<WordID>& src_rhs, const ID2RuleStatistics& rules, void* extra) {
  static_cast<Featurizer*>(extra)->Callback2(lhs, src_rhs, rules);
}

// Trampoline for pass 3 (feature extraction and output).
void cb3(WordID lhs, const vector<WordID>& src_rhs, const ID2RuleStatistics& rules, void* extra) {
  static_cast<Featurizer*>(extra)->Callback3(lhs, src_rhs, rules);
}

int main(int argc, char** argv){
  FERegistry reg;
  reg.Register("LogRuleCount", new FEFactory<LogRuleCount>);
  reg.Register("LexProb", new FEFactory<LexProbExtractor>);
  reg.Register("XFeatures", new FEFactory<XFeatures>);
  reg.Register("LabelledRuleConditionals", new FEFactory<LabelledRuleConditionals>);
  reg.Register("RulePenalty", new FEFactory<RulePenalty>);
  reg.Register("LHSProb", new FEFactory<LHSProb>);
  reg.Register("LabellingShape", new FEFactory<LabellingShape>);
  reg.Register("GenerativeProb", new FEFactory<GenerativeProb>);
  po::variables_map conf;
  InitCommandLine(reg, argc, argv, &conf);
  aligned_corpus = conf["aligned_corpus"].as<string>();  // GLOBAL VAR
  ReadFile fg1(conf["filtered_grammar"].as<string>());

  vector<string> feats = conf["feature"].as<vector<string> >();
  vector<boost::shared_ptr<FeatureExtractor> > extractors(feats.size());
  for (int i = 0; i < feats.size(); ++i)
    extractors[i] = reg.Create(feats[i]);
  Featurizer fizer(extractors);

  cerr << "Reading filtered grammar to detect keys..." << endl;
  StripedGrammarLexer::ReadStripedGrammar(fg1.stream(), cb1, &fizer);

  cerr << "Reading unfiltered grammar..." << endl;
  StripedGrammarLexer::ReadStripedGrammar(&cin, cb2, &fizer);

  ReadFile fg2(conf["filtered_grammar"].as<string>());
  cerr << "Reading filtered grammar and adding features..." << endl;
  StripedGrammarLexer::ReadStripedGrammar(fg2.stream(), cb3, &fizer);

  return 0;
}