#include "ff_charset.h"

#include "tdict.h"
#include "hg.h"
#include "fdict.h"
#include "stringlib.h"

using namespace std;

// Constructs the feature function. `param` is accepted but not consulted
// here; the feature id is obtained from FD::Convert for the name
// "NonLatinCount" and stored in fid_.
NonLatinCount::NonLatinCount(const std::string& param)
    : FeatureFunction(),
      fid_(FD::Convert("NonLatinCount")) {
}

// Returns true if `word` contains at least one byte outside 7-bit ASCII,
// i.e. (for UTF-8 text) at least one character encoded in more than one byte.
// Note that, despite the name, accented Latin letters such as U+00E9 are
// multi-byte in UTF-8 and therefore also count as "non-Latin" here.
//
// Returns false for the empty string.
//
// Each byte is inspected directly instead of stepping by a decoded sequence
// length, so the scan always makes forward progress -- including on malformed
// UTF-8 (a stray continuation byte, 0xFE or 0xFF), which is reported as
// non-ASCII rather than risking a loop that never advances.
bool ContainsNonLatin(const std::string& word) {
  for (std::string::size_type pos = 0; pos < word.size(); ++pos) {
    // Go through unsigned char: plain char may be signed, which would make
    // high-bit bytes negative.
    const unsigned char byte = static_cast<unsigned char>(word[pos]);
    if (byte >= 0x80) return true;
  }
  return false;
}

// Counts the target-side words of the edge's rule that ContainsNonLatin()
// flags and, if that count is non-zero, stores it in `features` under fid_.
// When no word is flagged, `features` is left untouched.
//
// Only `edge` and `features` are used; smeta, ant_contexts,
// estimated_features and context are not read or written by this feature.
//
// Per-word results are memoized in is_non_latin_, so a given WordID is
// converted to its string form and scanned only the first time it is seen.
// NOTE(review): the cache is mutated from this const method without any
// locking visible here -- confirm this is never called concurrently on the
// same object.
void NonLatinCount::TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                          const Hypergraph::Edge& edge,
                                          const std::vector<const void*>& ant_contexts,
                                          SparseVector<double>* features,
                                          SparseVector<double>* estimated_features,
                                          void* context) const {
  const std::vector<WordID>& e = edge.rule_->e();
  int count = 0;
  // Index with the container's own size_type to avoid a signed/unsigned
  // comparison against e.size().
  for (std::vector<WordID>::size_type i = 0; i < e.size(); ++i) {
    const WordID w = e[i];
    // Non-positive ids are skipped (presumably non-terminal placeholders in
    // the rule -- TODO confirm).
    if (w <= 0) continue;

    // One tree search serves both hit and miss: lower_bound either lands on
    // the cached entry or yields the correct insertion hint.
    std::map<WordID, bool>::iterator it = is_non_latin_.lower_bound(w);
    if (it == is_non_latin_.end() || w < it->first) {
      const bool non_latin = ContainsNonLatin(TD::Convert(w));
      it = is_non_latin_.insert(it, std::map<WordID, bool>::value_type(w, non_latin));
    }
    if (it->second) ++count;
  }
  if (count) features->set_value(fid_, count);
}