#include <cstdlib>
#include <cstring>
#include <vector>
#include <sstream>
#include <fstream>
#include <string>
#include <iostream>
#include <map>
#include "hg.h"
#include "ff_dwarf.h"
#include "dwarf.h"
#include "wordid.h"
#include "tdict.h"
#include "filelib.h"
#include "sentence_metadata.h"
#include "stringlib.h"

using namespace std;

// Builds the DWARF feature function from a whitespace-separated
// configuration string.
//
// Each token of `param` is either
//   model=value[,extra[,extra]]   e.g. oris=/fs/clip-galep3eval/hendra/z2e/oris128.gz
// or a bare switch. Recognised models:
//   minfreq                       value is the frequency cutoff
//   oris, orit, doms, domt        value is a count file; the optional extras
//                                 are passed to the matching setAlpha*/setBeta*
//   oris_backward, orit_backward  reuse (or load) the oris/orit table
//   porislr, porisrl              positional orientation; extra 1 = depth
//   goris                         generalized orientation; extra 1 = tag file
//   pgorislr, pgorisrl            extra 1 = depth, extra 2 = tag file
//   pdomslr, pdomsrl              positional dominance; extra 1 = depth
//   pgdomslr, pgdomsrl            extra 1 = depth, extra 2 = tag file
//   bdoms                         border dominance count file
//   tfw_count                     value is a list of target function words
// Recognised bare switches: tfw_count, oris_backward, orit_backward,
// explicit_soseos. Anything else is reported on stderr and ignored.
Dwarf::Dwarf(const std::string& param) {
  sSOS="<s>";
  sEOS="</s>";
  kSOS=TD::Convert(sSOS);
  kEOS=TD::Convert(sEOS);
  kGOAL=TD::Convert("S")*-1;
  // The sentence id and function-word count are held behind pointers; the
  // const TraversalFeaturesImpl() updates them through these pointers.
  _sent_id = (int *)malloc(sizeof(int));
  *_sent_id = -1;
  if (DEBUG) cerr << "here = " << *_sent_id << endl;
  _fwcount = (int *)malloc(sizeof(int));
  *_fwcount = -1;
  cerr << "initializing dwarf" << endl;
  flag_oris=false; flag_orit=false; flag_doms=false; flag_domt=false; flag_tfw_count=false;
  flag_bdoms=false; flag_porislr=false; flag_porisrl=false; flag_goris=false;
  flag_pgorislr=false; flag_pgorisrl=false;
  flag_pdomslr=false; flag_pdomsrl=false; flag_pgdomslr=false; flag_pgdomsrl=false; flag_gdoms=false;
  flag_oris_backward=false; flag_orit_backward=false;
  explicit_soseos=false;
  SetStateSize(STATE_SIZE*sizeof(int));
  als = new Alignment();
  als->clearAls(Alignment::MAX_WORDS,Alignment::MAX_WORDS);

  istringstream iss(param);
  string w;
  while (iss >> w) {
    const string::size_type equal = w.find('=');
    if (equal!=string::npos) {
      const string model = w.substr(0,equal);
      vector<string> params;
      Tokenize(w.substr(equal+1),',',&params);
      // "model=" with nothing after the '=' leaves no value to read.
      if (params.empty()) {
        cerr << "DWARF: missing value for model: " << model << endl;
        continue;
      }
      const string fn = params[0];
      if (model == "minfreq") {
        cerr << "model minfreq " << fn << endl;
        als->setFreqCutoff(atoi(fn.c_str()));
      } else if (model == "oris") {
        flag_oris = readOrientation(&toris,fn,&sfw);
        if (flag_oris) {
          oris_ = FD::Convert("OrientationSource");
        }
        if (params.size()>1) als->setAlphaOris(atof(params[1].c_str()));
        if (params.size()>2) als->setBetaOris(atof(params[2].c_str()));
      } else if (model == "porislr") {
        flag_porislr = readOrientation(&tporislr,fn,&sfw,true);
        poris_nlr = 0;
        if (flag_porislr) {
          porislr_ = FD::Convert("OrientationSourcePositionfulLeftRight");
        }
        if (params.size()>1) poris_nlr = atoi(params[1].c_str());
        if (DEBUG) cerr << "  maximum poris depth=" << poris_nlr  << endl;
      } else if (model == "porisrl") {
        flag_porisrl = readOrientation(&tporisrl,fn,&sfw,true);
        poris_nrl = 0;
        if (flag_porisrl) {
          porisrl_ = FD::Convert("OrientationSourcePositionfulRightLeft");
        }
        if (params.size()>1) poris_nrl = atoi(params[1].c_str());
        if (DEBUG) cerr << "  maximum poris depth=" << poris_nrl  << endl;
      } else if (model=="goris") {
        flag_goris = readOrientation(&tgoris,fn,&sfw);
        if (flag_goris) {
          goris_ = FD::Convert("OrientationSourceGeneralized");
        }
        if (params.size()>1) {
          readTags(params[1],&tags);
          generalizeOrientation(&tgoris,tags);
        }
      } else if (model=="pgorislr") {
        flag_pgorislr = readOrientation(&tpgorislr,fn,&sfw,true);
        pgoris_nlr = 0;
        if (flag_pgorislr) {
          pgorislr_ = FD::Convert("OrientationSourceGeneralizedPositionfulLeftRight");
        }
        if (DEBUG) {
          cerr << "BEFORE GENERALIZATION" << endl;
          tpgorislr.print();
        }
        if (params.size()>1) pgoris_nlr = atoi(params[1].c_str());
        if (params.size()>2) {
          readTags(params[2],&tags);
          generalizeOrientation(&tpgorislr,tags,true);
        }
        if (DEBUG) {
          cerr << "AFTER GENERALIZATION" << endl;
          tpgorislr.print();
        }
      } else if (model=="pgorisrl") {
        flag_pgorisrl = readOrientation(&tpgorisrl,fn,&sfw,true);
        pgoris_nrl = 0;
        if (flag_pgorisrl) {
          // Must differ from the pgorislr feature name; otherwise both
          // models write to the same feature id.
          pgorisrl_ = FD::Convert("OrientationSourceGeneralizedPositionfulRightLeft");
        }
        if (params.size()>1) pgoris_nrl = atoi(params[1].c_str());
        if (params.size()>2) {
          readTags(params[2],&tags);
          generalizeOrientation(&tpgorisrl,tags,true);
        }
      } else if (model == "oris_backward") {
        flag_oris_backward = true;
        // The backward model scores against toris; load it unless "oris"
        // already did.
        if (!flag_oris) readOrientation(&toris,fn,&sfw);
        oris_backward_ = FD::Convert("OrientationSourceBackward");
        if (params.size()>1) als->setAlphaOris(atof(params[1].c_str()));
        if (params.size()>2) als->setBetaOris(atof(params[2].c_str()));
      } else if (model == "orit") {
        flag_orit = readOrientation(&torit,fn,&tfw);
        if (flag_orit) {
          orit_ = FD::Convert("OrientationTarget");
        }
        if (params.size()>1) als->setAlphaOrit(atof(params[1].c_str()));
        if (params.size()>2) als->setBetaOrit(atof(params[2].c_str()));
      } else if (model == "orit_backward") {
        flag_orit_backward = true;
        if (!flag_orit) readOrientation(&torit,fn,&tfw);
        orit_backward_ = FD::Convert("OrientationTargetBackward");
        if (params.size()>1) als->setAlphaOrit(atof(params[1].c_str()));
        if (params.size()>2) als->setBetaOrit(atof(params[2].c_str()));
      } else if (model == "doms") {
        flag_doms = readDominance(&tdoms,fn,&sfw);
        if (flag_doms) {
          doms_ = FD::Convert("DominanceSource");
        }
        if (params.size()>1) als->setAlphaDoms(atof(params[1].c_str()));
        if (params.size()>2) als->setBetaDoms(atof(params[2].c_str()));
      } else if (model == "pdomsrl") {
        flag_pdomsrl = readDominance(&tpdomsrl,fn,&sfw,true);
        pdoms_nrl = 0;  // default depth when none is given, as for poris
        if (flag_pdomsrl) {
          pdomsrl_ = FD::Convert("DominanceSourcePositionfulRightLeft");
        }
        if (params.size()>1) pdoms_nrl = atoi(params[1].c_str());
      } else if (model == "pdomslr") {
        flag_pdomslr = readDominance(&tpdomslr,fn,&sfw,true);
        pdoms_nlr = 0;  // default depth when none is given, as for poris
        if (DEBUG) tpdomslr.print();
        if (flag_pdomslr) {
          pdomslr_ = FD::Convert("DominanceSourcePositionfulLeftRight");
        }
        if (params.size()>1) pdoms_nlr = atoi(params[1].c_str());
      } else if (model == "pgdomsrl") {
        flag_pgdomsrl = readDominance(&tpgdomsrl,fn,&sfw,true);
        pgdoms_nrl = 0;  // default depth when none is given, as for poris
        if (flag_pgdomsrl) {
          pgdomsrl_ = FD::Convert("DominanceSourceGeneralizedPositionfulRightLeft");
        }
        if (params.size()>1) pgdoms_nrl = atoi(params[1].c_str());
        if (params.size()>2) {
          readTags(params[2],&tags);
          generalizeDominance(&tpgdomsrl,tags,true);
        }
      } else if (model == "pgdomslr") {
        flag_pgdomslr = readDominance(&tpgdomslr,fn,&sfw,true);
        pgdoms_nlr = 0;  // default depth when none is given, as for poris
        if (flag_pgdomslr) {
          pgdomslr_ = FD::Convert("DominanceSourceGeneralizedPositionfulLeftRight");
        }
        if (params.size()>1) pgdoms_nlr = atoi(params[1].c_str());
        if (params.size()>2) {
          readTags(params[2],&tags);
          if (DEBUG) {
            for (map<WordID,WordID>::const_iterator it=tags.begin(); it!=tags.end(); it++) {
              cerr << "tags = " << TD::Convert(it->first) << ", " << TD::Convert(it->second) << endl;
            }
          }
          generalizeDominance(&tpgdomslr,tags,true);
        }
        if (DEBUG) tpgdomslr.print();
      } else if (model == "bdoms") {
        flag_bdoms = readDominance(&tbdoms,fn,&sfw);
        if (flag_bdoms) {
          bdoms_ = FD::Convert("BorderDominanceSource");
        }
      } else if (model == "domt") {
        flag_domt = readDominance(&tdomt,fn,&tfw);
        if (flag_domt) {
          domt_ = FD::Convert("DominanceTarget");
        }
        if (params.size()>1) als->setAlphaDomt(atof(params[1].c_str()));
        if (params.size()>2) als->setBetaDomt(atof(params[2].c_str()));
      } else if (model== "tfw_count") {
        flag_tfw_count = readList(fn,&tfw);
        tfw_count_ = FD::Convert("TargetFunctionWordsCount");
      } else {
        cerr << "DWARF doesn't understand this model: " << model << endl;
      }
    } else {
      if (w=="tfw_count") {
        flag_tfw_count = true;
        tfw_count_ = FD::Convert("TargetFunctionWordsCount");
      } else if (w=="oris_backward") {
        flag_oris_backward = true;
        oris_backward_ = FD::Convert("OrientationSourceBackward");
      } else if (w=="orit_backward") {
        flag_orit_backward = true;
        orit_backward_ = FD::Convert("OrientationTargetBackward");
      } else if (w=="explicit_soseos") {
        explicit_soseos=true;
      } else {
        // Report the offending token, not the whole configuration string.
        cerr << "DWARF doesn't need this param: " << w << endl;
      }
    }
  }
  if (DEBUG) {
    for (map<WordID,int>::const_iterator it=sfw.begin(); it!=sfw.end(); it++) {
      cerr << "   FW:" << TD::Convert(it->first) << endl;
    }
  }
}

// Moves the outside estimate (*bonus) into the inside cost (*cost) when
// `fold` is true; otherwise leaves both untouched.
static inline void dwarfFoldBonus(bool fold, double* cost, double* bonus) {
  if (fold) {
    *cost += *bonus;
    *bonus = 0;
  }
}

// Scores one hypergraph edge with every enabled DWARF model.
//
// For each enabled model the inside cost is written to `features` and the
// outside estimate to `estimated_features`. On the final edge (the edge
// spans the whole source and its LHS is kGOAL), unless explicit_soseos is
// set, the source-side orientation/dominance models fold their estimate
// into the cost. The outgoing state is written to `context`, which is
// treated as an array of STATE_SIZE ints: slots 50 and 51 hold the two
// border-dominance values as float bit patterns and the last slot holds
// the edge span encoded by Alignment::link().
void Dwarf::TraversalFeaturesImpl(const SentenceMetadata& smeta,
                                     const Hypergraph::Edge& edge,
                                     const std::vector<const void*>& ant_contexts,
                                     SparseVector<double>* features,
                                     SparseVector<double>* estimated_features,
                                     void* context) const {
  if (DEBUG) cerr << "TraversalFeaturesImpl" << endl;
  // All scratch outputs start at zero so that no model is ever handed an
  // uninitialized accumulator.
  double cost=0, bonus=0, bo1=0, bo2=0, bo1_bonus=0, bo2_bonus=0;
  double bdoms_state_mono=0;
  double bdoms_state_nonmono=0;
  // Borrow the rule instead of copying it for every edge.
  const TRule& r = *edge.rule_;
  if (DEBUG) cerr << " sent_id=" << *_sent_id << ", " << smeta.GetSentenceID() << endl;
  if (DEBUG) cerr << "rule = " << r.AsString() << endl;
  if (DEBUG) cerr << "rule[i,j] = " << edge.i_ << "," << edge.j_ << endl;

  if (*_sent_id != smeta.GetSentenceID()) { //new sentence
    // Recount the source function words once per sentence.
    *_sent_id = smeta.GetSentenceID();
    const Lattice& l = smeta.GetSourceLattice();
    *_fwcount=0;
    for (int i=0; i<smeta.GetSourceLength(); i++) {
      if (sfw.find(l[i][0].label)!=sfw.end()) {
        *_fwcount+=1;
      }
    }
    if (DEBUG) cerr << "new sentence[" << *_sent_id << "]="<<*_fwcount<<endl;
  }

  // prepare() reports whether the resulting alignment contains no function
  // words; if so the models are skipped and the state is simplified cheaply.
  const bool nofw = als->prepare(*edge.rule_, ant_contexts, sfw, tfw,smeta.GetSourceLattice(),edge.i_,edge.j_);
  const bool isFinal = (edge.i_==0 && edge.j_==smeta.GetSourceLength() && r.GetLHS()==kGOAL);
  const bool foldBonus = isFinal && !explicit_soseos;
  if (DEBUG) cerr << "nofw = " << nofw << endl;

  if (flag_tfw_count) {
    // Number of target-side rule symbols that are target function words.
    double count = 0;
    for (size_t i=0; i<r.e_.size(); i++) {
      if (tfw.find(r.e_[i])!=tfw.end()) count++;
    }
    features->set_value(tfw_count_,count);
  }

  if (flag_oris) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw) als->computeOrientationSource(toris,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus);
    dwarfFoldBonus(foldBonus,&cost,&bonus);
    features->set_value(oris_,cost);
    estimated_features->set_value(oris_,bonus);
  }
  if (flag_porislr) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw)
      als->computeOrientationSourcePos(tporislr,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,poris_nlr,0);
    dwarfFoldBonus(foldBonus,&cost,&bonus);
    features->set_value(porislr_,cost);
    estimated_features->set_value(porislr_,bonus);
  }
  if (flag_porisrl) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw)
      als->computeOrientationSourcePos(tporisrl,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,0,poris_nrl);
    dwarfFoldBonus(foldBonus,&cost,&bonus);
    features->set_value(porisrl_,cost);
    estimated_features->set_value(porisrl_,bonus);
  }
  if (flag_pgorislr) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw)
      als->computeOrientationSourcePos(tpgorislr,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,pgoris_nlr,0);
    dwarfFoldBonus(foldBonus,&cost,&bonus);
    features->set_value(pgorislr_,cost);
    estimated_features->set_value(pgorislr_,bonus);
  }
  if (flag_pgorisrl) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw)
      als->computeOrientationSourcePos(tpgorisrl,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,0,pgoris_nrl);
    dwarfFoldBonus(foldBonus,&cost,&bonus);
    features->set_value(pgorisrl_,cost);
    estimated_features->set_value(pgorisrl_,bonus);
  }
  if (flag_goris) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw) als->computeOrientationSource(tgoris,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus);
    dwarfFoldBonus(foldBonus,&cost,&bonus);
    features->set_value(goris_,cost);
    estimated_features->set_value(goris_,bonus);
  }
  if (flag_oris_backward) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw)
      als->computeOrientationSourceBackward(toris,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus);
    dwarfFoldBonus(foldBonus,&cost,&bonus);
    features->set_value(oris_backward_,cost);
    estimated_features->set_value(oris_backward_,bonus);
  }

  // Nearest source function word on each side of the span; the sentence
  // boundary markers are used when there is none.
  WordID _lfw = kSOS;
  WordID _rfw = kEOS;
  if (flag_doms || flag_pdomslr || flag_pdomsrl || flag_pgdomslr || flag_pgdomsrl) {
    if (DEBUG) cerr << "   seeking lfw and rfw" << endl;
    const int start = edge.i_;
    const int end   = edge.j_;
    if (DEBUG) cerr << "   start=" << start << ", end=" << end << endl;
    const Lattice& l = smeta.GetSourceLattice();
    const int src_len = static_cast<int>(l.size());
    for (int idx=start-1; idx>=0; idx--) {
      if (DEBUG) cerr << "  checking idx=" << idx << ", label=" << l[idx][0].label << "-" << TD::Convert(l[idx][0].label) << endl;
      if (sfw.find(l[idx][0].label) !=sfw.end()) {
        if (DEBUG) cerr << "+";
        _lfw=l[idx][0].label; break;
      }
    }
    for (int idx=end; idx<src_len; idx++) { // end or end+1
      if (DEBUG) cerr << "  checking idx=" << idx << ", label=" << l[idx][0].label << "-" << TD::Convert(l[idx][0].label) << endl;
      if (sfw.find(l[idx][0].label)!=sfw.end()) {
        if (DEBUG) cerr << ".";
        _rfw=l[idx][0].label; break;
      }
    }
    if (foldBonus) {
      _lfw=kSOS; _rfw=kEOS;
    }
  }

  if (flag_doms) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw) als->computeDominanceSource(tdoms,_lfw,_rfw,&cost,&bonus,
                                           &bo1,&bo1_bonus,&bo2,&bo2_bonus);
    if (DEBUG) cerr << "   COST=" << cost << ", BONUS=" << bonus << endl;
    if (foldBonus) {
      cost += bonus;
      if (DEBUG) cerr << "    final and !explicit_soseos, thus cost = " << cost <<  endl;
      bonus = 0;
    }
    features->set_value(doms_,cost);
    estimated_features->set_value(doms_,bonus);
  }
  if (flag_pdomslr) {
    if (DEBUG) cerr << " flag_pdomslr true, nofw=" << nofw << endl;
    if (DEBUG) cerr << "   lfw=" << _lfw << ", rfw=" << _rfw << endl;
    if (DEBUG) cerr << "   kSOS=" << kSOS << ", kEOS=" << kEOS << endl;
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw) als->computeDominanceSourcePos(tpdomslr,_lfw,_rfw,&cost,&bonus,
                                           &bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,pdoms_nlr,0);
    dwarfFoldBonus(foldBonus,&cost,&bonus);
    features->set_value(pdomslr_,cost);
    estimated_features->set_value(pdomslr_,bonus);
  }
  if (flag_pdomsrl) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw) als->computeDominanceSourcePos(tpdomsrl,_lfw,_rfw,&cost,&bonus,
                                           &bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,0,pdoms_nrl);
    dwarfFoldBonus(foldBonus,&cost,&bonus);
    features->set_value(pdomsrl_,cost);
    estimated_features->set_value(pdomsrl_,bonus);
  }
  if (flag_pgdomslr) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw) als->computeDominanceSourcePos(tpgdomslr,_lfw,_rfw,&cost,&bonus,
                                           &bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,pgdoms_nlr,0);
    dwarfFoldBonus(foldBonus,&cost,&bonus);
    features->set_value(pgdomslr_,cost);
    estimated_features->set_value(pgdomslr_,bonus);
  }
  if (flag_pgdomsrl) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw) als->computeDominanceSourcePos(tpgdomsrl,_lfw,_rfw,&cost,&bonus,
                                           &bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,0,pgdoms_nrl);
    dwarfFoldBonus(foldBonus,&cost,&bonus);
    features->set_value(pgdomsrl_,cost);
    estimated_features->set_value(pgdomsrl_,bonus);
  }

  if (flag_bdoms) {
    cost=0; bonus=0; bdoms_state_mono=0; bdoms_state_nonmono=0;
    if (!nofw)
      als->computeBorderDominanceSource(tbdoms,&cost,&bonus,
        &bdoms_state_mono, &bdoms_state_nonmono,*edge.rule_, ant_contexts, sfw);
    features->set_value(bdoms_,cost);
    estimated_features->set_value(bdoms_,bonus);
  }

  // The target-side models below never fold the estimate into the cost.
  if (flag_orit) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw) als->computeOrientationTarget(torit,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus);
    if (DEBUG) cerr << "cost=" << cost << ", bonus=" << bonus << ", bo1=" << bo1 << ", bo1_bonus=" << bo1_bonus << ", bo2=" << bo2 << ", bo2_bonus=" << bo2_bonus << endl;
    features->set_value(orit_,cost);
    estimated_features->set_value(orit_,bonus);
  }
  if (flag_orit_backward) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    if (!nofw) als->computeOrientationTargetBackward(torit,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus);
    features->set_value(orit_backward_,cost);
    estimated_features->set_value(orit_backward_,bonus);
  }
  if (flag_domt) {
    cost=bonus=bo1=bo2=bo1_bonus=bo2_bonus=0;
    // Neighbouring target function words taken from the reference, if any;
    // -1 means "none found".
    WordID ref_lfw=-1;
    WordID ref_rfw=-1;
    const int start = edge.i_;
    const int end   = edge.j_;
    if (smeta.HasReference()) {
      const Lattice& l = smeta.GetReference();
      const int ref_len = static_cast<int>(l.size());
      // edge.i_/edge.j_ are source positions, so the left scan is clamped
      // to the reference length to avoid indexing past its end.
      // NOTE(review): confirm that clamping is the intended behaviour when
      // the reference is shorter than the source span start.
      int idx = start-1;
      if (idx >= ref_len) idx = ref_len-1;
      for (; idx>=0; idx--) {
        if (l[idx].size()>0)
          if (tfw.find(l[idx][0].label) !=tfw.end()) {
            ref_lfw=l[idx][0].label; break;
          }
      }
      for (idx=end; idx<ref_len; idx++) { // end or end+1
        if (l[idx].size()>0)
          if (tfw.find(l[idx][0].label)!=tfw.end()) {
            ref_rfw=l[idx][0].label; break;
          }
      }
    }
    if (!nofw) als->computeDominanceTarget(tdomt,ref_lfw,ref_rfw,&cost,&bonus,
                                           &bo1,&bo1_bonus,&bo2,&bo2_bonus);
    features->set_value(domt_,cost);
    estimated_features->set_value(domt_,bonus);
  }

  // Write the outgoing state.
  int* vcontext = reinterpret_cast<int *>(context);
  if (!nofw) {
    als->BorderingSFWsOnly();
    als->BorderingTFWsOnly();
    als->simplify(vcontext);
  } else {
    als->simplify_nofw(vcontext);
  }
  vcontext[50] = DoubleToInteger(bdoms_state_mono);
  vcontext[51] = DoubleToInteger(bdoms_state_nonmono);
  vcontext[STATE_SIZE-1] = Alignment::link(edge.i_,edge.j_);
  if (DEBUG) {
    cerr << "state@traverse = ";
    for (int idx=0; idx<STATE_SIZE; idx++) cerr << idx << "." << vcontext[idx] << " ";
    cerr << endl;
    cerr << "bdoms_state_mono=" << bdoms_state_mono << ", state[50]=" << IntegerToDouble(vcontext[50]) << endl;
    cerr << "bdoms_state_nonmono=" << bdoms_state_nonmono << ", state[51]=" << IntegerToDouble(vcontext[51]) << endl;
  }
}

// Narrows `val` to float and returns that float's bit pattern as an int, so
// the value can be stored in an int-typed state slot. IntegerToDouble()
// reverses the encoding (up to float precision).
int Dwarf::DoubleToInteger(double val) {
  // Compile-time check that the bit pattern fits exactly.
  typedef char float_and_int_same_size[sizeof(float)==sizeof(int) ? 1 : -1];
  const float narrowed = static_cast<float>(val);
  int bits = 0;
  // memcpy is the well-defined way to reinterpret the bytes; dereferencing
  // a float* cast to int* violates the aliasing rules.
  std::memcpy(&bits, &narrowed, sizeof(float_and_int_same_size) ? sizeof(bits) : 0);
  return bits;
}

// Interprets the bits of `val` as a float (the encoding produced by
// DoubleToInteger()) and widens the result to double.
double Dwarf::IntegerToDouble(int val) {
  // Compile-time check that the bit pattern fits exactly.
  typedef char float_and_int_same_size[sizeof(float)==sizeof(int) ? 1 : -1];
  float decoded = 0;
  // memcpy is the well-defined way to reinterpret the bytes; dereferencing
  // an int* cast to float* violates the aliasing rules.
  std::memcpy(&decoded, &val, sizeof(float_and_int_same_size) ? sizeof(decoded) : 0);
  return static_cast<double>(decoded);
}

// Distance to move from column `idx`: the dist2next of its first arc, or 1
// when the column has no arcs or the stored distance is not positive (so a
// scan always makes progress).
static int dwarfLatticeStep(const Lattice& l, int idx) {
  if (l[idx].empty()) return 1;
  const int step = l[idx][0].dist2next;
  return step > 0 ? step : 1;
}

// Counts the function words found when walking the lattice `l` to the left
// of column `i` (into *lfw) and to the right of column `j` (into *rfw),
// looking only at the first arc of each visited column. A word is a
// function word when its label is a key of `fw_hash`. A side whose starting
// column lies outside the lattice reports 0.
//
// NOTE(review): the previous code incremented the pointers themselves
// (`lfw++`), so both outputs were always 0. Incrementing the pointees is the
// most literal repair, but the function name and its commented-out call site
// suggest it may have been meant to return the nearest function-word labels
// instead -- confirm before re-enabling any caller.
void Dwarf::neighboringFWs(const Lattice& l, const int& i, const int& j, const map<WordID,int>& fw_hash, int* lfw, int* rfw) {
  *lfw=0; *rfw=0;
  const int n = static_cast<int>(l.size());
  if (i>=0 && i<n) {
    for (int idx=i-dwarfLatticeStep(l,i); idx>=0; idx-=dwarfLatticeStep(l,idx)) {
      if (!l[idx].empty() && fw_hash.find(l[idx][0].label)!=fw_hash.end()) {
        ++(*lfw);
      }
    }
  }
  if (j>=0 && j<n) {
    for (int idx=j+dwarfLatticeStep(l,j); idx<n; idx+=dwarfLatticeStep(l,idx)) {
      if (!l[idx].empty() && fw_hash.find(l[idx][0].label)!=fw_hash.end()) {
        ++(*rfw);
      }
    }
  }
}

bool Dwarf::readOrientation(CountTable* table, const std::string& filename, std::map<WordID,int> *fw, bool pos) {
  // the input format is
  // source target 0 1 2 3 4 0 1 2 3 4
  // 0 -> MA, 1 -> RA, 2 -> MG, 3 -> RG, 4 -> NO_NEIGHBOR
  // first 01234 corresponds to the left neighbor, the second 01234 corresponds to the right neighbor
  // append 2 more at the end as precomputed total
  
  // TONS of hack here. CountTable should be wrapped as a class  
  // TODO: check whether the file exists or not, return false if not
  if (DEBUG) cerr << "  readOrientation(" << filename << ", pos=" << pos << ")" << endl;
  ReadFile rf(filename);  
  istream& in = *rf.stream();
  table->setup(24,pos);
  table->ultimate = new int[24];
  for (int i=0; i<24; i++) table->ultimate[i]=0;
  ostringstream oss;
  while (in) {
    string line;
    getline(in,line);
    if (line=="") break;
    istringstream tokenizer(line);
    string sourceidx, source, target, word;
    tokenizer >> source >> target; 
    if (pos) {
      sourceidx = source;
      source = sourceidx.substr(0,sourceidx.find_last_of("/"));
    }
    if (fw->find(TD::Convert(source))==fw->end()) fw->insert(pair<WordID,int>(TD::Convert(source),1));


    int* element = new int[24];
    element[5] = 0;
    for (int i=0; i<5; i++) {
      element[i] = 0;
      if (tokenizer >> word) element[i] = atoi(word.c_str());
      element[5] += element[i];
    }
    element[11] = 0;
    for (int i=6; i<11; i++) {
      element[i] = 0;
      if (tokenizer >> word) element[i] = atoi(word.c_str());
      element[11] += element[i];
    }
    element[17] = 0;
    for (int i=12; i<17; i++) {
      element[i] = 0;
      if (tokenizer >> word) element[i] = atoi(word.c_str());
      element[17] += element[i];
    }
    element[23] = 0;
    for (int i=18; i<23; i++) {
      element[i] = 0;
      if (tokenizer >> word) element[i] = atoi(word.c_str());
      element[23] += element[i];
    }
    for (int i=0; i<24; i++) table->ultimate[i] += element[i];
    oss << source << " " << target;
    WordID key_id = TD::Convert(oss.str());
    oss.str("");
    if (table->model.find(key_id)!=table->model.end()) {  
      for (int i=0; i<24; i++) table->model[key_id][i]+=element[i];
    } else {
      int* el2 = new int[24];
      for (int i=0; i<24; i++) el2[i] = element[i];
      table->model.insert(pair<WordID,int*>(key_id,el2));
    }
    
    oss << source;
    key_id = TD::Convert(oss.str());
    oss.str("");
    if (table->model.find(key_id)!=table->model.end()) {    
      for (int i=0; i<24; i++) table->model[key_id][i]+=element[i];
    } else {
      int* el2 = new int[24];
      for (int i=0; i<24; i++) el2[i] = element[i];
      table->model.insert(pair<WordID,int*>(key_id,el2));
    }

    if (pos) {
      oss << sourceidx << " " << target;
      key_id = TD::Convert(oss.str());
      oss.str(""); 
      if (table->model.find(key_id)!=table->model.end()) {
        for (int i=0; i<24; i++) table->model[key_id][i]+=element[i];
      } else {
        int* el2 = new int[24];
        for (int i=0; i<24; i++) el2[i] = element[i];
        table->model.insert(pair<WordID,int*>(key_id,el2));
      }
    }
    delete[] element;
  }  
  return true;    
}

// Reads a word list (one word per line) from `filename` and adds every word
// to `fw` with value 1. Words already present are left untouched. Empty
// lines are skipped, so neither blank lines nor the failed read at end of
// input register the empty string as a word. Always returns true.
bool Dwarf::readList(const std::string& filename, std::map<WordID,int>* fw) {
  ReadFile rf(filename);
  istream& in = *rf.stream();
  string word;
  // Testing the result of getline (rather than the stream state before the
  // read) keeps the loop from processing a line that was never read.
  while (getline(in,word)) {
    if (word.empty()) continue;
    fw->insert(pair<WordID,int>(TD::Convert(word),1));
  }
  return true;
}

bool Dwarf::readDominance(CountTable* table, const std::string& filename, std::map<WordID,int>* fw, bool pos) {
  // the input format is 
  // source1 source2 target1 target2 0 1 2 3
  // 0 -> dontcase 1->leftfirst 2->rightfirst 3->neither 
  if (DEBUG) cerr << "readDominance(" << filename << ",pos="<< pos << ")" << endl;
  ReadFile rf(filename);
  istream& in = *rf.stream();
  table->ultimate = new int[5];
  table->setup(5,pos);
  for (int i=0; i<5; i++) table->ultimate[i]=0;
  while (in) {
    string line, word;
    getline(in,line);
    if (line=="") break;
    string source1idx, source2idx, target1, target2, source1, source2;
    ostringstream oss; 
    WordID key_id;
    istringstream tokenizer(line);
    tokenizer >> source1 >> source2 >> target1 >> target2; 
    if (pos) {
      source1idx = source1;
      source2idx = source2;
      source1 = source1idx.substr(0,source1idx.find_last_of("/"));
      source2 = source2idx.substr(0,source2idx.find_last_of("/"));
    }
    if (fw->find(TD::Convert(source1))==fw->end()) fw->insert(pair<WordID,int>(TD::Convert(source1),1));
    if (fw->find(TD::Convert(source2))==fw->end()) fw->insert(pair<WordID,int>(TD::Convert(source2),1));

    int* element = new int[5];
    element[4]=0;
    for (int i=0; i<4; i++) {
      element[i]  = 0;
      if (tokenizer >> word) element[i] = atoi(word.c_str());
      element[4]+=element[i];
    }
    for (int i=0; i<5; i++) table->ultimate[i] += element[i];

    oss << source1 << " " << source2 << " " << target1 << " " << target2;
    key_id = TD::Convert(oss.str());
    oss.str("");
    if (table->model.find(key_id)!=table->model.end()) { 
      for (int i=0; i<5; i++) table->model[key_id][i]+=element[i];
    } else {
      int* el2 = new int[5]; 
      for (int i=0; i<5; i++) el2[i]=element[i];
      table->model.insert(pair<WordID,int*>(key_id,el2));
    }

    oss << source1 << " " << source2;
    key_id = TD::Convert(oss.str());
    oss.str("");
    if (table->model.find(key_id)!=table->model.end()) {  
      for (int i=0; i<5; i++) table->model[key_id][i]+=element[i];
    } else {
      int* el2 = new int[5]; 
      for (int i=0; i<5; i++) el2[i]=element[i];
      table->model.insert(pair<WordID,int*>(key_id,el2));
    }

    if (pos) {
      oss << source1idx << " " << source2idx << " " << target1 << " " << target2;
      key_id = TD::Convert(oss.str());
      oss.str("");
      if (table->model.find(key_id)!=table->model.end()) {  
        for (int i=0; i<5; i++) table->model[key_id][i]+=element[i];
      } else {
        int* el2 = new int[5]; 
        for (int i=0; i<5; i++) el2[i]=element[i];
        table->model.insert(pair<WordID,int*>(key_id,el2));
      }
    }
    delete element;
  }

  return true;    
}

bool Dwarf::readTags(const std::string& filename, std::map<WordID,WordID>* tags) {
  ReadFile rf(filename);
  istream& in = *rf.stream();
  while(in) {
    string line, word, tag;
    getline(in,line);
    if (line=="") break;
    istringstream tokenizer(line);
    tokenizer >> tag >> word;
    tags->insert(pair<WordID,WordID>(TD::Convert(word),TD::Convert(tag)));
  }
  return true;
}

bool Dwarf::generalizeOrientation(CountTable* table, const std::map<WordID,WordID>& tags, bool pos) {
  map<string,int*> generalized;
  for (map<WordID,int*>::iterator it=table->model.begin(); it!=table->model.end(); it++) {
    string source, target;
    istringstream tokenizer(TD::Convert(it->first));
    tokenizer >> source >> target;
    string idx = "";
    if (pos) {
      int found = source.find_last_of("/");
      if (found!=string::npos && found>0) { 
        idx = source.substr(found+1);
        source = source.substr(0,found);
      }
    }
    map<WordID,WordID>::const_iterator tags_iter = tags.find(TD::Convert(source));
    if (tags_iter!=tags.end()) {
      ostringstream genkey;
      genkey << TD::Convert(tags_iter->second);
      if (idx!="") genkey << "/" << idx;
      if (target!="") genkey << " " << target;
      int* model;
      if (generalized.find(genkey.str())!=generalized.end()) {
        model = generalized[genkey.str()];
        for (int i=0; i<24; i++) model[i] += it->second[i];
      } else {
        int* el = new int[24];
        for (int i=0; i<24; i++) el[i] = it->second[i];
        generalized.insert(pair<string,int*>(genkey.str(),el));
      }
    }
  }
  for (map<WordID,int*>::iterator it=table->model.begin(); it!=table->model.end(); it++) {
    string source, target;
    istringstream tokenizer(TD::Convert(it->first));
    tokenizer >> source >> target;
    string idx = "";
    if (pos) {
      int found = source.find_last_of("/");
      if (found!=string::npos && found>0) {
        idx = source.substr(found+1);
        source = source.substr(0,found);
      }
    }
    map<WordID,WordID>::const_iterator tags_iter = tags.find(TD::Convert(source));
    if (tags_iter!=tags.end()) {
      ostringstream genkey;
      genkey << TD::Convert(tags_iter->second);
      if (idx!="") genkey << "/" << idx;
      if (target!="") genkey << " " << target;
      if (generalized.find(genkey.str())!=generalized.end()) {
        delete it->second;
        it->second = generalized[genkey.str()];
      }
    }
  }
  return false; // no idea if this is right
}
 


bool Dwarf::generalizeDominance(CountTable* table, const std::map<WordID,WordID>& tags, bool pos) {
  map<string,int*> generalized;
  ostringstream oss;
  for (map<WordID,int*>::iterator it=table->model.begin(); it!=table->model.end(); it++) {
    string source1, source2, target1, target2;
    string idx1 = ""; string idx2 = "";
    istringstream tokenizer(TD::Convert(it->first));
    tokenizer >> source1 >> source2 >> target1 >> target2;
    if (DEBUG) cerr << "source1=|" << source1 << "|, source2=|" << source2 << "|, target1=|" << target1 << "|, target2=|" << target2 << "|" << endl;
    if (pos) {
      int found1 = source1.find_last_of("/");
      int found2 = source2.find_last_of("/");
      if (found1!=string::npos && found2!=string::npos && found1>0 && found2>0) {
        idx1 = source1.substr(found1+1);
        source1 = source1.substr(0,found1);
        idx2 = source2.substr(found2+1);
        source2 = source2.substr(0,found2);
      }
    }
    if (DEBUG) 
      cerr << "[U]source1='" << source1 << "', idx1='"<< idx1 << "', source2='" << source2 << "', idx2='"<< idx2 << "', target1='" << target1 << "', target2='" << target2 << "'" << endl;
    map<WordID,WordID>::const_iterator tags_iter1 = tags.find(TD::Convert(source1));
    map<WordID,WordID>::const_iterator tags_iter2 = tags.find(TD::Convert(source2));
    if (tags_iter1!=tags.end()) 
      source1 = TD::Convert(tags_iter1->second);
    oss << source1;
    if (idx1!="") oss << "/" << idx1;
    if (tags_iter2!=tags.end())
      source2 = TD::Convert(tags_iter2->second);
    oss << " " << source2;
    if (idx2!="") oss << "/" << idx2;
    if (target1!="" && target2!="") oss << " " << target1 << " " << target2;
    
    if (DEBUG) cerr << "generalized key = '" << oss.str() << "'" << endl; 
    if (generalized.find(oss.str())!=generalized.end()) {
      int* model = generalized[oss.str()];
      for (int i=0; i<5; i++) model[i] += it->second[i];
    } else {
      int* model = new int[5];
      for (int i=0; i<5; i++) model[i] = it->second[i];
      generalized.insert(pair<string,int*>(oss.str(),model));
    }    
    oss.str("");
  }
  
  if (DEBUG) {
    for (map<string,int*>::const_iterator it=generalized.begin(); it!=generalized.end(); it++) {
      cerr << "GENERALIZED = " << it->first << ", ";
      for (int i=0; i<5; i++) cerr << it->second[i] << " ";
      cerr << endl;
    }
  }

  for (map<WordID,int*>::iterator it=table->model.begin(); it!=table->model.end(); it++) {
    string source1, source2, target1, target2;
    string idx1 = ""; string idx2 = "";
    istringstream tokenizer(TD::Convert(it->first));
    tokenizer >> source1 >> source2 >> target1 >> target2;
    if (pos) {
      int found1 = source1.find_last_of("/");
      int found2 = source2.find_last_of("/");
      if (found1!=string::npos && found2!=string::npos && found1>0 && found2>0) {
        idx1 = source1.substr(found1+1);
        source1 = source1.substr(0,found1);
        idx2 = source2.substr(found2+1);
        source2 = source2.substr(0,found2);
      }
    }
    map<WordID,WordID>::const_iterator tags_iter1 = tags.find(TD::Convert(source1));
    map<WordID,WordID>::const_iterator tags_iter2 = tags.find(TD::Convert(source2));
    if (tags_iter1!=tags.end())
      source1 = TD::Convert(tags_iter1->second);
    oss << source1;
    if (idx1!="") oss << "/" << idx1;
    if (tags_iter2!=tags.end())
      source2 = TD::Convert(tags_iter2->second);
    oss << " " << source2;
    if (idx2!="") oss << "/" << idx2;
    if (target1!="" && target2!="") oss << " " << target1 << " " << target2;
    
    if (generalized.find(oss.str())!=generalized.end()) {
      if (DEBUG) cerr << " generalizing "<< TD::Convert(it->first) << " into " << oss.str() << endl; 
      if (DEBUG) {
        cerr << "  model from ";
        for (int i=0; i<5; i++) cerr << it->second[i] << " "; 
        cerr << endl;
      }
      delete it->second;
      it->second = generalized[oss.str()];
      if (DEBUG) {
        cerr << "  into ";
        for (int i=0; i<5; i++) cerr << it->second[i] << " "; 
        cerr << endl;
      }
    }    
    oss.str("");
  }

}