diff options
Diffstat (limited to 'decoder')
42 files changed, 351 insertions, 5736 deletions
| diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 914faaea..8280b22c 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -41,7 +41,6 @@ libcdec_a_SOURCES = \    cfg_options.h \    csplit.h \    decoder.h \ -  dwarf.h \    earley_composer.h \    exp_semiring.h \    factored_lexicon_helper.h \ @@ -51,18 +50,21 @@ libcdec_a_SOURCES = \    ff_charset.h \    ff_context.h \    ff_csplit.h \ -  ff_dwarf.h \    ff_external.h \    ff_factory.h \    ff_klm.h \    ff_lm.h \    ff_ngrams.h \ +  ff_parse_match.h \    ff_register.h \    ff_rules.h \    ff_ruleshape.h \    ff_sample_fsa.h \ +  ff_soft_syntax.h \ +  ff_soft_syntax_mindist.h \    ff_source_path.h \    ff_source_syntax.h \ +  ff_source_syntax2.h \    ff_spans.h \    ff_tagger.h \    ff_wordalign.h \ @@ -96,68 +98,64 @@ libcdec_a_SOURCES = \    sentences.h \    tagger.h \    translator.h \ -  tromble_loss.h \    trule.h \    viterbi.h \ -  forest_writer.cc \ -  maxtrans_blunsom.cc \ +  aligner.cc \ +  apply_models.cc \ +  bottom_up_parser.cc \ +  cdec.cc \    cdec_ff.cc \    cfg.cc \ -  dwarf.cc \ -  ff_dwarf.cc \ -  ff_external.cc \ -  rule_lexer.cc \ -  fst_translator.cc \    csplit.cc \ -  translator.cc \ -  rescore_translator.cc \ -  scfg_translator.cc \ -  hg.cc \ -  hg_io.cc \ -  hg_remove_eps.cc \    decoder.cc \ -  hg_intersect.cc \ -  hg_union.cc \ -  hg_sampler.cc \ -  factored_lexicon_helper.cc \ -  viterbi.cc \ -  lattice.cc \ -  aligner.cc \ -  apply_models.cc \    earley_composer.cc \ -  phrasetable_fst.cc \ -  trule.cc \ +  factored_lexicon_helper.cc \    ff.cc \ -  ffset.cc \    ff_basic.cc \ -  ff_rules.cc \ -  ff_wordset.cc \ -  ff_context.cc \ +  ff_bleu.cc \    ff_charset.cc \ -  ff_lm.cc \ +  ff_context.cc \ +  ff_csplit.cc \ +  ff_external.cc \ +  ff_factory.cc \    ff_klm.cc \ +  ff_lm.cc \    ff_ngrams.cc \ -  ff_spans.cc \ +  ff_parse_match.cc \ +  ff_rules.cc \    ff_ruleshape.cc \ -  ff_wordalign.cc \ -  ff_csplit.cc \ -  ff_tagger.cc \ +  ff_soft_syntax.cc \ +  ff_soft_syntax_mindist.cc \    ff_source_path.cc \ -  ff_parse_match.cc \ -	ff_soft_syntax.cc \ -	ff_soft_syntax2.cc \    ff_source_syntax.cc \ -	ff_source_syntax_p.cc \    ff_source_syntax2.cc \ -  ff_source_syntax2_p.cc \ -  ff_bleu.cc \ -  ff_factory.cc \ +  ff_spans.cc \ +  ff_tagger.cc \ +  ff_wordalign.cc \ +  ff_wordset.cc \ +  ffset.cc \ +  forest_writer.cc \ +  fst_translator.cc \ +  grammar.cc \ +  hg.cc \ +  hg_intersect.cc \ +  hg_io.cc \ +  hg_remove_eps.cc \ +  hg_sampler.cc \ +  hg_union.cc \    incremental.cc \ +  json_parse.cc \ +  lattice.cc \    lexalign.cc \    lextrans.cc \ -  tagger.cc \ -  bottom_up_parser.cc \ +  maxtrans_blunsom.cc \    phrasebased_translator.cc \ -  JSON_parser.c \ -  json_parse.cc \ -  grammar.cc +  phrasetable_fst.cc \ +  rescore_translator.cc \ +  rule_lexer.cc \ +  scfg_translator.cc \ +  tagger.cc \ +  translator.cc \ +  trule.cc \ +  viterbi.cc \ +  JSON_parser.c diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc index 330de9e2..4cd8b36f 100644 --- a/decoder/apply_models.cc +++ b/decoder/apply_models.cc @@ -8,8 +8,14 @@  #include <vector>  #include <algorithm> -#include <tr1/unordered_map> -#include <tr1/unordered_set> +#ifndef HAVE_OLD_CPP +# include <unordered_map> +# include <unordered_set> +#else +# include <tr1/unordered_map> +# include <tr1/unordered_set> +namespace std { using std::tr1::unordered_map; using std::tr1::unordered_set; } +#endif  #include <boost/functional/hash.hpp> @@ -23,7 +29,6 @@  #define FAST_CP_2 3  using namespace std; -using namespace std::tr1;  struct Candidate;  typedef SmallVectorInt JVector; diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index e7b31f50..d586c1d1 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -15,26 +15,16 @@  #include "ff_ruleshape.h"  #include "ff_bleu.h"  #include "ff_soft_syntax.h" -#include "ff_soft_syntax2.h" +#include "ff_soft_syntax_mindist.h"  #include "ff_source_path.h" - -  #include "ff_parse_match.h"  #include "ff_source_syntax.h" -#include "ff_source_syntax_p.h"  #include "ff_source_syntax2.h" -#include "ff_source_syntax2_p.h" - -  #include "ff_register.h"  #include "ff_charset.h"  #include "ff_wordset.h" -#include "ff_dwarf.h"  #include "ff_external.h" -#ifdef HAVE_GLC -#include <cdec/ff_glc.h> -#endif  void register_feature_functions() {    static bool registered = false; @@ -51,30 +41,16 @@ void register_feature_functions() {    RegisterFF<BLEUModel>();    //TODO: use for all features the new Register which requires static FF::usage(false,false) give name -#ifdef HAVE_RANDLM -  ff_registry.Register("RandLM", new FFFactory<LanguageModelRandLM>); -#endif    ff_registry.Register("SpanFeatures", new FFFactory<SpanFeatures>());    ff_registry.Register("NgramFeatures", new FFFactory<NgramDetector>());    ff_registry.Register("RuleContextFeatures", new FFFactory<RuleContextFeatures>());    ff_registry.Register("RuleIdentityFeatures", new FFFactory<RuleIdentityFeatures>()); - -    ff_registry.Register("ParseMatchFeatures", new FFFactory<ParseMatchFeatures>); - -  ff_registry.Register("SoftSyntacticFeatures", new FFFactory<SoftSyntacticFeatures>); -  ff_registry.Register("SoftSyntacticFeatures2", new FFFactory<SoftSyntacticFeatures2>); - +  ff_registry.Register("SoftSyntaxFeatures", new FFFactory<SoftSyntaxFeatures>); +  ff_registry.Register("SoftSyntaxFeaturesMindist", new FFFactory<SoftSyntaxFeaturesMindist>);    ff_registry.Register("SourceSyntaxFeatures", new FFFactory<SourceSyntaxFeatures>); -  ff_registry.Register("SourceSyntaxFeatures2", new FFFactory<SourceSyntaxFeatures2>); -    ff_registry.Register("SourceSpanSizeFeatures", new FFFactory<SourceSpanSizeFeatures>); - -  //ff_registry.Register("PSourceSyntaxFeatures", new FFFactory<PSourceSyntaxFeatures>); -  //ff_registry.Register("PSourceSpanSizeFeatures", new FFFactory<PSourceSpanSizeFeatures>); -  //ff_registry.Register("PSourceSyntaxFeatures2", new FFFactory<PSourceSyntaxFeatures2>); - - +  ff_registry.Register("SourceSyntaxFeatures2", new FFFactory<SourceSyntaxFeatures2>);    ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>());    ff_registry.Register("RuleSourceBigramFeatures", new FFFactory<RuleSourceBigramFeatures>());    ff_registry.Register("RuleTargetBigramFeatures", new FFFactory<RuleTargetBigramFeatures>()); @@ -98,10 +74,6 @@ void register_feature_functions() {    ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);    ff_registry.Register("SourcePathFeatures", new FFFactory<SourcePathFeatures>);    ff_registry.Register("WordSet", new FFFactory<WordSet>); -  ff_registry.Register("Dwarf", new FFFactory<Dwarf>);    ff_registry.Register("External", new FFFactory<ExternalFeature>); -#ifdef HAVE_GLC -  ff_registry.Register("ContextCRF", new FFFactory<Model1Features>); -#endif  } diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 31e6dc46..da65713a 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -1,6 +1,11 @@  #include "decoder.h" -#include <tr1/unordered_map> +#ifndef HAVE_OLD_CPP +# include <unordered_map> +#else +# include <tr1/unordered_map> +namespace std { using std::tr1::unordered_map; } +#endif  #include <boost/program_options.hpp>  #include <boost/program_options/variables_map.hpp>  #include <boost/make_shared.hpp> @@ -61,7 +66,6 @@  static const double kMINUS_EPSILON = -1e-6;  // don't be too strict  using namespace std; -using namespace std::tr1;  namespace po = boost::program_options;  static bool verbose_feature_functions=true; @@ -90,7 +94,7 @@ struct ELengthWeightFunction {    }  };  inline void ShowBanner() { -  cerr << "cdec v1.0 (c) 2009-2011 by Chris Dyer\n"; +  cerr << "cdec (c) 2009--2013 by Chris Dyer\n";  }  inline string str(char const* name,po::variables_map const& conf) { diff --git a/decoder/dwarf.cc b/decoder/dwarf.cc deleted file mode 100644 index fb0404a6..00000000 --- a/decoder/dwarf.cc +++ /dev/null @@ -1,3209 +0,0 @@ -#include "dwarf.h" -#include "tdict.h" -#include "wordid.h" -#include "lattice.h" -#include "ff_dwarf.h" -#include <assert.h> -#include <algorithm> -#include <ostream> -#include <sstream> -#include <iostream> -#include <fstream> -#include <vector> -#include <map> -#include <set> -#include <boost/functional/hash.hpp> -#include <tr1/unordered_map> -#include <boost/tuple/tuple.hpp> - -using namespace std; -using namespace std::tr1; -using namespace boost::tuples; -using namespace boost; - -Alignment::Alignment() { -  //unordered_map<std::vector<WordID>,int> XX; -  _I=0; -  _J=0; -  kSOS = TD::Convert("<s>"); -  kEOS = TD::Convert("</s>"); -  kUNK = TD::Convert("**UNKNOWN**"); -  SourceFWAntsIdxs = new int*[MAX_ARITY]; -  SourceFWAntsAbsIdxs = new int*[MAX_ARITY]; -  TargetFWAntsIdxs = new int*[MAX_ARITY]; -  SourceAntsIdxs = new int*[MAX_ARITY]; -  TargetAntsIdxs = new int*[MAX_ARITY]; -  AntsAl = new int*[MAX_ARITY]; -  for (int idx=0; idx<MAX_ARITY; idx++) { -    SourceAntsIdxs[idx] = new int[40]; -    SourceFWAntsIdxs[idx] = new int[40]; -    SourceFWAntsAbsIdxs[idx] = new int[40]; -    TargetAntsIdxs[idx] = new int[40]; -    TargetFWAntsIdxs[idx] = new int[40]; -    AntsAl[idx] = new int[40]; -  }     -  for (int j=0; j<MAX_WORDS; j++)  -    for (int i=0; i<MAX_WORDS; i++) _matrix[j][i]=false;  -  for (int j=0; j<MAX_WORDS; j++) { -    _tSpan[j][0]=MINIMUM_INIT; -    _sSpan[j][1]=MAXIMUM_INIT; -  } -  for (int i=0; i<MAX_WORDS; i++) { -    _sSpan[i][0]=MINIMUM_INIT; -    _sSpan[i][1]=MAXIMUM_INIT; -  } -  alpha_oris=0.1; -  alpha_orit=0.1; -  alpha_doms=0.1; -  alpha_domt=0.1; -  beta_oris=0.1; -  beta_orit=0.1; -  beta_doms=0.1; -  beta_domt=0.1; -} - -void Alignment::set(int j,int i) { -// create a link between j and i, update their corresponding span accordingly -  if (DEBUG) cerr << "set(" << j << "," << i << ")" << endl; -  assert(0<=j && j<MAX_WORDS); -  assert(0<=i && i<MAX_WORDS); -  if (0<=j && j<MAX_WORDS && 0<=i && i<MAX_WORDS) { -    _matrix[j][i] = true; -    _tSpan[j][0]=least(i,_tSpan[j][0]); -    _tSpan[j][1]=most(i,_tSpan[j][1]); -    _sSpan[i][0]=least(j,_sSpan[i][0]); -    _sSpan[i][1]=most(j,_sSpan[i][1]); -  } -  _J=most(j+1,_J); -  _I=most(i+1,_I); -} - -void Alignment::reset(int j,int i) { //probably won't be used, since the alignment is not dynamic -// remove the link between j and i, update their corresponding span accordingly -  if (DEBUG) cerr << "reset(" << j << "," << i << ")" << endl; -  assert(0<=j && j<MAX_WORDS); -  assert(0<=i && i<MAX_WORDS); -  _matrix[j][i] = false; -  if (j==_sSpan[i][0] || j==_sSpan[i][1]) { -    int min=MINIMUM_INIT; -    int max=MAXIMUM_INIT; -    for (int idx=_sSpan[i][0]; idx<=_sSpan[i][1]; idx++) { -      if (_matrix[idx][i]) { -        min=least(min,idx); -        max=most(max,idx); -      } -    } -    _sSpan[i][0]=min; -    _sSpan[i][1]=max; -  } -  if (i==_tSpan[j][0] || i==_tSpan[j][1]) { -    int min=MINIMUM_INIT; -    int max=MAXIMUM_INIT; -    for (int idx=_tSpan[j][0]; idx<=_tSpan[j][1]; idx++) { -      if (_matrix[j][idx]) { -        min=least(min,idx); -        max=most(max,idx); -      } -    } -    _tSpan[j][0]=min; -    _tSpan[j][1]=max; -  } -} - -int Alignment::targetOf(int j, int start) { -  assert(j>=0); -  if (start==-1) start = _tSpan[j][0]; -  if (_tSpan[j][0]==MINIMUM_INIT) return -1; -  for (int idx=start; idx<=_tSpan[j][1]; idx++) { -    if (_matrix[j][idx]) return idx; -  } -  return -1; -} - -int Alignment::sourceOf(int i, int start) { -  assert(i>=0); -  if (start==-1) start = _sSpan[i][0]; -  if (_sSpan[i][0]==MINIMUM_INIT) return -1; -  for (int idx=start; idx<=_sSpan[i][1]; idx++) { -    if (_matrix[idx][i]) return idx; -  } -  return -1; -} - -void Alignment::clearAls(int prevJ, int prevI) { -  for (int j=0; j<=prevJ; j++) { -    for (int i=0; i<prevI; i++) { -      _matrix[j][i]=false; -    } -  } -  for (int j=0; j<=prevJ; j++) { -    _tSpan[j][0] = MINIMUM_INIT; -    _tSpan[j][1] = MAXIMUM_INIT; -  } -  for (int i=0; i<=prevI; i++) { -    _sSpan[i][0] = MINIMUM_INIT; -    _sSpan[i][1] = MAXIMUM_INIT; -  } -  _J=0; -  _I=0; -} - -int Alignment::DominanceSource(int fw1, int fw2) { -  // Dominance of fw1 and fw2  -  // 0 -> neither, 1 -> leftFirst, 2 -> rightFirst, 3 -> dontCare -  if (DEBUG) cerr << "DominanceSource(" << fw1 << "," << fw2 << ")" << endl; -  //cerr << TD::Convert(_f[fw1]) << "," << TD::Convert(_f[fw2]) << endl;  -  //cerr << AsString() << endl; -  int dom = 0; -  curr_al.push_back(fw1); curr_al.push_back(fw2); -  if (doms_hash.find(curr_al)==doms_hash.end()) { -    int* block = blockSource(fw1,fw2); -    //cerr << "block = " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -    if (block[0]==fw1) {  -      int tfw10 = _tSpan[fw1][0]; -      int tfw11 = _tSpan[fw1][1]; -      //cerr << "tfw = " << tfw10 << "," << tfw11 << endl; -      if (tfw11<0) {  -        dom+=1; -      } else { -        if ((block[2]==tfw10 || block[3]==tfw11)) dom+=1; -      } -    } -    if (block[1]==fw2) { -      int tfw20 = _tSpan[fw2][0]; -      int tfw21 = _tSpan[fw2][1]; -      //cerr << "tfw = " << tfw20 << "," << tfw21 << endl; -      if (tfw21<0) { -        dom+=2; -      } else { -        if ((block[2]==tfw20 || block[3]==tfw21)) dom+=2; -      } -    } -    delete block; -    doms_hash.insert(pair<vector<int>,int>(curr_al,dom)); -  } else { -    dom = doms_hash[curr_al]; -  } -  if (DEBUG) cerr << "  dom = " << dom << endl; -  curr_al.pop_back(); curr_al.pop_back(); -  return dom; -} - -vector<int> Alignment::DominanceSource4Sampler(int fw1, int fw2) { -  if (DEBUG) cerr << "DominanceSource4Sampler(" << fw1 << "," << fw2 << ")" << endl; -  int dom = 0; -  int* block = blockSource(fw1,fw2); -  //cerr << "block = " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -  if (block[0]==fw1) { -    int tfw10 = _tSpan[fw1][0]; -    int tfw11 = _tSpan[fw1][1]; -    //cerr << "tfw = " << tfw10 << "," << tfw11 << endl; -    if (tfw11<0) { -      dom+=1; -    } else { -      if ((block[2]==tfw10 || block[3]==tfw11)) dom+=1; -    } -  } -  if (block[1]==fw2) { -    int tfw20 = _tSpan[fw2][0]; -    int tfw21 = _tSpan[fw2][1]; -    //cerr << "tfw = " << tfw20 << "," << tfw21 << endl; -    if (tfw21<0) { -      dom+=2; -    } else { -      if ((block[2]==tfw20 || block[3]==tfw21)) dom+=2; -    } -  } -  if (DEBUG) cerr << "doms = " << dom << endl; -  vector<int> ret; -  ret.push_back(dom); ret.push_back(block[0]); ret.push_back(block[1]); -  ret.push_back(block[2]); ret.push_back(block[3]); -  delete block; -  return ret; -} - -int Alignment::DominanceTarget(int fw1, int fw2) { -  int dom = 0; -  curr_al.push_back(fw1); curr_al.push_back(fw2); -  if (domt_hash.find(curr_al)==domt_hash.end()) { -    int* block = blockTarget(fw1,fw2); -    if (block[2]==fw1) { -      int sfw10 = _sSpan[fw1][0]; -      int sfw11 = _sSpan[fw1][1]; -      if (sfw11<0) { -        dom+=1; -      } else { -        if (block[0]==sfw10 || block[1]==sfw11) dom+=1; -      } -    } -    if (block[3]==fw2) { -      int sfw20 = _sSpan[fw2][0]; -      int sfw21 = _sSpan[fw2][0]; -      if (sfw21<0) { -        dom+=2; -      } else { -        if (block[0]==sfw20 || block[1]==sfw21) dom+=2; -      } -    } -    delete block; -    domt_hash.insert(pair<vector<int>,int>(curr_al,dom)); -  } else { -    dom = domt_hash[curr_al]; -  } -  curr_al.pop_back(); curr_al.pop_back(); -  return dom; -} - -vector<int> Alignment::DominanceTarget4Sampler(int fw1, int fw2) { -  int dom = 0; -  int* block = blockTarget(fw1,fw2); -  if (block[2]==fw1) { -    int sfw10 = _sSpan[fw1][0]; -    int sfw11 = _sSpan[fw1][1]; -    if (sfw11<0) { -      dom+=1; -    } else { -      if (block[0]==sfw10 || block[1]==sfw11) dom+=1; -    } -  } -  if (block[3]==fw2) { -    int sfw20 = _sSpan[fw2][0]; -    int sfw21 = _sSpan[fw2][0]; -    if (sfw21<0) { -      dom+=2; -    } else { -      if (block[0]==sfw20 || block[1]==sfw21) dom+=2; -    } -  } -  vector<int> ret; -  ret.push_back(dom); ret.push_back(block[0]); ret.push_back(block[1]); -  ret.push_back(block[2]); ret.push_back(block[3]); -  delete block; -  return ret; -} - -void Alignment::OrientationSource(int fw, int* oril, int* orir, bool Lcompute, bool Rcompute) { -  OrientationSource(fw,fw,oril,orir,Lcompute,Rcompute); -} - -vector<int> Alignment::OrientationSourceLeft4Sampler(int fw) { -  return OrientationSourceLeft4Sampler(fw,fw); -} - -vector<int> Alignment::OrientationSourceLeft4Sampler(int fw0, int fw1) { -  if (DEBUG) cerr << "OrientationSourceLeft4Sampler(" << fw0 << "," << fw1 << ")" << endl; -  int oril = 0; -  int N0=fw0-1; -  while (N0>=0) { -    if (minTSpan(N0)!=MINIMUM_INIT) break; -    N0--; -  } -  int N1=fw1+1; -  while (N1<_J) { -    if (minTSpan(N1)!=MINIMUM_INIT) break; -    N1++; -  } -  if (minTSpan(fw0)==MINIMUM_INIT && minTSpan(fw1)==MINIMUM_INIT) { -    fw0 = N1; fw1 = N0; -  } -  if (DEBUG) cerr << "fw0=" << fw0 << ", fw1=" << fw1 << ", N0=" << N0 << ", N1=" << N1 << endl; -  if (maxTSpan(N0)<minTSpan(fw0) || maxTSpan(fw0)<minTSpan(N0)) { -    if (DEBUG) cerr << "N0=" << minTSpan(N0) << "-" << maxTSpan(N0); -    if (DEBUG) cerr << "fw=" << minTSpan(fw0) << "-" << maxTSpan(fw0) << endl; -    int *block = blockTarget(minTSpan(N0),maxTSpan(N0)); -    if (block[0]<=fw0 && fw0<=block[1]) oril=5; -    delete block; -    if (oril==0) { -      block = blockTarget(minTSpan(fw0),maxTSpan(fw0)); -      if (block[0]<=N0 && N0<=block[1]) oril=5; -      delete block; -    } -    if (oril==0) { -      if (maxTSpan(N0)<minTSpan(fw0)) {// if N0 is monotone -        oril=1; -        block = blockTarget(maxTSpan(N0),minTSpan(fw0)-1); -        if (block[0] <= fw0 && fw0 <= block[1]) oril+=2; -        delete block; -      } else { //if (maxTSpan(fw0)<minTSpan(N0)) { // if NO is non-monotone -        oril=2; -        block = blockTarget(maxTSpan(fw0)+1,minTSpan(N0)); -        if (block[0] <= fw0 && fw0 <= block[1]) oril+=2; -        delete block; -      } -    } -  } else { -    oril=5; -  } -  if (DEBUG) cerr << "oril = " << oril << endl; -  int* block = blockSource(N0,fw0); -  if (DEBUG) { -    for (int i=0; i<4; i++) cerr << "block[" << i << "]=" << block[i] << endl; -  } -  vector<int> ret; -  ret.push_back(oril); ret.push_back(block[0]); ret.push_back(block[1]); -  ret.push_back(block[2]); ret.push_back(block[3]); -  delete block; -  return ret; -} - -vector<int> Alignment::OrientationSourceRight4Sampler(int fw) { -  return OrientationSourceRight4Sampler(fw,fw); -} - -vector<int> Alignment::OrientationSourceRight4Sampler(int fw0, int fw1) { -  if (DEBUG) cerr << "OrientationSourceLeft4Sampler(" << fw0 << "," << fw1 << ")" << endl; -  int orir = 0; -  int N0=fw0-1; -  while (N0>=0) { -    if (minTSpan(N0)!=MINIMUM_INIT) break; -    N0--; -  } -  int N1=fw1+1; -  while (N1<_J) { -    if (minTSpan(N1)!=MINIMUM_INIT) break; -    N1++; -  } -  if (minTSpan(fw0)==MINIMUM_INIT && minTSpan(fw1)==MINIMUM_INIT) { -    fw0 = N1; fw1 = N0; -  } -  if (DEBUG) cerr << "fw0=" << fw0 << ", fw1=" << fw1 << ", N0=" << N0 << ", N1=" << N1 << endl; -  if (maxTSpan(N1)<minTSpan(fw1) || maxTSpan(fw1)<minTSpan(N1)) { -    int* block = blockTarget(minTSpan(N1),maxTSpan(N1)); -    if (block[0]<=fw1 && fw1<=block[2]) orir=5; -    delete block; -    if (orir==0) { -      block = blockTarget(minTSpan(fw1),maxTSpan(fw1)); -      if (block[0]<=N1 && N1 <=block[1]) orir=5; -      delete block; -    } -    if (DEBUG) cerr << "N1=" << minTSpan(N1) << "-" << maxTSpan(N1); -    if (DEBUG) cerr << "fw1=" << minTSpan(fw1) << "-" << maxTSpan(fw1) << endl; -    if (orir==0) { -      if (maxTSpan(fw1)<minTSpan(N1)) { // if N1 is monotone -        orir = 1; -        block = blockTarget(maxTSpan(fw1)+1,minTSpan(N1)); -        if (block[0] <= fw1 && fw1 <= block[1]) orir+=2; -        delete block; -      } else {// if (maxTSpan(N1)<minTSpan(fw1)) { // if N1 is non-monotone -        orir = 2; -        block = blockTarget(maxTSpan(N1),minTSpan(fw1)-1); -        if (block[0] <= fw1 && fw1 <= block[1]) orir+=2; -        delete block; -      } -    } -  } else { -    orir = 5; -  } -  if (DEBUG) cerr << "orir = " << orir << endl; -  int* block = blockSource(fw1,N1); -  vector<int> ret; -  ret.push_back(orir); ret.push_back(block[0]); ret.push_back(block[1]); -  ret.push_back(block[2]); ret.push_back(block[3]); -  delete block; -  return ret; -} - -void Alignment::OrientationSource(int fw0, int fw1, int* oril, int* orir, bool Lcompute, bool Rcompute) { -  // Orientation -  // A bit tricky since fw can be 1) unaligned 2) aligned to many -  // 1 -> MA, 2 -> RA, 3 -> MG, 4 -> RG, 5 -> Other -  if (DEBUG) cerr << "OrientationSource(" << fw0 << "," << fw1 << ")" << endl; -  if (!Lcompute && !Rcompute) return; -  curr_al.push_back(fw0); -  curr_al.push_back(fw1); -  *oril=0; -  *orir=0; -  int lr=0; -  if (oris_hash.find(curr_al)==oris_hash.end()) { -    // Find first aligned word N0 to the left of fw -    int N0=fw0-1; -    while (N0>=0) { -      if (minTSpan(N0)!=MINIMUM_INIT) break; -      N0--; -    } -    int N1=fw1+1; -    while (N1<_J) { -      if (minTSpan(N1)!=MINIMUM_INIT) break; -      N1++; -    } -    if (minTSpan(fw0)==MINIMUM_INIT && minTSpan(fw1)==MINIMUM_INIT) { -      fw0 = N1; fw1 = N0; -      //cerr << "minTSpan(fw)==MINIMUM_INIT, thus fw0=" << fw0 << ", fw1=" << fw1 << endl; -    } -    if (DEBUG) cerr << "fw0=" << fw0 << ", fw1=" << fw1 << ", N0=" << N0 << ", N1=" << N1 << endl; -    if (maxTSpan(N0)<minTSpan(fw0) || maxTSpan(fw0)<minTSpan(N0)) { -      if (DEBUG) cerr << "N0=" << minTSpan(N0) << "-" << maxTSpan(N0); -      if (DEBUG) cerr << "fw=" << minTSpan(fw0) << "-" << maxTSpan(fw0) << endl; -      int *block = blockTarget(minTSpan(N0),maxTSpan(N0)); -      if (block[0]<=fw0 && fw0<=block[1]) *oril=5; -      delete block; -      if (*oril==0) { -        block = blockTarget(minTSpan(fw0),maxTSpan(fw0)); -        if (block[0]<=N0 && N0<=block[1]) *oril=5; -        delete block; -      } -      if (*oril==0) { -        if (maxTSpan(N0)<minTSpan(fw0)) {// if N0 is monotone -          *oril=1; -          block = blockTarget(maxTSpan(N0),minTSpan(fw0)-1); -          if (block[0] <= fw0 && fw0 <= block[1]) *oril+=2; -          delete block; -        } else { //if (maxTSpan(fw0)<minTSpan(N0)) { // if NO is non-monotone -          *oril=2; -          block = blockTarget(maxTSpan(fw0)+1,minTSpan(N0)); -          if (block[0] <= fw0 && fw0 <= block[1]) *oril+=2; -          delete block; -        } -      } -    } else { -      *oril=5; -    } -    if (DEBUG) cerr << "oril =" << *oril << endl; -    // Right neighbor -    if (maxTSpan(N1)<minTSpan(fw1) || maxTSpan(fw1)<minTSpan(N1)) { -      int* block = blockTarget(minTSpan(N1),maxTSpan(N1)); -      if (block[0]<=fw1 && fw1<=block[2]) *orir=5; -      delete block; -      if (*orir==0) { -        block = blockTarget(minTSpan(fw1),maxTSpan(fw1)); -        if (block[0]<=N1 && N1 <=block[1]) *orir=5; -        delete block; -      } -      if (DEBUG) cerr << "N1=" << minTSpan(N1) << "-" << maxTSpan(N1); -      if (DEBUG) cerr << "fw1=" << minTSpan(fw1) << "-" << maxTSpan(fw1) << endl; -      if (*orir==0) { -        if (maxTSpan(fw1)<minTSpan(N1)) { // if N1 is monotone -          *orir = 1; -          block = blockTarget(maxTSpan(fw1)+1,minTSpan(N1)); -          if (block[0] <= fw1 && fw1 <= block[1]) *orir+=2; -          delete block; -        } else {// if (maxTSpan(N1)<minTSpan(fw1)) { // if N1 is non-monotone -          *orir = 2; -          block = blockTarget(maxTSpan(N1),minTSpan(fw1)-1); -          if (block[0] <= fw1 && fw1 <= block[1]) *orir+=2; -          delete block; -        } -      } -    } else { -      *orir = 5; -    } -    if (DEBUG) cerr << "orir =" << *orir << endl; -    lr = link(*oril,*orir); -    oris_hash.insert(pair<vector<int>,int>(curr_al,lr));   -  } else { -    lr = oris_hash[curr_al];   -  } -  if (DEBUG) cerr << "Lcompute=" << Lcompute << ", Rcompute=" << Rcompute << endl; -  if (Lcompute) *oril = source(lr); -  if (Rcompute) *orir = target(lr); -  curr_al.pop_back(); -  curr_al.pop_back(); -} - -int Alignment::OrientationSource(int* left, int* right) { -  if (DEBUG) { -    cerr << "      OrientationSource("; -    cerr << "left="<<left[0]<<","<<left[1]<<","<<left[2]<<","<<left[3]; -    cerr << " right="<<right[0]<<","<<right[1]<<","<<right[2]<<","<<right[3]; -    cerr << ")" << endl; -  }  -  //if ((right[1]<=left[0]) return 5; -  if (!(left[1]<right[0])) return 5; -  int ori = 1; -  if (right[3]<left[2]) ori=2; -  int gapstart = left[3]+1; int gapend = right[2]-1; -  if (ori==2) { gapstart = right[3]+1; gapend = left[2]-1; } -  for (int j=gapstart; j<=gapend; j++) { -    if (sourceOf(j)!=-1) { -      ori+=2; break; -    } -  } -  return ori; -} - -void Alignment::OrientationTarget(int fw, int *oril, int *orir, bool Lcompute, bool Rcompute) { -  OrientationTarget(fw,fw,oril,orir,Lcompute,Rcompute); -} - -vector<int> Alignment::OrientationTargetLeft4Sampler(int fw) { -  return OrientationTargetLeft4Sampler(fw,fw); -} - -vector<int> Alignment::OrientationTargetLeft4Sampler(int fw0, int fw1) { -  if (DEBUG) cerr << "OrientationTargetLeft4Sampler " << fw0 << "," << fw1 << endl; -  int oril=0;  -  int N0=fw0-1; -  while (N0>=0) { -    if (minSSpan(N0)!=MINIMUM_INIT) break; -    N0--; -  } -  int N1=fw1+1; -  while (N1<_I) { -    if (minSSpan(N1)!=MINIMUM_INIT) break; -    N1++; -  } -  if (minSSpan(fw0)==MINIMUM_INIT && minSSpan(fw1)==MINIMUM_INIT) { -    fw0=N1; fw1=N0; -  } -  if (maxSSpan(N0)<minSSpan(fw0) || maxSSpan(fw0)<minSSpan(N0)) { -    int *block = blockSource(minSSpan(N0),maxSSpan(N0)); -    if (DEBUG) cerr << "block1[" << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -    if (block[2]<=fw0 && fw0<=block[3])  //source span of fw0 subsumes NO's or the other way around -      oril=5; -    delete block; -    if (oril==0) { -      block = blockSource(minSSpan(fw0), maxSSpan(fw0)); -      if (DEBUG) cerr << "block2[" << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -      if (block[2] <= N0 && N0 <= block[3]) oril=5; -      delete block; -    } -    if (oril==0) { -      if (maxSSpan(N0)<minSSpan(fw0)) {// if N0 is monotone -        oril=1; -        block = blockSource(maxSSpan(N0),minSSpan(fw0)-1); -        if (DEBUG) cerr << "block3[" << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -        if (block[2] <= fw0 && fw0 <= block[3]) oril+=2; -        delete block; -      } else { // (maxSSpan(fw0)<minSSpan(N0)) // if NO is non-monotone -        oril=2; -        block = blockSource(maxSSpan(fw0)+1,minSSpan(N0)); -        if (DEBUG) cerr << "block4[" << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -        if (block[2] <= fw0 && fw0 <= block[3]) oril+=2; -        delete block; -      } -    } -  } else { //source span of fw0 subsumes NO's or the other way around -    oril=5; -  } -  if (DEBUG) cerr << "oril = " << oril << endl; -  int* block = blockSource(N0,fw0); -  vector<int> ret; -  ret.push_back(oril); ret.push_back(block[0]); ret.push_back(block[1]); -  ret.push_back(block[2]); ret.push_back(block[3]); -  delete block; -  return ret; -} - -vector<int> Alignment::OrientationTargetRight4Sampler(int fw) { -  return OrientationTargetRight4Sampler(fw,fw); -} - -vector<int> Alignment::OrientationTargetRight4Sampler(int fw0, int fw1) { -  if (DEBUG) cerr << "OrientationTargetRight4Sampler " << fw0 << "," << fw1 << endl; -  int orir=0; -  int N0=fw0-1; -  while (N0>=0) { -    if (minSSpan(N0)!=MINIMUM_INIT) break; -    N0--; -  } -  int N1=fw1+1; -  while (N1<_I) { -    if (minSSpan(N1)!=MINIMUM_INIT) break; -    N1++; -  } -  if (minSSpan(fw0)==MINIMUM_INIT && minSSpan(fw1)==MINIMUM_INIT) { -    fw0=N1; fw1=N0; -  } -  if (maxSSpan(N1)<minSSpan(fw1) || maxSSpan(fw1)<minSSpan(N1)) { -    int *block = blockSource(minSSpan(N1),maxSSpan(N1)); -    if (block[2]<=fw1 && fw1<=block[3]) orir=5; -    delete block; -    if (orir==0) { -      block = blockSource(minSSpan(fw1),maxSSpan(fw1)); -      if (block[2] <= N1 && N1 <= block[3]) orir=5; -      delete block; -    } -    if (orir==0) { -      if (maxSSpan(fw1)<minSSpan(N1)) { // if N1 is monotone -        orir=1; -        block = blockSource(maxSSpan(fw1)+1,minSSpan(N1)); -        if (block[2] <= fw1 && fw1 <= block[3]) orir+=2; -        delete block; -      } else { //if (maxSSpan(N1)<minSSpan(fw1)) { // if N1 is non-monotone -        orir=2; -        block = blockSource(maxSSpan(N1),minSSpan(fw1)-1); -        if (block[2] <= fw1 && fw1 <= block[3]) orir+=2; -        delete block; -      } -    } -  } else { -    orir=5; -  } -  if (DEBUG) cerr << "orir = " << orir << endl; -  int* block = blockSource(fw1,N1); -  vector<int> ret; -  ret.push_back(orir); ret.push_back(block[0]); ret.push_back(block[1]); -  ret.push_back(block[2]); ret.push_back(block[3]); -  delete block; -  return ret; - -} - -void Alignment::OrientationTarget(int fw0, int fw1, int*oril, int*orir, bool Lcompute, bool Rcompute) { -  if (DEBUG) cerr << "OrientationTarget " << fw0 << "," << fw1 << endl; -  // Left Neighbor -  if (!Lcompute && !Rcompute) return; -  *oril=0; -  *orir=0; -  curr_al.push_back(fw0); -  curr_al.push_back(fw1); -  int lr = 0; -  if (orit_hash.find(curr_al)==orit_hash.end()) { -    // Find first aligned word N0 to the left of fw -    //int fw0 = fw; int fw1 = fw; -    int N0=fw0-1; -    while (N0>=0) { -      if (minSSpan(N0)!=MINIMUM_INIT) break; -      N0--; -    } -    int N1=fw1+1; -    while (N1<_I) { -      if (minSSpan(N1)!=MINIMUM_INIT) break; -      N1++; -    } -    if (minSSpan(fw0)==MINIMUM_INIT && minSSpan(fw1)==MINIMUM_INIT) { -      fw0=N1; fw1=N0; -    } -    if (DEBUG) { -      cerr << "fw0:" << fw0 << ", fw1:" << fw1 << ", N0:" << N0 << ", N1:" << N1 << endl ; -      cerr << "minSSpan(N0)=" << minSSpan(N0) << " maxSSpan(N0)=" << maxSSpan(N0); -      cerr << " minSSpan(fw0)="<< minSSpan(fw0) << " maxSSpan(fw0)=" << maxSSpan(fw0) << endl; -      cerr << "minSSpan(fw1)=" << minSSpan(fw1) << " maxSSpan(fw1)=" << maxSSpan(fw1); -      cerr << " minSSpan(N1)="<< minSSpan(N1) << " maxSSpan(N1)=" << maxSSpan(N1) << endl; -    } -    if (maxSSpan(N0)<minSSpan(fw0) || maxSSpan(fw0)<minSSpan(N0)) { -      int *block = blockSource(minSSpan(N0),maxSSpan(N0)); -      if (DEBUG) cerr << "block1[" << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -      if (block[2]<=fw0 && fw0<=block[3])  //source span of fw0 subsumes NO's or the other way around -        *oril=5; -      delete block; -      if (*oril==0) { -        block = blockSource(minSSpan(fw0), maxSSpan(fw0)); -        if (DEBUG) cerr << "block2[" << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -        if (block[2] <= N0 && N0 <= block[3]) *oril=5; -        delete block; -      } -      if (*oril==0) {   -        if (maxSSpan(N0)<minSSpan(fw0)) {// if N0 is monotone -          *oril=1; -          block = blockSource(maxSSpan(N0),minSSpan(fw0)-1); -          if (DEBUG) cerr << "block3[" << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -          if (block[2] <= fw0 && fw0 <= block[3]) *oril+=2; -          delete block; -        } else { // (maxSSpan(fw0)<minSSpan(N0)) // if NO is non-monotone -          *oril=2; -          block = blockSource(maxSSpan(fw0)+1,minSSpan(N0)); -          if (DEBUG) cerr << "block4[" << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -          if (block[2] <= fw0 && fw0 <= block[3]) *oril+=2; -          delete block; -        } -      } -    } else { //source span of fw0 subsumes NO's or the other way around -      *oril=5; -    } -    if (DEBUG) cerr << "oril = " << *oril << endl; -    // Right Neighbor -    if (maxSSpan(N1)<minSSpan(fw1) || maxSSpan(fw1)<minSSpan(N1)) { -      int *block = blockSource(minSSpan(N1),maxSSpan(N1)); -      if (block[2]<=fw1 && fw1<=block[3]) *orir=5;  -      delete block; -      if (*orir==0) { -        block = blockSource(minSSpan(fw1),maxSSpan(fw1)); -        if (block[2] <= N1 && N1 <= block[3]) *orir=5; -        delete block; -      } -      if (*orir==0) { -        if (maxSSpan(fw1)<minSSpan(N1)) { // if N1 is monotone -          *orir=1; -          block = blockSource(maxSSpan(fw1)+1,minSSpan(N1)); -          if (block[2] <= fw1 && fw1 <= block[3]) *orir+=2; -          delete block; -        } else { //if (maxSSpan(N1)<minSSpan(fw1)) { // if N1 is non-monotone -          *orir=2; -          block = blockSource(maxSSpan(N1),minSSpan(fw1)-1); -          if (block[2] <= fw1 && fw1 <= block[3]) *orir+=2; -          delete block; -        } -      } -    } else { -      *orir=5; -    } -    if (DEBUG) cerr << "orir = " << *orir << endl; -    lr = link(*oril,*orir); -    orit_hash.insert(pair<vector<int>,int>(curr_al,lr)); -  } else { -    lr = orit_hash[curr_al]; -  } -  if (DEBUG) cerr << "Lcompute=" << Lcompute << ", Rcompute=" << Rcompute << endl; -  if (DEBUG) cerr << "lr=" << lr << ", l=" << source(lr) << ", r=" << target(lr) << endl; -  if (Lcompute>0) *oril=source(lr); -  if (Rcompute>0) *orir=target(lr); -  curr_al.pop_back(); -  curr_al.pop_back(); -} - -int* Alignment::blockSource(int idx1, int idx2) { -// outputs a minimal block [s1,s2,t1,t2] that contains idx1 and idx2, where idx1 <= idx2 -  if (DEBUG) cerr << "blockSource[" << idx1 << "," << idx2 << "]" << endl; -  int *curr = new int[4]; -  curr[0]=idx1; curr[1]=idx2; curr[2]=MINIMUM_INIT; curr[3]=MAXIMUM_INIT; -  for (int j=curr[0]; j<=curr[1]; j++) { -    curr[2] = least(curr[2],_tSpan[j][0]); -    curr[3] =  most(curr[3],_tSpan[j][1]); -  } -  int next[4]; -  next[0]=curr[0]; next[1]=curr[1]; -  for (int i=curr[2]; i<=curr[3]; i++) { -    next[0] = least(next[0],_sSpan[i][0]); -    next[1] =  most(next[1],_sSpan[i][1]); -  }  -  next[2] = curr[2]; next[3]= curr[3]; -  int idx=1; -  do { -    // update the current -    for (int j=next[0]; j<curr[0]; j++) { -      curr[2] = least(curr[2],_tSpan[j][0]); -      curr[3] =  most(curr[3],_tSpan[j][1]); -    } -    for (int j=curr[1]+1; j<=next[1]; j++) { -      curr[2] = least(curr[2],_tSpan[j][0]); -      curr[3] =  most(curr[3],_tSpan[j][1]); -    } -    curr[0] = next[0]; curr[1] = next[1];  -    if (curr[2]==next[2] && curr[3]==next[3]) break; -    // prepare for the next  -    for (int i=curr[2]; i<next[2]; i++) { -      next[0]= least(next[0],_sSpan[i][0]); -      next[1]=  most(next[1],_sSpan[i][1]); -    } -    for (int i=next[3]+1; i<=curr[3]; i++) { -      next[0] = least(next[0],_sSpan[i][0]); -      next[1] =  most(next[1],_sSpan[i][1]); -    } -    next[2] = curr[2]; next[3]= curr[3]; -    idx++; -  } while(1); -  return curr; -} - -int* Alignment::blockTarget(int idx1, int idx2) { -// outputs a minimal [s1,s2,t1,t2] that contains idx1 and idx2, where idx1<=idx2 -  int *curr = new int[4]; -  curr[0]=MINIMUM_INIT; curr[1]=MAXIMUM_INIT; curr[2]=idx1; curr[3]=idx2; -        for (int i=curr[2]; i<=curr[3]; i++) { -                curr[0] = least(curr[0],_sSpan[i][0]); -                curr[1] =  most(curr[1],_sSpan[i][1]); -        } -        int next[4]; -        next[2]=curr[2]; next[3]=curr[3]; -        for (int j=curr[0]; j<=curr[1]; j++) { -                next[2] = least(next[2],_tSpan[j][0]); -                next[3] =  most(next[3],_tSpan[j][1]); -        } -        next[0] = curr[0]; next[1]= curr[1]; -        int idx=1; -        do { -                // update the current -                for (int i=next[2]; i<curr[2]; i++) { -                        curr[0] = least(curr[0],_sSpan[i][0]); -                        curr[1] =  most(curr[1],_sSpan[i][1]); -                } -                for (int i=curr[3]+1; i<=next[3]; i++) { -                        curr[0] = least(curr[0],_sSpan[i][0]); -                        curr[1] =  most(curr[1],_sSpan[i][1]); -                } -                curr[2] = next[2]; curr[3] = next[3]; -                if (curr[0]==next[0] && curr[1]==next[1]) break; -                // prepare for the next -                for (int j=curr[0]; j<next[0]; j++) { -                        next[2]= least(next[2],_tSpan[j][0]); -                        next[3]=  most(next[3],_tSpan[j][1]); -                } -                for (int j=next[1]+1; j<=curr[1]; j++) { -                        next[2] = least(next[2],_tSpan[j][0]); -                        next[3] =  most(next[3],_tSpan[j][1]); -                } -                next[0] = curr[0]; next[1]= curr[1]; -                idx++; -        } while(1); -  return curr; -} - -int Alignment::firstSourceAligned(int start) { -  for (int j=start; j<_J; j++)  -    if (_tSpan[j][0]!=MINIMUM_INIT) return j; -  return -1; -} - -int Alignment::lastSourceAligned(int end) { -  for (int j=end; j>=0; j--) -    if (_tSpan[j][0]!=MINIMUM_INIT) return j; -  return -1; -} - -int Alignment::firstTargetAligned(int start) { -  for (int i=start; i<_I; i++)  -    if (_sSpan[i][0]!=MINIMUM_INIT) return i; -  return -1; -} - -int Alignment::lastTargetAligned(int end) { -  for (int i=end; i>=0; i--)  -    if (_sSpan[i][0]!=MINIMUM_INIT) return i; -  return -1; -} - -void Alignment::BorderingSFWsOnly() { -// removes the record of all function word alignments, except those at the borders -// the number of alignments kept may be more than two -// i.e. where the leftmost / the rightmost alignments are unaligned.  -// In such cases, this function continues keeping function word alignments until the  -// first (or last) alignment words.  -  if (SourceFWIdxs[0]>2) { -    int firstCut = 1; -    for (int j=2; j<=SourceFWIdxs[0]; j++) { -      if (SourceFWIdxs[3*j-2]>fas) break; -      firstCut=j; -    } -    int lastCut  = SourceFWIdxs[0]; -    for (int j=SourceFWIdxs[0]-1; j>=0; j--) { -      if (SourceFWIdxs[3*j-2]<las) break; -      lastCut=j; -    } -    if (firstCut>=lastCut) return; -    int delta = 0; -    for (int j=lastCut; j<=SourceFWIdxs[0]; j++) { -      delta++; -      SourceFWIdxs[3*(firstCut+delta)-2]=SourceFWIdxs[3*j-2]; -      SourceFWIdxs[3*(firstCut+delta)-1]=SourceFWIdxs[3*j-1]; -      SourceFWIdxs[3*(firstCut+delta)]  =SourceFWIdxs[3*j]; -    } -    SourceFWIdxs[0]=firstCut+delta; -  } -} - -void Alignment::BorderingTFWsOnly() { -// similar to BorderingSFWsOnly() except this looks at the source side. -  if (TargetFWIdxs[0]>2) { -    int firstCut = 1; -    for (int j=2; j<=TargetFWIdxs[0]; j++) { -      if (TargetFWIdxs[3*j-2]>fat) break; -      firstCut=j; -    } -    int lastCut  = TargetFWIdxs[0]; -    for (int j=TargetFWIdxs[0]-1; j>=0; j--) { -      if (TargetFWIdxs[3*j-2]<lat) break; -      lastCut=j; -    } -    if (firstCut>=lastCut) return; -    int delta = 0; -    for (int j=lastCut; j<=TargetFWIdxs[0]; j++) { -      delta++; -      TargetFWIdxs[3*(firstCut+delta)-2]=TargetFWIdxs[3*j-2]; -      TargetFWIdxs[3*(firstCut+delta)-1]=TargetFWIdxs[3*j-1]; -      TargetFWIdxs[3*(firstCut+delta)]  =TargetFWIdxs[3*j]; -    } -    TargetFWIdxs[0]=firstCut+delta; -  } -} - -void Alignment::FillFWIdxsState(int* state, int fas, int las, int fat, int lat) { -  if (DEBUG) cerr << "FillFWIdxsState ("<< fas <<","<< las<<"," << fat <<"," << lat << ")" << endl; -  if (fas==las) las+=1; -  if (fat==lat) lat+=1; -  for (int idx=0; idx<12; idx++) state[idx]=-1; -  if (SourceFWIdxs[0]<=2) { -    if (SourceFWIdxs[0]>=1) {state[0]=SourceFWIdxs[1]; state[1]=SourceFWIdxs[2]; state[2]=SourceFWIdxs[3];} -    if (SourceFWIdxs[0]==2) {state[3]=SourceFWIdxs[4]; state[4]=SourceFWIdxs[5]; state[5]=SourceFWIdxs[6];} -  } else { -    if (SourceFWIdxs[1]>fas) { -      state[0]=SourceFWIdxs[1]; state[1]=SourceFWIdxs[2]; state[2]=SourceFWIdxs[3]; -    } else { -      ostringstream issf; ostringstream isse; -      for (int idx=1; idx<=SourceFWIdxs[0]; idx++) { -        if (SourceFWIdxs[3*idx-2]>las) break; -        if (idx>1) { issf << " "; isse << " ";}; -        issf << TD::Convert(SourceFWIdxs[3*idx-1]); -        isse << TD::Convert(SourceFWIdxs[3*idx]); -        state[0]=SourceFWIdxs[3*idx-2]; -        if (state[0]>=fas) break; -      } -      if (state[0]>=0) { -        state[1]=TD::Convert(issf.str())*-1; state[2]=TD::Convert(isse.str()); //multiplying source with -1 as marker -      } -    } -    if (SourceFWIdxs[SourceFWIdxs[0]*3-2]==las) { -      state[3]=SourceFWIdxs[SourceFWIdxs[0]*3-2]; -      state[4]=SourceFWIdxs[SourceFWIdxs[0]*3-1];  -      state[5]=SourceFWIdxs[SourceFWIdxs[0]*3]; -    } else { -      int lastCut = SourceFWIdxs[0]; -      for (int j=lastCut-1; j>=state[0]+1; j--) { -        if (SourceFWIdxs[3*j-2]==state[0]) break; -        if (SourceFWIdxs[3*j-2]<las) break; -        lastCut=j; -      } -      state[3]=SourceFWIdxs[3*lastCut-2]; -      ostringstream issf; ostringstream isse; -      for (int idx=lastCut; idx<=SourceFWIdxs[0]; idx++) { -        if (idx>lastCut) { issf << " "; isse << " ";}; -        issf << TD::Convert(SourceFWIdxs[3*idx-1]); -        isse << TD::Convert(SourceFWIdxs[3*idx]); -      } -      if (state[3]>=0) { -        //multiplying source with -1 as compound marker -        state[4]=TD::Convert(issf.str())*-1; state[5]=TD::Convert(isse.str());  -      } -    } -  } -  if (TargetFWIdxs[0]<=2) { -    if (TargetFWIdxs[0]>=1) {state[6]=TargetFWIdxs[1]; state[7]=TargetFWIdxs[2]; state[8]=TargetFWIdxs[3];} -    if (TargetFWIdxs[0]==2) {state[9]=TargetFWIdxs[4]; state[10]=TargetFWIdxs[5]; state[11]=TargetFWIdxs[6];} -  } else { -    if (TargetFWIdxs[1]>fat) { //shouldn't come here if SetTargetBorderingFW is invoked -      state[6]=TargetFWIdxs[1]; state[7]=TargetFWIdxs[2]; state[8]=TargetFWIdxs[3]; -    } else { -      ostringstream issf; ostringstream isse; -      for (int idx=1; idx<=TargetFWIdxs[0]; idx++) { -        if (TargetFWIdxs[3*idx-2]>fat) break; -        if (idx>1) { issf << " "; isse << " ";}; -        issf << TD::Convert(TargetFWIdxs[3*idx-1]); -        isse << TD::Convert(TargetFWIdxs[3*idx]); -        state[6]=TargetFWIdxs[3*idx-2]; -      } -      state[7]=TD::Convert(issf.str()); state[8]=TD::Convert(isse.str())*-1;  -      //multiplying target with -1 as compound marker -    } -    if (TargetFWIdxs[TargetFWIdxs[0]*3-2]==lat) { -      state[9]=TargetFWIdxs[TargetFWIdxs[0]*3-2]; -      state[10]=TargetFWIdxs[TargetFWIdxs[0]*3-1]; -      state[11]=TargetFWIdxs[TargetFWIdxs[0]*3]; -    } else { -      int lastCut = TargetFWIdxs[0]; -      for (int j=lastCut-1; j>=1; j--) { -        if (TargetFWIdxs[3*j-2]<=state[9]) break; -        if (TargetFWIdxs[3*j-2]<lat) break; -        lastCut=j; -      } -      state[9]=TargetFWIdxs[3*lastCut-2]; -      ostringstream issf; ostringstream isse; -      for (int idx=lastCut; idx<=TargetFWIdxs[0]; idx++) { -        if (idx>lastCut) issf << " "; isse << " ";; -        issf << TD::Convert(TargetFWIdxs[3*idx-1]); -        isse << TD::Convert(TargetFWIdxs[3*idx]); -      } -      state[10]=TD::Convert(issf.str()); state[11]=TD::Convert(isse.str())*-1;  -    } -  } -} - -void Alignment::simplifyBackward(vector<int *>*blocks, int* block, const vector<int>& danglings) { -// given a *block*, see whether its target span contains any index inside *danglings*.  -// if yes, break it; otherwise, keep it. put the result(s) to *blocks* -  if (DEBUG) cerr << "simplifyBackward[" << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << "]" << endl; -  if (DEBUG) for (int i=0; i<danglings.size(); i++) cerr << "danglings[" << i << "] = " << danglings[i] << endl; -  if (danglings.size()==0) { -    blocks->push_back(block);  -    if (DEBUG) cerr << "pushing(0) " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -    return;  -  } -  int currIdx = block[2]; -  int i_dangling = 0; -  while (block[2]>danglings[i_dangling])  { -    if (i_dangling+1 >= danglings.size()) break; -    i_dangling++; -  } -  while (danglings[i_dangling]==currIdx) { -    i_dangling++; -    currIdx++; -  }  -  /*if (i_dangling>=danglings.size() && currIdx) { -    blocks->push_back(block);  -    if (DEBUG) cerr << "pushing(1) " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -    return; -  } -  if (block[3]<danglings[i_dangling]) { -    blocks->push_back(block);  -    if (DEBUG) cerr << "pushing(2) " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -    return; -  }*/ -  if (DEBUG) cerr << "i_dangling = " << i_dangling << endl; -  int anchorIdx = danglings[i_dangling]; -  if (i_dangling+1>=danglings.size() || anchorIdx>block[3]+1) anchorIdx=block[3]+1; -  if (DEBUG) cerr << "anchorIdx = " << anchorIdx << ", currIdx = " << currIdx << endl; -  do { -    while(currIdx<anchorIdx) { -      if (DEBUG) cerr << "currIdx = " << currIdx << ", anchorIdx = " << anchorIdx << endl; -      bool isMoved = false; -      for (int idx=anchorIdx-1; idx>=currIdx; idx--) { -        int *nublock = blockTarget(currIdx,idx); -        if (nublock[2]==currIdx && nublock[3]==idx) { -          if (nublock[0]!=MINIMUM_INIT) { -            blocks->push_back(nublock); -            if (DEBUG) cerr << "pushing(3) " << nublock[0] << "," << nublock[1] << "," << nublock[2] << "," << nublock[3] << endl; -          } else {  -            delete nublock;  -          } -          isMoved = true; -          currIdx=idx+1; break; -        } else { -          delete nublock; -        } -      } -      if (DEBUG) cerr << "isMoved=" << isMoved << ", currIdx=" << currIdx << endl; -      if (!isMoved) { -        int source = sourceOf(currIdx); -        while (source>=0) { -          if (source >= block[0]) { -            int* nublock = new int[4]; -            nublock[0]=source; nublock[1]=source; nublock[2]=currIdx; nublock[3]=currIdx; -            blocks->push_back(nublock); -            if (DEBUG) cerr << "pushing(4) " << nublock[0] << "," << nublock[1] << "," << nublock[2] << "," << nublock[3] << endl; -          }  -          source = sourceOf(currIdx,source+1); -        } -        currIdx++; -      } -    } -    currIdx=anchorIdx+1; -    anchorIdx=block[3]+1; -    if (i_dangling+1<danglings.size()) anchorIdx=danglings[++i_dangling]; -  } while(currIdx<=block[3]); -} - -void Alignment::simplify(int* ret) { -  // the idea is to create blocks of maximal consistent alignment in between a pair of function words -  // exceptional cases include: one to non-contiguous many (or vice versa) -> treat this as one alignment each -  // record all function word alignments first, important because it may be unaligned -  // return true if it's truly simple (no function word alignment involves); false, otherwise -  if (DEBUG) cerr << "begin simplify" << endl; -  reset(0,0); reset(_J-1,_I-1); // remove the phrase boundary alignments, NEED TO CHECK AGAIN !!! -  if (SourceFWIdxs[0]+TargetFWIdxs[0]==0) { // return singleton -    if (DEBUG) cerr << "no function words" << endl; -    for (int idx=0; idx<12; idx++) ret[idx]=-1; -    ret[12]=1; ret[13]=0; ret[14]=0; // 0-0 -    FillFWIdxsState(ret,0,0,0,0);   -    return; -  } -  curr_al.insert(curr_al.begin(),curr_al.size()); -  curr_al.push_back(SourceFWIdxs[0]); -  for (int i=1; i<=SourceFWIdxs[0]; i++) curr_al.push_back(SourceFWIdxs[3*i-2]); -  curr_al.push_back(TargetFWIdxs[0]); -  for (int i=1; i<=TargetFWIdxs[0]; i++) curr_al.push_back(TargetFWIdxs[3*i-2]); -  vector<int> el; -  if (simplify_hash.find(curr_al)==simplify_hash.end()) { -    if (DEBUG) { -      cerr << "SourceFWIdxs:" << SourceFWIdxs[0] << endl; -      for (int i=1; i<=SourceFWIdxs[0]; i++)   -        cerr << SourceFWIdxs[3*i-2] << "," <<  SourceFWIdxs[3*i-1] << "," << SourceFWIdxs[3*i] << endl;   -      cerr << "TargetFWIdxs:" << TargetFWIdxs[0] << endl; -      for (int i=1; i<=TargetFWIdxs[0]; i++) {  -        cerr << TargetFWIdxs[3*i-2] << "," <<  TargetFWIdxs[3*i-1] << "," << TargetFWIdxs[3*i] << endl;   -      } -    } - -    vector< int* > blocks; // each element contains s1,s2,t1,t2 -    int currIdx = 1; // start from 1 to avoid considering phrase start -    std::set<int> FWIdxs; -    std::vector<int> DanglingTargetFWIdxs; -    for (int i=1; i<= SourceFWIdxs[0]; i++) FWIdxs.insert(SourceFWIdxs[3*i-2]); -    for (int i=1; i<= TargetFWIdxs[0]; i++) { -      int source = sourceOf(TargetFWIdxs[3*i-2]); -      if (source>=0) { -        do { -          FWIdxs.insert(source); -          source = sourceOf(TargetFWIdxs[3*i-2],source+1); -        } while(source >=0); -      } else { -        int *block = new int[4]; -        block[0]=-1; block[1]=-1; block[2]=TargetFWIdxs[3*i-2]; block[3]=TargetFWIdxs[3*i-2]; -        blocks.push_back(block); -        if (DEBUG) cerr << "pushing[1] " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -        DanglingTargetFWIdxs.push_back(TargetFWIdxs[3*i-2]);   -      } -    } -    if (DEBUG) -      for (std::set<int>::const_iterator iter=FWIdxs.begin(); iter!=FWIdxs.end(); iter++) {  -        cerr << "FWIdxs=" << *iter << endl; -      } -    std::set<int>::const_iterator currFWIdx  = FWIdxs.begin(); -    if (currFWIdx == FWIdxs.end()) { -      int* block = new int[4]; -      block[0]=1; block[1]=_J-2; block[2]=1; block[3]=_I-2; // no need to consider phrase boundaries -      simplifyBackward(&blocks,block,DanglingTargetFWIdxs); -    } else { -      int anchorIdx = *currFWIdx; // also used to denote _J+1 -      do { -        // add alignments whose source from currIdx to currFWIdx-1 -        while (currIdx<anchorIdx) { -          bool isMoved = false; -          //cerr << "anchorIdx = " << anchorIdx << ", currIdx = " << currIdx << endl; -          for (int idx=anchorIdx-1; idx>=currIdx; idx--) { -            int* block = blockSource(currIdx,idx); -            if (block[0]==currIdx&&block[1]==idx) { -              if (block[2]!=MINIMUM_INIT) { // must be aligned -                simplifyBackward(&blocks,block,DanglingTargetFWIdxs);  -              } else { -                delete block; -              } -              currIdx = idx+1; isMoved = true; -              break; -            } else { -              delete block; -            } -          } -          if (!isMoved) { -            int target = targetOf(currIdx); -            while (target>=0) { -              int* block = new int[4]; -              block[0]=currIdx; block[1]=currIdx; block[2]=target; block[3]=target; -              blocks.push_back(block); -              if (DEBUG) cerr << "pushing[2] " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -              target = targetOf(currIdx,target+1); -            } -            currIdx++; -          } -        }     -        // add function word alignments (anchorIdx) -        if (anchorIdx==getJ()) break; -        int target = targetOf(anchorIdx); -        do { -          int* block = new int[4]; -          block[0]=anchorIdx; block[1]=anchorIdx; block[2]=target; block[3]=target; -          blocks.push_back(block); -          if (DEBUG) cerr << "pushing[3] " << block[0] << "," << block[1] << "," << block[2] << "," << block[3] << endl; -          if (target>=0) target = targetOf(anchorIdx,target+1); -        } while (target>=0); -        // advance indexes -        currIdx   = anchorIdx+1; -        anchorIdx = getJ()-1; // was minus 2 -        if (++currFWIdx!=FWIdxs.end()) anchorIdx = *currFWIdx; -      } while (currIdx<=getJ()-2); -    }   - -     -    vector<int> source_block_mapper(getJ(),-1); -    vector<int> target_block_mapper(getI(),-1); -    for (int i = 0; i<blocks.size(); i++) { -      if (DEBUG) cerr << "blocks[" << i << "]=" << blocks[i][0] << "," << blocks[i][1] << "," << blocks[i][2] << "," << blocks[i][3] << endl; -      if (blocks[i][0]>=0) source_block_mapper[blocks[i][0]]=1; -      if (blocks[i][2]>=0) target_block_mapper[blocks[i][2]]=1;  -    } -    int curr = 1; -    int prev = -1; -    for (int idx=0; idx<source_block_mapper.size(); idx++) { -      if (source_block_mapper[idx]>0) { -        source_block_mapper[idx]=curr++; -        prev = curr; -      } else { -        source_block_mapper[idx]=prev; -      } -    } -    curr = 1; -    for (int idx=0; idx<target_block_mapper.size(); idx++) { -      if (target_block_mapper[idx]>0) { -        target_block_mapper[idx]=curr++; -        prev = curr; -      } else { -        target_block_mapper[idx]=prev; -      } -    } -     -    //assert(blocks.size()<=50); -    if (DEBUG) cerr << "resulting alignment:" << endl; -    for (int i = 0; i<blocks.size(); i++) { -      if (blocks[i][2]<0 || blocks[i][0]<0) continue; -      int source = source_block_mapper[blocks[i][0]]-1; -      int target = target_block_mapper[blocks[i][2]]-1; -      el.push_back(link(source,target)); -      if (DEBUG) cerr << source << "-" << target << " "; -    } -    el.insert(el.begin(),el.size()); -    if (DEBUG) cerr << endl; -    el.push_back(SourceFWIdxs[0]); -    for (int idx=1; idx<=SourceFWIdxs[0]; idx++) { -      if (DEBUG) cerr << "SourceFWIdxs[" << (3*idx-2) << "] from " << SourceFWIdxs[3*idx-2] << endl; -      el.push_back(source_block_mapper[SourceFWIdxs[3*idx-2]]-1); -    } -    el.push_back(TargetFWIdxs[0]); -    for (int idx=1; idx<=TargetFWIdxs[0]; idx++) { -      if (DEBUG) cerr << "TargetFWIdxs[" << (3*idx-2) << "] from " << TargetFWIdxs[3*idx-2] << endl; -      el.push_back(target_block_mapper[TargetFWIdxs[3*idx-2]]-1); -    } -    el.push_back(source_block_mapper[fas]-1);  -    el.push_back(source_block_mapper[las]-1); -    el.push_back(target_block_mapper[fat]-1); -    el.push_back(target_block_mapper[lat]-1); -    if (DEBUG) { -      cerr << "insert key:el = "; -      for (int ii=0; ii<el.size(); ii++)  -        cerr << ii << "." << el[ii] << " "; -      cerr << " || " << endl; -    } -    if (DEBUG) cerr << "trying to insertL " <<  endl; -    if (DEBUG) { -      cerr << "size=" << curr_al.size() << " "; -      for (int ii=0; ii<curr_al.size(); ii++) cerr << "curr_al[" << ii << "]=" << curr_al[ii] << " "; -      cerr << endl; -    } -    simplify_hash.insert(pair<vector<int>, vector<int> > (curr_al,el)); -    if (DEBUG) cerr << "inserted" << endl; -  } else { -    el = simplify_hash[curr_al]; -  } -  if (DEBUG) { -    cerr << "pull key:el = "; -    for (int ii=0; ii<el.size(); ii++) -      cerr << ii << "." << el[ii] << " "; -    cerr << endl; -  } -  ret[12] = el[0]; -  for (int i=1; i<=el[0]; i++) ret[12+i] = el[i]; -  int istart = el[0]+1; -  assert(el[istart]==SourceFWIdxs[0]); -  for (int i=1; i<=el[istart]; i++) SourceFWIdxs[3*i-2]=el[istart+i]; -  istart += el[istart]+1; -  assert(el[istart]==TargetFWIdxs[0]); -  for (int i=1; i<=el[istart]; i++) TargetFWIdxs[3*i-2]=el[istart+i]; -  istart += el[istart]+1; -  FillFWIdxsState(ret,el[istart],el[istart+1],el[istart+2],el[istart+3]); -} - -void Alignment::simplify_nofw(int* ret) { -  for (int i=0; i<12; i++) ret[i]=-1;   -  ret[12]=1; ret[13]=0;  -} - -void Alignment::sort(int* num) { -  if (num[0]>1) quickSort(num,1,num[0]); -} - -void Alignment::quickSort(int arr[], int left, int right) { -  int i = left, j = right; -  int tmp1,tmp2,tmp3; -  int mid = (left + right) / 2; -  int pivot = arr[3*mid-2]; -  -  /* partition */ -  while (i <= j) { -    while (arr[3*i-2] < pivot) i++; -    while (arr[3*j-2] > pivot) j--; -    if (i <= j) { -      tmp1 = arr[3*i-2]; tmp2 = arr[3*i-1]; tmp3 = arr[3*i]; -      arr[3*i-2] = arr[3*j-2]; arr[3*i-1] = arr[3*j-1]; arr[3*i] = arr[3*j]; -      arr[3*j-2] = tmp1; arr[3*j-1] = tmp2; arr[3*j] = tmp3; -      i++; -      j--; -    } -  }; -  -  /* recursion */ -  if (left < j) quickSort(arr, left, j); -  if (i < right) quickSort(arr, i, right); -} - -double Alignment::ScoreOrientation(const CountTable& table, int offset, int ori, WordID cond1, WordID cond2) { -  string source = TD::Convert(cond1); -  string sourceidx; -  if (table.mode == 1) { -    sourceidx = source;  -    int slashidx = sourceidx.find_last_of("/"); -    source = sourceidx.substr(0,slashidx); -    string idx = sourceidx.substr(slashidx+1); -    if (DEBUG) cerr << "   sourceidx = " << sourceidx << ", idx = " << idx << endl; -    if (idx == "X") { -      if (DEBUG) cerr << "        idx == X, returning 0" << endl; -      return 0; -    } -  } -  string target = TD::Convert(cond2); -  if (DEBUG) cerr << "sourceidx='" << sourceidx << "', source='" << source << "', target='" << target << "'" << endl; -  double count = table.ultimate[offset+ori-1]; -  double total = table.ultimate[offset+5];    -  double alpha = 0.1; -  double prob = count/total; -  if (DEBUG) cerr << "level0 " << count << "/" << total << "=" << prob << endl; -   -  WordID key_id = (table.mode!=1) ? cond1 : TD::Convert(source);  -  map<WordID,int*>::const_iterator it = table.model.find(key_id); -  bool stop = (it==table.model.end()); -  if (!stop) { -    stop=true; -    if (it->second[offset+5]>=0) { -      count = it->second[offset+ori-1] + alpha * prob; -      total = it->second[offset+5] + alpha; -      prob = count/total; -      stop = false; -      if (DEBUG) cerr << "level1 " << count << "/" << total << "=" << prob << endl; -    } -  } -  if (stop) return prob; - -  string key = source + " " + target; -  it = table.model.find(TD::Convert(key)); -  stop = (it==table.model.end()); -  if (!stop) { -    stop = true; -    if (it->second[offset+5]>=0) { -      count = it->second[offset+ori-1] + alpha * prob; -      total = it->second[offset+5] + alpha; -      prob = count/total; -      stop = false; -      if (DEBUG) cerr << "level2 " << count << "/" << total << "=" << prob << endl; -    } -  } - -  if (stop || table.mode!=1) return prob; -   -  key = sourceidx + " " + target; -  it = table.model.find(TD::Convert(key)); -  if (it!=table.model.end()) { -    if (it->second[offset+5]>=0) { -      count = it->second[offset+ori-1] + alpha * prob; -      total = it->second[offset+5] + alpha; -      prob = count/total; -      if (DEBUG) cerr << "level3 " << count << "/" << total << "=" << prob << endl; -    } -  } - -  return prob; -} -  -void Alignment::ScoreOrientation(const CountTable& table, int offset, int ori, WordID cond1, WordID cond2,  -  bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, -  double alpha1, double beta1) { -  if (DEBUG) cerr << "ScoreOrientation:" << TD::Convert(cond1) << "," << TD::Convert(cond2) << ", alpha1 = " << alpha1 << ", beta1 = " << beta1 << endl; -  double ret = ScoreOrientation(table,offset,ori,cond1,cond2); -  if (isBonus) { -    if (table.mode == 0) *bonus += log(ret); else *bonus += ret; -  } else { -    if (table.mode == 0) *cost += log(ret); else *cost += ret; -  } -} - -double Alignment::ScoreOrientationLeft(const CountTable& table, int ori, WordID cond1, WordID cond2) { -  double ret = ScoreOrientation(table,0,ori,cond1,cond2);  -  if (table.mode == 0) return log(ret); -  return ret; -} - -double Alignment::ScoreOrientationLeftBackward(const CountTable& table, int ori, WordID cond1, WordID cond2) { -  double ret = ScoreOrientation(table,12,ori,cond1,cond2);  -  if (table.mode == 0) return log(ret); -  return ret; -} -  -double Alignment::ScoreOrientationRight(const CountTable& table, int ori, WordID cond1, WordID cond2) { -  double ret = ScoreOrientation(table,6,ori,cond1,cond2);  -  if (table.mode == 0) return log(ret); -  return ret; -} - -double Alignment::ScoreOrientationRightBackward(const CountTable& table, int ori, WordID cond1, WordID cond2) { -  double ret = ScoreOrientation(table,18,ori,cond1,cond2);  -  if (table.mode == 0) return log(ret); -  return ret; -} - -void Alignment::ScoreOrientationLeft(const CountTable& table, int ori, WordID cond1, WordID cond2,  -  bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, double alpha1, double beta1) { -  if (DEBUG) cerr << "ScoreOrientationLeft(" << isBonus << ")" << endl; -  ScoreOrientation(table,0,ori,cond1,cond2,isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha1,beta1); -} - -void Alignment::ScoreOrientationLeftBackward(const CountTable& table, int ori, WordID cond1, WordID cond2,  -  bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, double alpha1, double beta1) { -  if (DEBUG) cerr << "ScoreOrientationLeftBackward" << endl; -  ScoreOrientation(table,12,ori,cond1,cond2,isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha1,beta1); -} - -void Alignment::ScoreOrientationRight(const CountTable& table, int ori, WordID cond1, WordID cond2,  -  bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, double alpha1, double beta1) { -  if (DEBUG) cerr << "ScoreOrientationRight(" << isBonus << ")" << endl; -  ScoreOrientation(table,6,ori,cond1,cond2,isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha1,beta1); -} - -void Alignment::ScoreOrientationRightBackward(const CountTable& table, int ori, WordID cond1, WordID cond2, -  bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, double alpha1, double beta1) { -  if (DEBUG) cerr << "ScoreOrientationRightBackward" << endl; -  ScoreOrientation(table,18,ori,cond1,cond2,isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha1,beta1); -} - -void Alignment::computeOrientationSourceBackwardPos(const CountTable& table, double *cost, double *bonus, -                                                 double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, int maxfwidx, int maxdepth1, int maxdepth2) { -  if (DEBUG) cerr << "computeOrientationSourceBackward" << endl; -  int oril, orir; -  for (int idx=1; idx<=SourceFWRuleIdxs[0]; idx++) { -    if (DEBUG) cerr << "considering SourceFWRuleIdxs[" << idx << "]: " << SourceFWRuleIdxs[3*idx-2] << endl; -    if (!(SourceFWRuleAbsIdxs[idx]<=maxdepth1 || maxfwidx-SourceFWRuleAbsIdxs[idx]+1<=maxdepth2)) continue; -    int* fwblock = blockSource(SourceFWRuleIdxs[3*idx-2],SourceFWRuleIdxs[3*idx-2]); -    bool aligned = (fwblock[2]!=MINIMUM_INIT); -    if (aligned) { -      OrientationTarget(fwblock[2],fwblock[3],&oril,&orir); -    } else { -      OrientationSource(SourceFWRuleIdxs[3*idx-2],&oril,&orir); -    } -    if (DEBUG) cerr << "oril = " << oril << ", orir = " << orir << endl; -    bool isBonus = false; // fas -> first aligned source word, las -> last aligned source word -    if ((aligned && fwblock[2]<=fat)|| -        (!aligned && SourceFWRuleIdxs[3*idx-2]<=fas)) isBonus=true; -    if (SourceFWRuleAbsIdxs[idx]<=maxdepth1) { -      ostringstream nusource; -      nusource << TD::Convert(SourceFWRuleIdxs[3*idx-1]) << "/" << SourceFWRuleAbsIdxs[idx]; -      ScoreOrientationLeftBackward(table,oril,TD::Convert(nusource.str()),SourceFWRuleIdxs[3*idx], -                         isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -    } -    if (maxfwidx-SourceFWRuleAbsIdxs[idx]+1<=maxdepth2) { -      ostringstream nusource; -      nusource << TD::Convert(SourceFWRuleIdxs[3*idx-1]) << "/" << ((maxfwidx-SourceFWRuleAbsIdxs[idx]+1)*-1); -      ScoreOrientationLeftBackward(table,oril,TD::Convert(nusource.str()),SourceFWRuleIdxs[3*idx], -                         isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -    } -    isBonus = false; -    if ((aligned && lat<=fwblock[3])|| -        (!aligned && las<=SourceFWRuleIdxs[3*idx-2])) isBonus=true; -    if (SourceFWRuleAbsIdxs[idx]<=maxdepth1) { -      ostringstream nusource; -      nusource << TD::Convert(SourceFWRuleIdxs[3*idx-1]) << "/" << SourceFWRuleAbsIdxs[idx]; -      ScoreOrientationRightBackward(table,orir,SourceFWRuleIdxs[3*idx-1],SourceFWRuleIdxs[3*idx], -                          isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -    } -    if (maxfwidx-SourceFWRuleAbsIdxs[idx]+1<=maxdepth2) { -      ostringstream nusource; -      nusource << TD::Convert(SourceFWRuleIdxs[3*idx-1]) << "/" << ((maxfwidx-SourceFWRuleAbsIdxs[idx]+1)*-1); -      ScoreOrientationRightBackward(table,orir,SourceFWRuleIdxs[3*idx-1],SourceFWRuleIdxs[3*idx], -                          isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -    } -    delete fwblock; -  } -  for (int i_ant=0; i_ant<_Arity; i_ant++) { -    // antfas -> first aligned source word antecedent-wise -    // antlas -> last aligned source word antecedent-wise -    int antfat = firstTargetAligned(TargetAntsIdxs[i_ant][1]); -    int antlat = lastTargetAligned(TargetAntsIdxs[i_ant][TargetAntsIdxs[i_ant][0]]); -    int antfas = firstSourceAligned(SourceAntsIdxs[i_ant][1]); -    int antlas = lastSourceAligned(SourceAntsIdxs[i_ant][SourceAntsIdxs[i_ant][0]]); -    assert(antfat <= antlat); -    assert(antfas <= antlas); -    for (int idx=1; idx<=SourceFWAntsIdxs[i_ant][0]; idx++) { -      if (DEBUG) -        cerr << "considering SourceFWAntsIdxs[" << i_ant << "][" << idx << "]: " << SourceFWAntsIdxs[i_ant][3*idx-2] << endl; -      if (!(SourceFWAntsAbsIdxs[i_ant][idx]<=maxdepth1 || maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1<=maxdepth2)) continue; -      int* fwblock = blockSource(SourceFWAntsIdxs[i_ant][3*idx-2],SourceFWAntsIdxs[i_ant][3*idx-2]); -      //bool aligned = (minTSpan(SourceFWAntsIdxs[i_ant][3*idx-2])!=MINIMUM_INIT); -      bool aligned = (fwblock[2]!=MINIMUM_INIT); -      bool Lcompute = true; bool Rcompute = true; -      if (DEBUG) { -        cerr << " aligned = " << aligned << endl; -        cerr << "  fwblock = " << fwblock[0] << "," << fwblock[1] << "," << fwblock[2] << "," << fwblock[3] << endl; -        cerr << "   antfas=" << antfas << ", antlas=" << antlas << ", antfat=" << antfat << ", antlat=" << antlat << endl; -      } -      if (aligned) { -        if (DEBUG) cerr << "laligned" << endl; -        if (antfat<fwblock[2]) { -          if (DEBUG) cerr << antfat << "<" << fwblock[2] << endl; -          Lcompute=false; -        } -      } else { -        if (DEBUG) cerr << "!laligned" << endl; -        if (antfas<fwblock[0] && fwblock[1] < antlas) Lcompute=false; -      } -      if (aligned) { -        if (DEBUG) cerr << "raligned" << endl; -        if (fwblock[3]<antlat) { -          if (DEBUG) cerr << fwblock[3] << "<" << antlat << endl; -          Rcompute=false; -        } -      } else { -        if (DEBUG) cerr << "!raligned" << endl; -        if (fwblock[1]<antlas && fwblock[1] < antlas) Rcompute=false; -      } -      if (!Lcompute && !Rcompute) continue; -      if (!aligned) { -        OrientationSource(SourceFWAntsIdxs[i_ant][3*idx-2],&oril,&orir,Lcompute,Rcompute); -      } else { -        OrientationTarget(fwblock[2],fwblock[3],&oril,&orir,Lcompute,Rcompute); -      } -      if (DEBUG) cerr << "oril = " << oril << ", orir = " << orir << endl; -      bool isBonus = false; -      if (Lcompute) { -        if ((aligned && fwblock[3]<=fat) || -            (!aligned && SourceFWAntsIdxs[i_ant][3*idx-2]<=fas)) isBonus = true; -        if (SourceFWAntsAbsIdxs[i_ant][idx]<=maxdepth1) { -          ostringstream nusource; -          nusource << TD::Convert(SourceFWAntsIdxs[i_ant][3*idx-1]) << "/" << SourceFWAntsAbsIdxs[i_ant][idx]; -          ScoreOrientationLeftBackward(table,oril,TD::Convert(nusource.str()),SourceFWAntsIdxs[i_ant][3*idx], -                             isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -        } -        if (maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1<=maxdepth2) { -          ostringstream nusource; -          nusource << TD::Convert(SourceFWAntsIdxs[i_ant][3*idx-1]) << "/" << (-1*(maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1)); -          ScoreOrientationLeftBackward(table,oril,TD::Convert(nusource.str()),SourceFWAntsIdxs[i_ant][3*idx], -                             isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -        } -      } -      isBonus = false; -      if (Rcompute) { -        if ((aligned && lat<=fwblock[2]) || -            (!aligned && las<=SourceFWAntsIdxs[i_ant][3*idx-2]))isBonus = true; -        if (SourceFWAntsAbsIdxs[i_ant][idx]<=maxdepth1) { -          ostringstream nusource; -          nusource << TD::Convert(SourceFWAntsIdxs[i_ant][3*idx-1]) << "/" << SourceFWAntsAbsIdxs[i_ant][idx]; -          ScoreOrientationRightBackward(table,orir,TD::Convert(nusource.str()),SourceFWAntsIdxs[i_ant][3*idx], -                              isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -        } -        if (maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1<=maxdepth2) { -          ostringstream nusource; -          nusource << TD::Convert(SourceFWAntsIdxs[i_ant][3*idx-1]) << "/" << (-1*(maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1)); -          ScoreOrientationRightBackward(table,orir,TD::Convert(nusource.str()),SourceFWAntsIdxs[i_ant][3*idx], -                              isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -        } -      } -      delete fwblock; -    } -  } -} - - -void Alignment::computeOrientationSourceBackward(const CountTable& table, double *cost, double *bonus,  -                                                 double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus) { -  if (DEBUG) cerr << "computeOrientationSourceBackward" << endl; -  int oril, orir; -  for (int idx=1; idx<=SourceFWRuleIdxs[0]; idx++) { -    if (DEBUG) cerr << "considering SourceFWRuleIdxs[" << idx << "]: " << SourceFWRuleIdxs[3*idx-2] << endl; -    int* fwblock = blockSource(SourceFWRuleIdxs[3*idx-2],SourceFWRuleIdxs[3*idx-2]); -    bool aligned = (fwblock[2]!=MINIMUM_INIT); -    if (aligned) { -      OrientationTarget(fwblock[2],fwblock[3],&oril,&orir); -    } else { -      OrientationSource(SourceFWRuleIdxs[3*idx-2],&oril,&orir); -    } -    if (DEBUG) cerr << "oril = " << oril << ", orir = " << orir << endl; -    bool isBonus = false; // fas -> first aligned source word, las -> last aligned source word -    if ((aligned && fwblock[2]<=fat)|| -        (!aligned && SourceFWRuleIdxs[3*idx-2]<=fas)) isBonus=true; -    ScoreOrientationLeftBackward(table,oril,SourceFWRuleIdxs[3*idx-1],SourceFWRuleIdxs[3*idx], -                         isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -    isBonus = false; -    if ((aligned && lat<=fwblock[3])|| -        (!aligned && las<=SourceFWRuleIdxs[3*idx-2])) isBonus=true; -    ScoreOrientationRightBackward(table,orir,SourceFWRuleIdxs[3*idx-1],SourceFWRuleIdxs[3*idx], -                          isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -    delete fwblock; -  } -  for (int i_ant=0; i_ant<_Arity; i_ant++) { -    // antfas -> first aligned source word antecedent-wise -    // antlas -> last aligned source word antecedent-wise -    int antfat = firstTargetAligned(TargetAntsIdxs[i_ant][1]); -    int antlat = lastTargetAligned(TargetAntsIdxs[i_ant][TargetAntsIdxs[i_ant][0]]); -    int antfas = firstSourceAligned(SourceAntsIdxs[i_ant][1]); -    int antlas = lastSourceAligned(SourceAntsIdxs[i_ant][SourceAntsIdxs[i_ant][0]]); -    assert(antfat <= antlat); -    assert(antfas <= antlas); -    for (int idx=1; idx<=SourceFWAntsIdxs[i_ant][0]; idx++) { -      if (DEBUG) -        cerr << "considering SourceFWAntsIdxs[" << i_ant << "][" << idx << "]: " << SourceFWAntsIdxs[i_ant][3*idx-2] << endl; -      int* fwblock = blockSource(SourceFWAntsIdxs[i_ant][3*idx-2],SourceFWAntsIdxs[i_ant][3*idx-2]); -      //bool aligned = (minTSpan(SourceFWAntsIdxs[i_ant][3*idx-2])!=MINIMUM_INIT); -      bool aligned = (fwblock[2]!=MINIMUM_INIT); -      bool Lcompute = true; bool Rcompute = true; -      if (DEBUG) { -        cerr << " aligned = " << aligned << endl; -        cerr << "  fwblock = " << fwblock[0] << "," << fwblock[1] << "," << fwblock[2] << "," << fwblock[3] << endl; -        cerr << "   antfas=" << antfas << ", antlas=" << antlas << ", antfat=" << antfat << ", antlat=" << antlat << endl;  -      } -      if (aligned) { -        if (DEBUG) cerr << "laligned" << endl; -        if (antfat<fwblock[2]) {  -          if (DEBUG) cerr << antfat << "<" << fwblock[2] << endl; -          Lcompute=false; -        } -      } else { -        if (DEBUG) cerr << "!laligned" << endl; -        if (antfas<fwblock[0] && fwblock[1] < antlas) Lcompute=false; -      } -      if (aligned) {  -        if (DEBUG) cerr << "raligned" << endl; -        if (fwblock[3]<antlat) {  -          if (DEBUG) cerr << fwblock[3] << "<" << antlat << endl; -          Rcompute=false; -        } -      } else { -        if (DEBUG) cerr << "!raligned" << endl; -        if (fwblock[1]<antlas && fwblock[1] < antlas) Rcompute=false; -      } -      if (!Lcompute && !Rcompute) continue; -      if (!aligned) { -        OrientationSource(SourceFWAntsIdxs[i_ant][3*idx-2],&oril,&orir,Lcompute,Rcompute); -      } else { -        OrientationTarget(fwblock[2],fwblock[3],&oril,&orir,Lcompute,Rcompute); -      } -      if (DEBUG) cerr << "oril = " << oril << ", orir = " << orir << endl; -      bool isBonus = false; -      if (Lcompute) { -        if ((aligned && fwblock[3]<=fat) || -            (!aligned && SourceFWAntsIdxs[i_ant][3*idx-2]<=fas)) isBonus = true; -        ScoreOrientationLeftBackward(table,oril,SourceFWAntsIdxs[i_ant][3*idx-1],SourceFWAntsIdxs[i_ant][3*idx], -                             isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -      } -      isBonus = false; -      if (Rcompute) { -        if ((aligned && lat<=fwblock[2]) || -            (!aligned && las<=SourceFWAntsIdxs[i_ant][3*idx-2]))isBonus = true; -        ScoreOrientationRightBackward(table,orir,SourceFWAntsIdxs[i_ant][3*idx-1],SourceFWAntsIdxs[i_ant][3*idx], -                              isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -      } -      delete fwblock; -    } -  } -} - -void Alignment::computeOrientationSourcePos(const CountTable& table, double *cost, double *bonus, -                double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, int maxfwidx, int maxdepth1, int maxdepth2) { -  // This implementation is actually really bad, not reusing codes at all -  if (DEBUG) cerr << "computeOrientationSourcePos(maxfwidx=" << maxfwidx << ",maxdepth=" << maxdepth1 << "," << maxdepth2 << ")" << endl; -  if (maxdepth1+maxdepth2==0) return; -  int oril, orir; -  ostringstream oss; -  WordID sourceID; -  for (int idx=1; idx<=SourceFWRuleIdxs[0]; idx++) { -    if (DEBUG) cerr << "considering SourceFWRuleIdxs[" << idx << "]: " << SourceFWRuleIdxs[3*idx-2] << endl; -    //if (!((SourceFWRuleAbsIdxs[idx]<=maxdepth1) || (maxfwidx-SourceFWRuleAbsIdxs[idx]+1<=maxdepth2))) continue;  -    string source = TD::Convert(SourceFWRuleIdxs[3*idx-1]); -    OrientationSource(SourceFWRuleIdxs[3*idx-2],&oril,&orir); -    bool isBonus = false; // fas -> first aligned source word, las -> last aligned source word -    if (SourceFWRuleIdxs[3*idx-2]<=fas) isBonus=true; -    if (!isBonus) // this is unnecessary because fas <= las assertion -      if (minTSpan(SourceFWRuleIdxs[3*idx-2])==MINIMUM_INIT && las<=SourceFWRuleIdxs[3*idx-2]) isBonus=true; -    if (maxdepth1>0) { -      oss << source << "/"; -      if (SourceFWRuleAbsIdxs[idx]<=maxdepth1)  -        oss << SourceFWRuleAbsIdxs[idx]; -      else  -        oss << "X"; -      sourceID = TD::Convert(oss.str()); -      oss.str(""); -      ScoreOrientationLeft(table,oril,sourceID,SourceFWRuleIdxs[3*idx], -                         isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -    } -    if (maxdepth2>0) { -      oss << source << "/"; -      if (maxfwidx-SourceFWRuleAbsIdxs[idx]+1<=maxdepth2) -        oss << ((maxfwidx-SourceFWRuleAbsIdxs[idx]+1)*-1); -      else  -        oss << "X"; -      sourceID = TD::Convert(oss.str()); -      oss.str(""); -      ScoreOrientationLeft(table,oril,sourceID,SourceFWRuleIdxs[3*idx], -                         isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -    }  -    isBonus = false; -    if (las<=SourceFWRuleIdxs[3*idx-2]) isBonus=true; -    if (!isBonus) // this is unnecessary becuase fas <= las assertion -      if (minTSpan(SourceFWRuleIdxs[3*idx-2])==MINIMUM_INIT && SourceFWRuleIdxs[3*idx-2]<=fas) isBonus=true; -    if (maxdepth1>0) { -      oss << source << "/"; -      if (SourceFWRuleAbsIdxs[idx]<=maxdepth1) -        oss << SourceFWRuleAbsIdxs[idx]; -      else -        oss << "X"; -      sourceID = TD::Convert(oss.str()); -      oss.str(""); -      ScoreOrientationRight(table,orir,sourceID,SourceFWRuleIdxs[3*idx], -                          isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -    } -    if (maxdepth2>0) { -      oss << source << "/"; -      if (maxfwidx-SourceFWRuleAbsIdxs[idx]+1<=maxdepth2) -        oss << ((maxfwidx-SourceFWRuleAbsIdxs[idx]+1)*-1); -      else -        oss << "X"; -      sourceID = TD::Convert(oss.str()); -      oss.str(""); -      ScoreOrientationRight(table,orir,sourceID,SourceFWRuleIdxs[3*idx], -                          isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -    } - -  } -  for (int i_ant=0; i_ant<_Arity; i_ant++) { -    for (int idx=1; idx<=SourceFWAntsIdxs[i_ant][0]; idx++) { -      if (DEBUG) -        cerr << "considering SourceFWAntsIdxs[" << i_ant << "][" << idx << "]: " << SourceFWAntsIdxs[i_ant][3*idx-2] << endl; -      //if (!((SourceFWAntsAbsIdxs[i_ant][idx]<=maxdepth1)||(maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1<=maxdepth2))) continue; -      // antfas -> first aligned source word antecedent-wise -      // antlas -> last aligned source word antecedent-wise -      int antfas = firstSourceAligned(SourceAntsIdxs[i_ant][1]); -      int antlas = lastSourceAligned(SourceAntsIdxs[i_ant][SourceAntsIdxs[i_ant][0]]); -      if (DEBUG) cerr << " SourceFWAntsAbsIdxs[i_ant][3*idx-1]=" << SourceFWAntsAbsIdxs[i_ant][3*idx-1] << endl; -      string source = TD::Convert(SourceFWAntsIdxs[i_ant][3*idx-1]);  -      assert(antfas <= antlas); -      bool aligned = (minTSpan(SourceFWAntsIdxs[i_ant][3*idx-2])!=MINIMUM_INIT); -      bool Lcompute = true;bool Rcompute = true; -      if ((aligned && antfas<SourceFWAntsIdxs[i_ant][3*idx-2]) || -          (!aligned && antfas < SourceFWAntsIdxs[i_ant][3*idx-2] && SourceFWAntsIdxs[i_ant][3*idx-2] < antlas)) -          Lcompute=false; -      if ((aligned && SourceFWAntsIdxs[i_ant][3*idx-2]<antlas) || -          (!aligned && antfas < SourceFWAntsIdxs[i_ant][3*idx-2] && SourceFWAntsIdxs[i_ant][3*idx-2] < antlas)) -          Rcompute=false; -      if (!Lcompute && !Rcompute) continue; -      OrientationSource(SourceFWAntsIdxs[i_ant][3*idx-2],&oril,&orir,Lcompute, Rcompute); -      bool isBonus = false; -      if (Lcompute) { -        if (SourceFWAntsIdxs[i_ant][3*idx-2]<=fas) isBonus = true; -        //if (!isBonus)  // this is unnecessary -        //  if (!aligned && las<=SourceFWAntsIdxs[i_ant][3*idx-2]) isBonus=true; -        if (maxdepth1>0) { -          oss << source << "/"; -          if (SourceFWAntsAbsIdxs[i_ant][idx]<=maxdepth1) -            oss << SourceFWAntsAbsIdxs[i_ant][idx]; -          else -            oss << "X"; -          sourceID = TD::Convert(oss.str()); -          oss.str(""); -          ScoreOrientationLeft(table,oril,sourceID,SourceFWAntsIdxs[i_ant][3*idx], -                             isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -        } -        if (maxdepth2>0) { -          oss << source << "/"; -          if (maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1<=maxdepth2) -            oss << ((maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1)*-1); -          else -            oss << "X"; -          sourceID = TD::Convert(oss.str()); -          oss.str(""); -          ScoreOrientationLeft(table,oril,sourceID,SourceFWAntsIdxs[i_ant][3*idx], -                             isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -        } -      } -      isBonus = false; -      if (Rcompute) { -        if (las<=SourceFWAntsIdxs[i_ant][3*idx-2]) isBonus = true; -        //if (!isBonus) // this is unnecessary -        //  if (!aligned && SourceFWAntsIdxs[i_ant][3*idx-2]<=fas) isBonus=true; -        if (maxdepth1>0) { -          oss << source << "/"; -          if (SourceFWAntsAbsIdxs[i_ant][idx]<=maxdepth1) -            oss << SourceFWAntsAbsIdxs[i_ant][idx]; -          else -            oss << "X"; -          sourceID = TD::Convert(oss.str()); -          oss.str(""); -          ScoreOrientationRight(table,orir,sourceID,SourceFWAntsIdxs[i_ant][3*idx], -                              isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -        }	 -       if (maxdepth2>0) { -          oss << source << "/"; -          if (maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1<=maxdepth2) -            oss << ((maxfwidx-SourceFWAntsAbsIdxs[i_ant][idx]+1)*-1); -          else -            oss << "X"; -          sourceID = TD::Convert(oss.str()); -          oss.str(""); -          ScoreOrientationRight(table,orir,sourceID,SourceFWAntsIdxs[i_ant][3*idx], -                              isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -        }	 -      } -    } -  } -} - -void Alignment::computeOrientationSource(const CountTable& table, double *cost, double *bonus,  -                                         double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus) { -// a bit complex due to imperfect state (TO DO!!!) -// 1. there are cases where function word alignments come from antecedents, which orientation  -// (either its left or its right) has been computed earlier.    -// 2. some orientation will go as bonus -  if (DEBUG) cerr << "computeOrientationSource" << endl; -  int oril, orir; -  for (int idx=1; idx<=SourceFWRuleIdxs[0]; idx++) { -    if (DEBUG) cerr << "considering SourceFWRuleIdxs[" << idx << "]: " << SourceFWRuleIdxs[3*idx-2] << endl;  -    OrientationSource(SourceFWRuleIdxs[3*idx-2],&oril,&orir); -    bool isBonus = false; // fas -> first aligned source word, las -> last aligned source word -    if (SourceFWRuleIdxs[3*idx-2]<=fas) isBonus=true; -    if (!isBonus) // this is unnecessary because fas <= las assertion  -      if (minTSpan(SourceFWRuleIdxs[3*idx-2])==MINIMUM_INIT && las<=SourceFWRuleIdxs[3*idx-2]) isBonus=true; -    ScoreOrientationLeft(table,oril,SourceFWRuleIdxs[3*idx-1],SourceFWRuleIdxs[3*idx], -                         isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris);  -    isBonus = false; -    if (las<=SourceFWRuleIdxs[3*idx-2]) isBonus=true; -    if (!isBonus) // this is unnecessary becuase fas <= las assertion -      if (minTSpan(SourceFWRuleIdxs[3*idx-2])==MINIMUM_INIT && SourceFWRuleIdxs[3*idx-2]<=fas) isBonus=true; -    ScoreOrientationRight(table,orir,SourceFWRuleIdxs[3*idx-1],SourceFWRuleIdxs[3*idx], -                          isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -  } -  for (int i_ant=0; i_ant<_Arity; i_ant++) { -    for (int idx=1; idx<=SourceFWAntsIdxs[i_ant][0]; idx++) { -      if (DEBUG) -        cerr << "considering SourceFWAntsIdxs[" << i_ant << "][" << idx << "]: " << SourceFWAntsIdxs[i_ant][3*idx-2] << endl; -      // antfas -> first aligned source word antecedent-wise -      // antlas -> last aligned source word antecedent-wise -      int antfas = firstSourceAligned(SourceAntsIdxs[i_ant][1]); -      int antlas = lastSourceAligned(SourceAntsIdxs[i_ant][SourceAntsIdxs[i_ant][0]]); -      assert(antfas <= antlas); -      bool aligned = (minTSpan(SourceFWAntsIdxs[i_ant][3*idx-2])!=MINIMUM_INIT); -      bool Lcompute = true;bool Rcompute = true; -      if ((aligned && antfas<SourceFWAntsIdxs[i_ant][3*idx-2]) || -          (!aligned && antfas < SourceFWAntsIdxs[i_ant][3*idx-2] && SourceFWAntsIdxs[i_ant][3*idx-2] < antlas))  -          Lcompute=false;  -      if ((aligned && SourceFWAntsIdxs[i_ant][3*idx-2]<antlas) || -          (!aligned && antfas < SourceFWAntsIdxs[i_ant][3*idx-2] && SourceFWAntsIdxs[i_ant][3*idx-2] < antlas))  -          Rcompute=false; -      if (!Lcompute && !Rcompute) continue; -      OrientationSource(SourceFWAntsIdxs[i_ant][3*idx-2],&oril,&orir,Lcompute, Rcompute); -      bool isBonus = false; -      if (Lcompute) { -        if (SourceFWAntsIdxs[i_ant][3*idx-2]<=fas) isBonus = true; -        //if (!isBonus)  // this is unnecessary -        //  if (!aligned && las<=SourceFWAntsIdxs[i_ant][3*idx-2]) isBonus=true; -        ScoreOrientationLeft(table,oril,SourceFWAntsIdxs[i_ant][3*idx-1],SourceFWAntsIdxs[i_ant][3*idx], -                             isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris);  -      } -      isBonus = false; -      if (Rcompute) { -        if (las<=SourceFWAntsIdxs[i_ant][3*idx-2]) isBonus = true; -        //if (!isBonus) // this is unnecessary -        //  if (!aligned && SourceFWAntsIdxs[i_ant][3*idx-2]<=fas) isBonus=true; -        ScoreOrientationRight(table,orir,SourceFWAntsIdxs[i_ant][3*idx-1],SourceFWAntsIdxs[i_ant][3*idx], -                              isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -      } -    } -  } -} - -void Alignment::computeOrientationSourceGen(const CountTable& table, double *cost, double *bonus, -                                         double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, const map<WordID,WordID>& tags) { -  if (DEBUG) cerr << "computeOrientationSourceGen" << endl; -  int oril, orir; -  for (int idx=1; idx<=SourceFWRuleIdxs[0]; idx++) { -    if (DEBUG) cerr << "considering SourceFWRuleIdxs[" << idx << "]: " << SourceFWRuleIdxs[3*idx-2] << endl; -    OrientationSource(SourceFWRuleIdxs[3*idx-2],&oril,&orir); -    bool isBonus = false; // fas -> first aligned source word, las -> last aligned source word -    if (SourceFWRuleIdxs[3*idx-2]<=fas) isBonus=true; -    if (!isBonus) // this is unnecessary because fas <= las assertion  -      if (minTSpan(SourceFWRuleIdxs[3*idx-2])==MINIMUM_INIT && las<=SourceFWRuleIdxs[3*idx-2]) isBonus=true; -    ScoreOrientationLeft(table,oril,generalize(SourceFWRuleIdxs[3*idx-1],tags),SourceFWRuleIdxs[3*idx], -                         isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -    isBonus = false; -    if (las<=SourceFWRuleIdxs[3*idx-2]) isBonus=true; -    if (!isBonus) // this is unnecessary becuase fas <= las assertion -      if (minTSpan(SourceFWRuleIdxs[3*idx-2])==MINIMUM_INIT && SourceFWRuleIdxs[3*idx-2]<=fas) isBonus=true; -    ScoreOrientationRight(table,orir,generalize(SourceFWRuleIdxs[3*idx-1],tags),SourceFWRuleIdxs[3*idx], -                          isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -  } -  for (int i_ant=0; i_ant<_Arity; i_ant++) { -    for (int idx=1; idx<=SourceFWAntsIdxs[i_ant][0]; idx++) { -      if (DEBUG) -        cerr << "considering SourceFWAntsIdxs[" << i_ant << "][" << idx << "]: " << SourceFWAntsIdxs[i_ant][3*idx-2] << endl; -      // antfas -> first aligned source word antecedent-wise -      // antlas -> last aligned source word antecedent-wise -      int antfas = firstSourceAligned(SourceAntsIdxs[i_ant][1]); -      int antlas = lastSourceAligned(SourceAntsIdxs[i_ant][SourceAntsIdxs[i_ant][0]]); -      assert(antfas <= antlas); -      bool aligned = (minTSpan(SourceFWAntsIdxs[i_ant][3*idx-2])!=MINIMUM_INIT); -      bool Lcompute = true;bool Rcompute = true; -      if ((aligned && antfas<SourceFWAntsIdxs[i_ant][3*idx-2]) || -          (!aligned && antfas < SourceFWAntsIdxs[i_ant][3*idx-2] && SourceFWAntsIdxs[i_ant][3*idx-2] < antlas)) -          Lcompute=false; -      if ((aligned && SourceFWAntsIdxs[i_ant][3*idx-2]<antlas) || -          (!aligned && antfas < SourceFWAntsIdxs[i_ant][3*idx-2] && SourceFWAntsIdxs[i_ant][3*idx-2] < antlas)) -          Rcompute=false; -      if (!Lcompute && !Rcompute) continue; -      OrientationSource(SourceFWAntsIdxs[i_ant][3*idx-2],&oril,&orir,Lcompute, Rcompute); -      bool isBonus = false; -      if (Lcompute) { -        if (SourceFWAntsIdxs[i_ant][3*idx-2]<=fas) isBonus = true; -        //if (!isBonus)  // this is unnecessary -       //  if (!aligned && las<=SourceFWAntsIdxs[i_ant][3*idx-2]) isBonus=true; -        ScoreOrientationLeft(table,oril,generalize(SourceFWAntsIdxs[i_ant][3*idx-1],tags),SourceFWAntsIdxs[i_ant][3*idx], -                             isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -      } -      isBonus = false; -      if (Rcompute) { -        if (las<=SourceFWAntsIdxs[i_ant][3*idx-2]) isBonus = true; -        //if (!isBonus) // this is unnecessary -        //  if (!aligned && SourceFWAntsIdxs[i_ant][3*idx-2]<=fas) isBonus=true; -        ScoreOrientationRight(table,orir,generalize(SourceFWAntsIdxs[i_ant][3*idx-1],tags),SourceFWAntsIdxs[i_ant][3*idx], -                              isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_oris,beta_oris); -      } -    } -  } -} -void Alignment::computeOrientationTarget(const CountTable& table, double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus) { -  if (DEBUG) cerr << "computeOrientationTarget" << endl; -  int oril, orir; -  for (int idx=1; idx<=TargetFWRuleIdxs[0]; idx++) { -    if (DEBUG) cerr << "considering TargetFWRuleIdxs[" << idx << "]: " << TargetFWRuleIdxs[3*idx-2] << endl; -    OrientationTarget(TargetFWRuleIdxs[3*idx-2],&oril,&orir); -    // the second and the third parameters of ScoreOrientationLeft must be e and f (not f and then e)  -    bool isBonus = false; -    if (TargetFWRuleIdxs[3*idx-2]<=fat) isBonus = true; -    if (!isBonus) -      if (minSSpan(TargetFWRuleIdxs[3*idx-2])==MINIMUM_INIT && lat<=TargetFWRuleIdxs[3*idx-2]) isBonus = true; -    ScoreOrientationLeft(table,oril,TargetFWRuleIdxs[3*idx-1],TargetFWRuleIdxs[3*idx], -                         isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_orit,beta_orit);  -    isBonus = false; -    if (lat<=TargetFWRuleIdxs[3*idx-2]) isBonus = true; -    if (!isBonus) -      if (minSSpan(TargetFWRuleIdxs[3*idx-2])==MINIMUM_INIT && TargetFWRuleIdxs[3*idx-2]<=fat) isBonus=true; -    ScoreOrientationRight(table,orir,TargetFWRuleIdxs[3*idx-1],TargetFWRuleIdxs[3*idx], -                          isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_orit,beta_orit); -  } - -  for (int i_ant=0; i_ant<_Arity; i_ant++) { -    for (int idx=1; idx<=TargetFWAntsIdxs[i_ant][0]; idx++) { -      if (DEBUG) cerr << "considering TargetFWAntsIdxs[" << i_ant << "][" << idx << "]: " << TargetFWAntsIdxs[i_ant][3*idx-2] << endl;  -      int antfat = firstTargetAligned(TargetAntsIdxs[i_ant][1]); -      int antlat = lastTargetAligned(TargetAntsIdxs[i_ant][TargetAntsIdxs[i_ant][0]]); -      int aligned = (minSSpan( TargetFWAntsIdxs[i_ant][3*idx-2])!=MINIMUM_INIT); -      bool Lcompute = true; bool Rcompute = true; -      if ((aligned && antfat<TargetFWAntsIdxs[i_ant][3*idx-2]) || -          (!aligned && antfat < TargetFWAntsIdxs[i_ant][3*idx-2] && TargetFWAntsIdxs[i_ant][3*idx-2] < antlat))  -          Lcompute=false; -      if ((aligned && TargetFWAntsIdxs[i_ant][3*idx-2]<antlat) || -          (!aligned && antfat < TargetFWAntsIdxs[i_ant][3*idx-2] && TargetFWAntsIdxs[i_ant][3*idx-2] < antlat))  -          Rcompute=false; -      if (!Lcompute && !Rcompute) continue; -      bool isBonus = false; -      OrientationTarget(TargetFWAntsIdxs[i_ant][3*idx-2],&oril,&orir, Lcompute, Rcompute); -      if (Lcompute) { -        if (TargetFWAntsIdxs[i_ant][3*idx-2]<=fat) isBonus=true; -        //if (!isBonus)  -        //  if (!aligned && lat<=TargetFWAntsIdxs[i_ant][3*idx-2]) isBonus=true; -        ScoreOrientationLeft(table,oril,TargetFWAntsIdxs[i_ant][3*idx-1],TargetFWAntsIdxs[i_ant][3*idx], -                             isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_orit,beta_orit);  -      } -      isBonus = false; -      if (Rcompute) { -        if (lat<=TargetFWAntsIdxs[i_ant][3*idx-2]) isBonus=true; -        if (!isBonus) -          //if (!aligned && TargetFWAntsIdxs[i_ant][3*idx-2]<=fat) isBonus=true; -        ScoreOrientationRight(table,orir,TargetFWAntsIdxs[i_ant][3*idx-1],TargetFWAntsIdxs[i_ant][3*idx], -                              isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_orit,beta_orit); -      } -    } -  } -} - -void Alignment::computeOrientationTargetBackward(const CountTable& table, double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus) { -  if (DEBUG) cerr << "computeOrientationTargetBackward" << endl; -  int oril, orir; -  for (int idx=1; idx<=TargetFWRuleIdxs[0]; idx++) { -    if (DEBUG) cerr << "considering TargetFWRuleIdxs[" << idx << "]: " << TargetFWRuleIdxs[3*idx-2] << endl; -    int* fwblock = blockSource(TargetFWRuleIdxs[3*idx-2],TargetFWRuleIdxs[3*idx-2]); -    bool aligned = (fwblock[0] == MINIMUM_INIT); -    if (aligned) { -      OrientationSource(fwblock[0],fwblock[1],&oril,&orir); -    } else { -      OrientationTarget(TargetFWRuleIdxs[3*idx-2],&oril,&orir); -    } -    delete fwblock; -    // the second and the third parameters of ScoreOrientationLeft must be e and f (not f and then e) -    bool isBonus = false; -    if (TargetFWRuleIdxs[3*idx-2]<=fat) isBonus = true; -    //if (!isBonus)  // unnecessary -      //if (minSSpan(TargetFWRuleIdxs[3*idx-2])==MINIMUM_INIT && lat<=TargetFWRuleIdxs[3*idx-2]) isBonus = true; -    ScoreOrientationLeftBackward(table,oril,TargetFWRuleIdxs[3*idx-1],TargetFWRuleIdxs[3*idx], -                         isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_orit,beta_orit); -    isBonus = false; -    if (lat<=TargetFWRuleIdxs[3*idx-2]) isBonus = true; -    //if (!isBonus) // unnecessary -      //if (minSSpan(TargetFWRuleIdxs[3*idx-2])==MINIMUM_INIT && TargetFWRuleIdxs[3*idx-2]<=fat) isBonus=true; -    ScoreOrientationRightBackward(table,orir,TargetFWRuleIdxs[3*idx-1],TargetFWRuleIdxs[3*idx], -                          isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_orit,beta_orit); -  } - -  for (int i_ant=0; i_ant<_Arity; i_ant++) { -    int antfat = firstTargetAligned(TargetAntsIdxs[i_ant][1]); -    int antlat = lastTargetAligned(TargetAntsIdxs[i_ant][TargetAntsIdxs[i_ant][0]]); -    int antfas = firstSourceAligned(SourceAntsIdxs[i_ant][1]); -    int antlas = lastSourceAligned(SourceAntsIdxs[i_ant][SourceAntsIdxs[i_ant][0]]); -    for (int idx=1; idx<=TargetFWAntsIdxs[i_ant][0]; idx++) { -      if (DEBUG) cerr << "considering TargetFWAntsIdxs[" << i_ant << "][" << idx << "]: " << TargetFWAntsIdxs[i_ant][3*idx-2] << endl; -      int* fwblock = blockTarget(TargetFWAntsIdxs[i_ant][3*idx-2],TargetFWAntsIdxs[i_ant][3*idx-2]); -      bool aligned = (fwblock[0]!=MINIMUM_INIT); -      //bool aligned = (minSSpan( TargetFWAntsIdxs[i_ant][3*idx-2])!=MINIMUM_INIT); -      bool Lcompute = true; bool Rcompute = true; -      if ((aligned && antfas<fwblock[0]) || -          (!aligned && antfat < fwblock[2])) -          Lcompute=false; -      if ((aligned && fwblock[0]<antlas) || -          (!aligned && fwblock[3] < antlat)) -          Rcompute=false; -      if (!Lcompute && !Rcompute) continue; -      bool isBonus = false; -      if (aligned) { -        OrientationSource(fwblock[0],fwblock[1],&oril,&orir,Lcompute,Rcompute); -      } else {  -        OrientationTarget(TargetFWAntsIdxs[i_ant][3*idx-2],&oril,&orir, Lcompute, Rcompute); -      } -      if (Lcompute) { -        if ((aligned && fwblock[1]<=fas) || -            (!aligned && fwblock[3]<=fat)) -            isBonus=true; -        //if (!isBonus) -        //  if (!aligned && lat<=TargetFWAntsIdxs[i_ant][3*idx-2]) isBonus=true; -        ScoreOrientationLeftBackward(table,oril,TargetFWAntsIdxs[i_ant][3*idx-1],TargetFWAntsIdxs[i_ant][3*idx], -                             isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_orit,beta_orit); -      } -      isBonus = false; -      if (Rcompute) { -        if ((aligned && las<=fwblock[0]) || -            (!aligned && lat<=fwblock[2])) -            isBonus=true; -        if (!isBonus) -          //if (!aligned && TargetFWAntsIdxs[i_ant][3*idx-2]<=fat) isBonus=true; -        ScoreOrientationRightBackward(table,orir,TargetFWAntsIdxs[i_ant][3*idx-1],TargetFWAntsIdxs[i_ant][3*idx], -                              isBonus,cost,bonus,bo1,bo1_bonus,bo2,bo2_bonus,alpha_orit,beta_orit); -      } -      delete fwblock; -    } -  } -} - -bool Alignment::MemberOf(int* FWIdxs, int pos1, int pos2) { -  for (int idx=2; idx<=FWIdxs[0]; idx++) {  -    if (FWIdxs[3*(idx-1)-2]==pos1 && FWIdxs[3*idx-2]==pos2) return true; -  } -  return false; -} - -void Alignment::computeDominanceSource(const CountTable& table, WordID lfw, WordID rfw,  -     double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus) { -  // no bonus yet -  if (DEBUG) cerr << "computeDominanceSource" << endl; -  if (DEBUG) cerr << " initial cost=" << *cost << ", initial bonus=" << *bonus << endl; -  for (int idx=2; idx<=SourceFWIdxs[0]; idx++) { -    if (DEBUG) {  -      cerr << "PrevSourceFWIdxs :" << SourceFWIdxs[3*(idx-1)-2] << "," << SourceFWIdxs[3*(idx-1)-1]  -           << "," << SourceFWIdxs[3*(idx-1)] << endl; -      cerr << "CurrSourceFWIdxs :" << SourceFWIdxs[3*(idx)-2] << "," << SourceFWIdxs[3*(idx)-1]  -           << "," << SourceFWIdxs[3*(idx)] << endl; -    } -    bool compute = true; -    for (int i_ant=0; i_ant<_Arity && compute; i_ant++) { -      if (MemberOf(SourceFWAntsIdxs[i_ant],SourceFWIdxs[3*(idx-1)-2],SourceFWIdxs[3*(idx)-2])) { -        //cerr << "Skipping, they have been calculated in the  " << (i_ant+1) << "-th branch" << endl; -        compute=false; -      } -    } -    if (compute) { -      int dom = DominanceSource(SourceFWIdxs[3*(idx-1)-2],SourceFWIdxs[3*idx-2]); -      if (DEBUG) cerr << "dom = " << dom << endl; -      ScoreDominance(table,dom,SourceFWIdxs[3*(idx-1)-1],SourceFWIdxs[3*idx-1],SourceFWIdxs[3*(idx-1)],SourceFWIdxs[3*idx], -              cost,bo1,bo2,false,alpha_doms,beta_doms); -      if (DEBUG) cerr << "cost now is " << *cost << endl; -    } -  } -  if (SourceFWIdxs[0]>0) { -    if (lfw>=0) { -      int dom = DominanceSource(0,SourceFWIdxs[1]); -      if (DEBUG) cerr << " --> lfw = " << lfw << "-" << TD::Convert(lfw) << endl; -      if (DEBUG) cerr << " --> rfw = " << rfw << "-" << TD::Convert(rfw) << endl; -      ScoreDominance(table,dom,lfw,SourceFWIdxs[2],lfw,SourceFWIdxs[3],bonus,bo1_bonus,bo2_bonus,true,alpha_doms,beta_doms); -    } -    if (rfw>=0) { -      int dom = DominanceSource(SourceFWIdxs[3*SourceFWIdxs[0]-2],_J-1); -      ScoreDominance(table,dom,SourceFWIdxs[3*SourceFWIdxs[0]-1],rfw,SourceFWIdxs[3*SourceFWIdxs[0]], -                     rfw,bonus,bo1_bonus,bo2_bonus,true,alpha_doms,beta_doms); -    } -  } -} - -void Alignment::computeDominanceSourcePos(const CountTable& table, WordID lfw, WordID rfw, -     double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, int maxfwidx, int maxdepth1, int maxdepth2) { -  if (DEBUG) cerr << "computeDominanceSourcePos" << endl; -  if (DEBUG) cerr << " initial cost=" << *cost << ", initial bonus=" << *bonus << endl; -  ostringstream oss; -  for (int idx=2; idx<=SourceFWIdxs[0]; idx++) { -    if (DEBUG) { -      cerr << "PrevSourceFWIdxs :" << SourceFWIdxs[3*(idx-1)-2] << "," << SourceFWIdxs[3*(idx-1)-1] -           << "," << SourceFWIdxs[3*(idx-1)] << endl; -      cerr << "CurrSourceFWIdxs :" << SourceFWIdxs[3*(idx)-2] << "," << SourceFWIdxs[3*(idx)-1] -           << "," << SourceFWIdxs[3*(idx)] << endl; -    } -    //if (!((SourceFWAbsIdxs[3*(idx-1)-2]<=maxdepth1 && SourceFWAbsIdxs[3*idx-2]<=maxdepth1) || -    //      (maxfwidx-SourceFWAbsIdxs[3*(idx-1)-2]+1<=maxdepth2 && maxfwidx-SourceFWAbsIdxs[3*idx-2]+1<=maxdepth2))) continue; -    bool compute = true; -    for (int i_ant=0; i_ant<_Arity && compute; i_ant++) { -      if (MemberOf(SourceFWAntsIdxs[i_ant],SourceFWIdxs[3*(idx-1)-2],SourceFWIdxs[3*(idx)-2])) { -        //cerr << "Skipping, they have been calculated in the  " << (i_ant+1) << "-th branch" << endl; -        compute=false; -      } -    } -    if (compute) { -      int dom = DominanceSource(SourceFWIdxs[3*(idx-1)-2],SourceFWIdxs[3*idx-2]); -      if (DEBUG) cerr << "dom = " << dom << endl; -      if (maxdepth1+maxdepth2>0) { -        string source1 = TD::Convert(SourceFWIdxs[3*(idx-1)-1]); -        string source2 = TD::Convert(SourceFWIdxs[3*(idx)-1]); -        if (maxdepth1>0) { -          oss << source1 << "/"; -          if (SourceFWAbsIdxs[3*(idx-1)-2]<=maxdepth1)  -            oss << SourceFWAbsIdxs[3*(idx-1)-2]; -          else  -            oss << "X"; -          WordID source1id = TD::Convert(oss.str()); -          oss.str(""); -          oss << source2 << "/"; -          if (SourceFWAbsIdxs[3*idx-2]<=maxdepth1) -            oss << SourceFWAbsIdxs[3*idx-2]; -          else -            oss << "X"; -          WordID source2id = TD::Convert(oss.str()); -          oss.str("");     -          ScoreDominance(table,dom,source1id,source2id,SourceFWIdxs[3*(idx-1)],SourceFWIdxs[3*idx], -              cost,bo1,bo2,false,alpha_doms,beta_doms); -        }   -        if (maxdepth2>0) { -          oss << source1 << "/"; -          if (maxfwidx-SourceFWAbsIdxs[3*(idx-1)-2]+1<=maxdepth2)  -            oss << ((maxfwidx-SourceFWAbsIdxs[3*(idx-1)-2]+1)*-1); -          else -            oss << "X"; -          WordID source1id = TD::Convert(oss.str()); -          oss.str(""); -          oss << source2 << "/"; -          if (maxfwidx-SourceFWAbsIdxs[3*idx-2]+1<=maxdepth2)  -            oss << ((maxfwidx-SourceFWAbsIdxs[3*(idx-1)-2]+1)*-1); -          else -            oss << "X"; -          WordID source2id = TD::Convert(oss.str()); -          oss.str(""); -          ScoreDominance(table,dom,source1id,source2id,SourceFWIdxs[3*(idx-1)],SourceFWIdxs[3*idx], -              cost,bo1,bo2,false,alpha_doms,beta_doms); -        } -      } -    } -  } -  if (SourceFWIdxs[0]>0) { -    if (lfw>=0) { -      int dom = DominanceSource(0,SourceFWIdxs[1]); -      string source1 = TD::Convert(lfw); -      string source2 = TD::Convert(SourceFWIdxs[2]); -      if (maxdepth1>0) { -        oss << source1 << "/"; -        if (SourceFWAbsIdxs[1]-1<=maxdepth1)  -          oss << (SourceFWAbsIdxs[1]-1); -        else -          oss << "X"; -        WordID source1id = TD::Convert(oss.str()); -        oss.str(""); -        oss << source2 << "/"; -        if (SourceFWAbsIdxs[1]<=maxdepth1) -          oss << SourceFWAbsIdxs[1]; -        else -          oss << "X"; -        WordID source2id = TD::Convert(oss.str()); -        oss.str(""); -        ScoreDominance(table,dom,source1id,source2id,lfw,SourceFWIdxs[3],bonus,bo1_bonus,bo2_bonus,true,alpha_doms,beta_doms); -      } -      if (maxdepth2>0) {  -        oss << source1 << "/"; -        if (maxfwidx-(SourceFWAbsIdxs[1]-1)+1<=maxdepth2)  -          oss << ((maxfwidx-(SourceFWAbsIdxs[1]-1)+1)*-1); -        else  -          oss << "X"; -        WordID source1id = TD::Convert(oss.str()); -        oss.str(""); -        oss << source2 << "/"; -        if (maxfwidx-SourceFWAbsIdxs[1]+1<=maxdepth2)  -          oss << ((maxfwidx-SourceFWAbsIdxs[1]+1)*-1); -        else  -          oss << "X"; -        WordID source2id = TD::Convert(oss.str()); -        oss.str(""); -        ScoreDominance(table,dom,source1id,source2id,lfw,SourceFWIdxs[3],bonus,bo1_bonus,bo2_bonus,true,alpha_doms,beta_doms); -      } -    } -    if (rfw>=0) { -      int dom = DominanceSource(SourceFWIdxs[3*SourceFWIdxs[0]-2],_J-1); -      string source1 = TD::Convert(SourceFWIdxs[3*SourceFWIdxs[0]-1]); -      string source2 = TD::Convert(rfw); -      if (maxdepth1>0) { -        oss << source1 << "/"; -        if (SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]<=maxdepth1) -          oss << SourceFWAbsIdxs[3*SourceFWIdxs[0]-2]; -        else -          oss << "X"; -        WordID source1id = TD::Convert(oss.str()); -        oss.str(""); -        oss << source2 << "/"; -        if (SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]+1<=maxdepth1) -          oss << (SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]+1); -        else  -          oss << "X"; -        WordID source2id = TD::Convert(oss.str()); -        ScoreDominance(table,dom,source1id,source2id,SourceFWIdxs[3*SourceFWIdxs[0]], -                     rfw,bonus,bo1_bonus,bo2_bonus,true,alpha_doms,beta_doms); -      } -      if (maxdepth2>0) { -        oss << source1 << "/"; -        if (maxfwidx-SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]+1<=maxdepth2)  -          oss << ((maxfwidx-SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]+1)*-1); -        else  -          oss << "X"; -        WordID source1id = TD::Convert(oss.str()); -        oss.str(""); -        oss << source2 << "/"; -        if (maxfwidx-(SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]+1)+1<=maxdepth2)  -          oss << ((maxfwidx-(SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]+1)+1)*-1); -        else  -          oss << "X"; -        WordID source2id = TD::Convert(oss.str()); -        oss.str(""); -        ScoreDominance(table,dom,source1id,source2id,SourceFWIdxs[3*SourceFWIdxs[0]], -                     rfw,bonus,bo1_bonus,bo2_bonus,true,alpha_doms,beta_doms); -      } -    } -  } -} - - -void Alignment::computeDominanceTarget(const CountTable& table, WordID lfw, WordID rfw, -     double *cost, double *bonus, double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus) { -   if (DEBUG) cerr << "computeDominanceTarget" << endl; -  for (int idx=2; idx<=TargetFWIdxs[0]; idx++) { -    if (DEBUG) cerr << "PrevTargetFWIdxs :" << TargetFWIdxs[3*(idx-1)-2] << "," << TargetFWIdxs[3*(idx-1)-1] << "," <<TargetFWIdxs[3*(idx-1)] << endl; -    if (DEBUG) cerr << "CurrTargetFWIdxs :" << TargetFWIdxs[3*(idx)-2] << "," << TargetFWIdxs[3*(idx)-1] << "," <<TargetFWIdxs[3*(idx)] << endl; -    bool compute = true; -    for (int i_ant=0; i_ant <_Arity && compute; i_ant++) { -      if (MemberOf(TargetFWAntsIdxs[i_ant],TargetFWIdxs[3*(idx-1)-2],TargetFWIdxs[3*idx-2])) { -        if (DEBUG) cerr << "Skipping, they have been calculated in the " << (i_ant+1) << "-th branch" << endl; -        compute = false; -      } -    } -    if (compute) { -      int dom = DominanceTarget(TargetFWIdxs[3*(idx-1)-2],TargetFWIdxs[3*idx-2]); -      //cerr << (3*(idx-1)) << "," << (3*idx) << "," << (3*(idx-1)-1) << "," << (3*idx-1) << endl; -      if (DEBUG) cerr << "dom target = " << dom << endl; -      ScoreDominance(table,dom,TargetFWIdxs[3*(idx-1)],TargetFWIdxs[3*idx],TargetFWIdxs[3*(idx-1)-1],TargetFWIdxs[3*idx-1], -              cost,bo1,bo2,false,alpha_domt,beta_domt); -    } -  } -  if (TargetFWIdxs[0]>0) { -    if (DEBUG) cerr << "backoff dominance " << endl; -    if (lfw>=0) { -      int dom = DominanceTarget(0,TargetFWIdxs[1]); -      if (DEBUG) cerr << "dom target (with left) = " << dom << endl; -      ScoreDominance(table,dom,lfw,lfw,TargetFWIdxs[2],TargetFWIdxs[3],bonus,bo1_bonus,bo2_bonus,true,alpha_domt,beta_domt); -    } -    if (rfw>=0) { -      int dom = DominanceTarget(TargetFWIdxs[3*TargetFWIdxs[0]-2],_I-1); -      if (DEBUG) cerr << "dom target (with right) = " << dom << endl; -      ScoreDominance(table,dom,TargetFWIdxs[3*TargetFWIdxs[0]-1],TargetFWIdxs[3*TargetFWIdxs[0]], -                     rfw,rfw,bonus,bo1_bonus,bo2_bonus,true,alpha_domt,beta_domt); -    } -  } - -  //cerr << "END of computeDominanceTarget" << endl; -} - -double Alignment::ScoreDominance(const CountTable& table, int dom, WordID source1, WordID source2, WordID target1, WordID target2) { -  if (DEBUG) { -     cerr << "ScoreDominance(source1=" << TD::Convert(source1) << ",source2=" << TD::Convert(source2) -          << ",target1=" << TD::Convert(target1) << ",target2=" << TD::Convert(target2) << ", dom=" << dom << endl; -  } -  string _source1 = TD::Convert(source1); -  string _source2 = TD::Convert(source2); -  string _source1idx; string _source2idx; -  if (table.mode==1) { -    _source1idx = _source1; _source2idx = _source2; -    _source1 = _source1idx.substr(0,_source1idx.find_last_of("/")); -    _source2 = _source2idx.substr(0,_source2idx.find_last_of("/")); -  } -  string _target1 = TD::Convert(target1); -  string _target2 = TD::Convert(target2); - -  double count = table.ultimate[dom]; -  double total = table.ultimate[4]; -  double prob = count/total; -  if (DEBUG) cerr << "level0 " << count << "/" << total << "=" << prob << endl; -  double alpha = 0.1; -   -  string key = _source1 + " " + _source2; -  WordID key_id = TD::Convert(key); -  map<WordID,int*>::const_iterator it = table.model.find(key_id);  -  bool stop = (it==table.model.end()); -  if (!stop) { -    stop = true; -    if (it->second[4]>=0) { -      count = it->second[dom] + alpha*prob; -      total = it->second[4]   + alpha; -      prob = count/total; -      if (DEBUG) cerr << "level1 " << count << "/" << total << "=" << prob << endl; -      stop = false; -    } -  }  -  if (stop) return prob; -  -  key = _source1 + " " + _source2 + " " + _target1 + " " + _target2;  -  key_id = TD::Convert(key);   -  it = table.model.find(key_id); -  stop = (it==table.model.end()); -  if (!stop) { -    stop = true; -    if (it->second[4]>=0) { -      count = it->second[dom] + alpha*prob; -      total = it->second[4]   + alpha; -      prob = count/total; -      if (DEBUG) cerr << "level2 " << count << "/" << total << "=" << prob << endl; -      stop = false; -    } -  }  -  -  if (table.mode!=1 || stop) return prob; -  key = _source1 + " " + _source2 + " " + _target1 + " " + _target2; -  key_id = TD::Convert(key); -  it = table.model.find(key_id); -  if (it!=table.model.end()) { -    if (it->second[4]>=0) { -      count = it->second[dom] + alpha*prob; -      total = it->second[4]   + alpha; -      if (DEBUG) cerr << "level3 " << count << "/" << total << "=" << prob << endl; -      prob = count/total; -    } -  } - -  return prob; -} - -void Alignment::ScoreDominance(const CountTable& table, int dom, WordID source1, WordID source2, WordID target1, WordID target2, double *cost, double *bo1, double *bo2, bool isBonus, double alpha2, double beta2) { -  if (DEBUG)  -    cerr << "ScoreDominance(source1=" << TD::Convert(source1) << ",source2=" << TD::Convert(source2)  -         << ",target1=" << TD::Convert(target1) << ",target2=" << TD::Convert(target2) << ",isBonus=" << isBonus << ", alpha2 = " << alpha2 << ", beta2 = " << beta2 << endl; -  if (DEBUG) cerr << "    BEFORE=" << *cost << endl; -  *cost += ScoreDominance(table,dom,source1,source2,target1,target2); -  if (DEBUG) cerr << "    AFTER=" << *cost << endl; -} - -WordID Alignment::F2EProjectionFromExternal(int idx, const vector<AlignmentPoint>& als, const string& delimiter) { -  if (DEBUG) { -    cerr << "F2EProjectionFromExternal=" << idx << endl; -    for (int i=0; i< als.size(); i++) cerr << "als[" << i << "]=" << als[i] << " "; -    cerr << endl; -  } -  vector<int> alignedTo; -  for (int i=0; i<als.size(); i++) { -    if (DEBUG) cerr << als[i] << " "; -    if (als[i].s_==idx)  -      alignedTo.push_back(als[i].t_); -  } -  if (DEBUG) { -    cerr << endl; -    cerr << "alignedTo = "; -    for (int i=0; i<alignedTo.size(); i++) cerr << alignedTo[i] << " "; -    cerr << endl; -  } -  if (alignedTo.size()==0) { -    if (DEBUG) cerr << "returns [NULL] : " << TD::Convert("NULL") << endl; -    return TD::Convert("NULL"); -  } else if (alignedTo.size()==1) { -    if (DEBUG) cerr << "returns [" << TD::Convert(_e[alignedTo[0]]) << "] : " << _e[alignedTo[0]] << endl; -    return _e[alignedTo[0]]; // if not aligned to many, why bother continuing -  } else { -    ostringstream projection; -    for (int i=0; i<alignedTo.size(); i++) { -      if (i>0) projection << delimiter; -      projection << TD::Convert(_e[alignedTo[i]]); -    } -    if (DEBUG) { -      cerr << "projection = " << projection.str() << endl; -      cerr << "returns = " << TD::Convert(projection.str()) << endl; -    } -    return TD::Convert(projection.str()); -  } -} - -WordID Alignment::E2FProjectionFromExternal(int idx, const vector<AlignmentPoint>& als, const string& delimiter) { -  vector<int> alignedTo; -  for (int i=0; i<als.size(); i++) -    if (als[i].t_==idx) alignedTo.push_back(als[i].s_); -  if (alignedTo.size()==0) { -    return TD::Convert("NULL"); -  } else if (alignedTo.size()==1) { -    return _f[alignedTo[0]]; // if not aligned to many, why bother continuing -  } else { -    ostringstream projection; -    for (int i=0; i<alignedTo.size(); i++) { -      if (i>0) projection << delimiter; -      projection << TD::Convert(_f[alignedTo[i]]); -    } -    return TD::Convert(projection.str()); -  } -} - - -WordID Alignment::F2EProjection(int idx, const string& delimiter) { -  if (DEBUG) cerr << "F2EProjection(" << idx << ")" << endl; -  int e = targetOf(idx); -  if (e<0) { -    if (DEBUG) cerr << "projection = NULL" << endl; -    return TD::Convert("NULL"); -  } else { -    if (targetOf(idx,e+1)<0) { -      if (DEBUG) cerr << "e-1=" << (e-1) << ", size=" << _e.size() << endl; -      return getE(e-1); // if not aligned to many, why bother continuing -    } -    ostringstream projection; -    bool firstTime = true; -    do { -      if (!firstTime) projection << delimiter;  -      projection << TD::Convert(_e[e-1]); // transform space -      firstTime = false; -      e = targetOf(idx,e+1); -      //if (DEBUG) cerr << "projection = " << projection.str() << endl; -    } while(e>=0); -    return TD::Convert(projection.str()); -  } -} - -WordID Alignment::E2FProjection(int idx, const string& delimiter) { -  //cerr << "E2FProjection(" << idx << ")" << endl; -  //cerr << "i" << endl; -  int f = sourceOf(idx); -  //cerr << "j, f=" << f << endl; -  if (f<0) { -    //cerr << "projection = NULL" << endl; -    return TD::Convert("NULL"); -  } else { -    if (sourceOf(idx,f+1)<0) return getF(f-1); -    bool firstTime = true; -    ostringstream projection(ostringstream::out); -    do { -      if (!firstTime) projection << delimiter; -      projection << TD::Convert(_f[f-1]); //transform space -      firstTime = false; -      f = sourceOf(idx,f+1); -      //cerr << "projection = " << projection.str() << endl; -    } while(f>=0); -    return TD::Convert(projection.str()); -  } -} -void Alignment::computeBorderDominanceSource(const CountTable& table, double *cost, double *bonus, double *state_mono,  -        double *state_nonmono, TRule &rule, const std::vector<const void*>& ant_contexts, const map<WordID,int>& sfw) { -  // HACK: GOAL is assumed to always be "S" -  if (DEBUG) cerr << "computeBorderDominanceSource" << endl; -  std::vector<WordID> f = rule.f(); -  std::vector<WordID> e = rule.e(); -  int nt_index[f.size()]; -  int nt_count=0; -  for (int i=0; i<f.size(); i++) nt_index[i] = (f[i]<0)? ++nt_count : 0; -  if (DEBUG) { -    cerr << "f = "; -    for (int i=0; i<f.size(); i++) cerr << i << "." << "[" << f[i] << "] "; -    cerr << endl; -    cerr << "e = "; -    for (int i=0; i<e.size(); i++) cerr << i << "." << "[" << e[i] << "] "; -    cerr << endl; -  } -  bool flag[f.size()];  -  for (int idx=0; idx<f.size(); idx++) flag[idx]=false; -  //collect alignments -  vector<int> als; -  for (std::vector<AlignmentPoint>::const_iterator i = rule.als().begin(); i != rule.als().end(); ++i) { -    int s = i->s_; int t = i->t_; -    als.push_back(link(t,s)); -  } -  if (DEBUG) cerr << "rule.Arity=" << rule.Arity() << endl; -  if (rule.Arity()>0) { -    int ntc=0; -    for (int s=0; s<f.size(); s++) { -      if (f[s]<=0) { -        if (DEBUG) cerr << "f[s]=" << f[s] << "+" << s << " - "; -        for (int t=0; t<e.size(); t++) { -          if (e[t]==ntc) { -            if (DEBUG) cerr << "e[t]=" << e[t] << "+" << t <<endl; -            als.push_back(link(t,s)); -            ntc--;  break; -          } -        } -      }  -    } -  } -  if (DEBUG) { -    cerr << "unsorted alignments (nonterminals and terminals)" << endl; -    for (int i=0; i<als.size(); i++) -      cerr << source(als[i]) << "-" << target(als[i]) << " "; -    cerr << endl; -  } -  // sort alignments according to target -  std::sort(als.begin(),als.end()); -  if (DEBUG) { -    cerr << "sorted alignments (nonterminals and terminals)" << endl; -    for (int i=0; i<als.size(); i++)  -      cerr << source(als[i]) << "-" << target(als[i]) << " "; -    cerr << endl; -  } -  // 0 -> neither, 1 -> leftFirst, 2 -> rightFirst, 3 -> dontCare -  // ScoreDominance(const CountTable& table, int dom, WordID source1, WordID source2, WordID target1, WordID target2) -  int prevs = 0; -  for (int i=0; i<als.size(); i++) { -    int currs = target(als[i]); //int currt = source(als[i]); -    if (DEBUG) cerr << "prevs=" << prevs << ", currs=" << currs << endl << endl; -    if (currs<prevs) { -      if (DEBUG) cerr << "currs<prevs" << endl; -      for (int s = currs; s <= prevs; s++) { -        if (sfw.find(f[s])!=sfw.end()) { -          WordID target = F2EProjectionFromExternal(s,rule.a_,"_SEP_"); -          if (DEBUG) cerr<<"  f[s]="<<TD::Convert(f[s])<<" is a function word, target="<<TD::Convert(target)<<endl; -          //*cost += ScoreDominance(table,1,kSOS,f[s],kSOS,target) + ScoreDominance(table,2,f[s],kEOS,target,kEOS); -          *cost += ScoreDominance(table,1,kSOS,f[s],kUNK,kUNK) + ScoreDominance(table,2,f[s],kEOS,kUNK,kUNK); -          if (DEBUG) cerr << "  resulting cost="<< *cost << endl; -        } else if (f[s]<=0) { -          if (DEBUG) cerr << "  f[s]= is a nonterminal" << endl; -          const int* ants = reinterpret_cast<const int *>(ant_contexts[nt_index[s]-1]); -          *cost += Dwarf::IntegerToDouble(ants[51]); // 50->mono, 51->non-mono  -          if (DEBUG) cerr << "  adding "<< Dwarf::IntegerToDouble(ants[51]) << " into cost, resulting = " << *cost << endl; -        }  -        flag[s] = true; -      } -    } -    prevs = currs; -  }  -  if (DEBUG) cerr << "bonus and state matter" << endl; -  for (int s=0; s<rule.f().size(); s++) { -    if (!flag[s]) { -      if (sfw.find(f[s])!=sfw.end()) { -        WordID target = F2EProjectionFromExternal(s,rule.a_,"_SEP_"); -        if (DEBUG) cerr<<"  f[s]="<<TD::Convert(f[s])<<" is a function word, target="<<TD::Convert(target)<<endl; -        //double indbonus = ScoreDominance(table,3,kSOS,f[s],kSOS,target) + ScoreDominance(table,3,f[s],kEOS,target,kEOS); -        double indbonus = ScoreDominance(table,3,kSOS,f[s],kUNK,kUNK) + ScoreDominance(table,3,f[s],kEOS,kUNK,kUNK); -        *bonus += indbonus; -        *state_mono += indbonus; -        //*state_nonmono += ScoreDominance(table,1,kSOS,f[s],kSOS,target) + ScoreDominance(table,2,f[s],kEOS,target,kEOS); -        *state_nonmono += ScoreDominance(table,1,kSOS,f[s],kUNK,kUNK) + ScoreDominance(table,2,f[s],kEOS,kUNK,kUNK); -        if (DEBUG) cerr<<"  new bonus="<<*bonus<<", new state="<<*state_mono<<","<<*state_nonmono<<endl; -      } else if (f[s]<=0) { -        if (DEBUG) cerr << "  f[s]="<< f[s] <<" is a nonterminal" << endl; -        const int* ants = reinterpret_cast<const int *>(ant_contexts[nt_index[s]-1]); -        double indbonus = Dwarf::IntegerToDouble(ants[50]); -        *bonus += indbonus; -        *state_mono += indbonus; -        *state_nonmono += Dwarf::IntegerToDouble(ants[51]); -        if (DEBUG) cerr << "  propagating state=" << *state_mono <<","<< *state_nonmono<< endl; -      } -    } -  } -  if (DEBUG) cerr << "LHS:" << rule.GetLHS() << ":" << TD::Convert(rule.GetLHS()*-1) <<endl; -  if (rule.GetLHS()*-1==TD::Convert("S")) { -    *state_mono = 0; -    *state_nonmono = 0; -    for (int i=0; i<rule.Arity(); i++) {  -      const int* ants = reinterpret_cast<const int *>(ant_contexts[i]); -      *cost += Dwarf::IntegerToDouble(ants[50]); -    } -    *bonus = 0; -  } -  if (DEBUG) cerr << "-->>>> cost="<<*cost<<", bonus="<<*bonus<<", state_mono="<<*state_mono<<", state_nonmono="<<*state_nonmono<<endl; -} - -bool Alignment::prepare(TRule& rule, const std::vector<const void*>& ant_contexts, const map<WordID,int>& sfw, const map<WordID,int>& tfw,const Lattice& sourcelattice, int spanstart, int spanend) {   -  if (DEBUG) cerr << "===Rule===" << rule.AsString() << endl; -  _f = rule.f(); -  _e = rule.e(); -  _Arity = rule.Arity(); -  if (DEBUG) { -    cerr << "F: "; -    for (int idx=0; idx<_f.size(); idx++) cerr << _f[idx] << " "; -    cerr << endl; -    cerr << "F': "; -    for (int idx=0; idx<_f.size(); idx++)  -      if (_f[idx]>=0) { -        cerr << TD::Convert(_f[idx]) << " ";  -      } else { -        cerr << TD::Convert(_f[idx]*-1); -      } -    cerr << endl; -    cerr << "E: "; -    for (int idx=0; idx<_e.size(); idx++)  -      cerr << _e[idx] << " "; -    cerr << endl; -    cerr << "E': "; -    for (int idx=0; idx<_e.size(); idx++)  -      if (_e[idx]>0) { -        cerr << TD::Convert(_e[idx]) << " ";  -      } else { -        cerr << "[NT]" << " "; -      } -    cerr << endl; -  } - -  SourceFWRuleIdxs[0]=0; -  SourceFWRuleAbsIdxs[0]=0; -  for (int idx=1; idx<=_f.size(); idx++) { // in transformed space -    if (sfw.find(_f[idx-1])!=sfw.end()) { -      SourceFWRuleIdxs[0]++; -      SourceFWRuleAbsIdxs[++SourceFWRuleAbsIdxs[0]]=GetFWGlobalIdx(idx,sourcelattice,_f,spanstart,spanend,ant_contexts,sfw); -      SourceFWRuleIdxs[3*SourceFWRuleIdxs[0]-2]=idx; -      SourceFWRuleIdxs[3*SourceFWRuleIdxs[0]-1]=_f[idx-1]; -      SourceFWRuleIdxs[3*SourceFWRuleIdxs[0]]  =F2EProjectionFromExternal(idx-1,rule.a_,"_SEP_"); -    } -  } -  TargetFWRuleIdxs[0]=0; -  for (int idx=1; idx<=_e.size(); idx++) { // in transformed space -    if (tfw.find(_e[idx-1])!=tfw.end()) { -      TargetFWRuleIdxs[0]++; -      TargetFWRuleIdxs[3*TargetFWRuleIdxs[0]-2]=idx; -      TargetFWRuleIdxs[3*TargetFWRuleIdxs[0]-1]=E2FProjectionFromExternal(idx-1,rule.a_,"_SEP_"); -      TargetFWRuleIdxs[3*TargetFWRuleIdxs[0]]  =_e[idx-1]; -    } -  } - -  if (DEBUG) { -    cerr << "SourceFWRuleIdxs[" << SourceFWRuleIdxs[0] << "]:"; -    for (int idx=1; idx<=SourceFWRuleIdxs[0]; idx++) { -      cerr << " idx:" << SourceFWRuleIdxs[3*idx-2]; -      cerr << " absidx:" << SourceFWRuleAbsIdxs[idx]; -      cerr << " F:" << SourceFWRuleIdxs[3*idx-1]; -      cerr << " E:" << SourceFWRuleIdxs[3*idx]; -      cerr << "; "; -    } -    cerr << endl; -    cerr << "TargetFWRuleIdxs[" << TargetFWRuleIdxs[0] << "]:"; -    for (int idx=1; idx<=TargetFWRuleIdxs[0]; idx++) { -      cerr << " idx:" << TargetFWRuleIdxs[3*idx-2]; -      cerr << " F:" << TargetFWRuleIdxs[3*idx-1]; -      cerr << " E:" << TargetFWRuleIdxs[3*idx]; -    } -    cerr << endl; -  } -  if (SourceFWRuleIdxs[0]+TargetFWRuleIdxs[0]==0) { -    bool nofw = true; -    for (int i_ant=0; i_ant<_Arity && nofw; i_ant++) {  -      const int* ants = reinterpret_cast<const int *>(ant_contexts[i_ant]); -      if (ants[0]>=0||ants[3]>=0||ants[6]>=0||ants[9]>=0) nofw=false; -    }  -    if (nofw) return true; -  } -  //cerr << "clearing als first" << endl; -  clearAls(_J,_I); - -  if (DEBUG) cerr << "A["<< rule.a_.size() << "]: " ; -  RuleAl[0]=0; -  // add phrase start boundary -  RuleAl[0]++; RuleAl[RuleAl[0]*2-1]=0; RuleAl[RuleAl[0]*2]=0; -  if (DEBUG) cerr << RuleAl[RuleAl[0]*2-1] << "-" << RuleAl[RuleAl[0]*2] << " "; -  for (int idx=0; idx<rule.a_.size(); idx++) { -    RuleAl[0]++; -    RuleAl[RuleAl[0]*2-1]=rule.a_[idx].s_+1; -    RuleAl[RuleAl[0]*2]  =rule.a_[idx].t_+1; -    if (DEBUG) cerr << RuleAl[RuleAl[0]*2-1] << "-" << RuleAl[RuleAl[0]*2] << " "; -  } -  // add phrase end boundary -  RuleAl[0]++; RuleAl[RuleAl[0]*2-1]=_f.size()+1; RuleAl[RuleAl[0]*2]=_e.size()+1; -  if (DEBUG) cerr << RuleAl[RuleAl[0]*2-1] << "-" << RuleAl[RuleAl[0]*2] << " "; -  if (DEBUG) cerr << endl; - -  SourceRuleIdxs[0] = _f.size()+2; // +2 (phrase boundaries) -  TargetRuleIdxs[0] = _e.size()+2;  -  int ntidx=-1; -  for (int idx=0; idx<_f.size()+2; idx++) { // idx in transformed space -    SourceRuleIdxs[idx+1]=idx; -    if (0<idx && idx<=_f.size()) if (_f[idx-1]<0) SourceRuleIdxs[idx+1]=ntidx--; -  } -  for (int idx=0; idx<_e.size()+2; idx++) { -    TargetRuleIdxs[idx+1]=idx; -    if (0<idx && idx<=_e.size()) { -      //cerr << "_e[" <<(idx-1)<< "]=" << _e[idx-1] << endl; -      if (_e[idx-1]<=0) TargetRuleIdxs[idx+1]=_e[idx-1]-1; -    } -  } -  if (DEBUG) { -    cerr << "SourceRuleIdxs:"; -    for (int idx=0; idx<SourceRuleIdxs[0]+1; idx++)  -      cerr << " " << SourceRuleIdxs[idx]; -    cerr << endl; -    cerr << "TargetRuleIdxs:"; -    for (int idx=0; idx<TargetRuleIdxs[0]+1; idx++)  -      cerr << " " << TargetRuleIdxs[idx]; -    cerr << endl; -  } - -  // sloppy, the integrity of anstates is assumed -  // total = 50 bytes -  // first 3 ints for leftmost source function words (1 for index, 4 for source WordID and 4 for target WordI -  // second 3 for rightmost source function words -  // third 3 for leftmost target function words -  // fourth 3 for rightmost target function words -  // the next 1 int for the number of alignments -  // the remaining 37 ints for alignments (source then target) -  for (int i_ant=0; i_ant<_Arity; i_ant++) { -    const int* ants = reinterpret_cast<const int *>(ant_contexts[i_ant]); -    int span = ants[Dwarf::STATE_SIZE-1]; -    if (DEBUG) { -      cerr << "antcontexts[" << i_ant << "] "; -      for (int idx=0; idx<Dwarf::STATE_SIZE; idx++) cerr << idx << "." << ants[idx] << " "; -      cerr << endl; -      cerr << "i,j = " << source(ants[Dwarf::STATE_SIZE-1]) << "," << target(ants[Dwarf::STATE_SIZE-1]) << endl; -    } -    SourceFWAntsIdxs[i_ant][0]=0; -    SourceFWAntsAbsIdxs[i_ant][0]=0; -    if (ants[0]>=0) { -      // Given a span, give the index of the first function word -      int firstfwidx = GetFirstFWIdx(source(span),target(span),sourcelattice,sfw); -      if (DEBUG) cerr << "  firstfwidx = " << firstfwidx << endl; -      int fwcount = 0; -      if (ants[1]>=0) { // one function word -        SourceFWAntsIdxs[i_ant][0]++; SourceFWAntsIdxs[i_ant][1]=ants[0]; -        SourceFWAntsIdxs[i_ant][2]=ants[1]; SourceFWAntsIdxs[i_ant][3]=ants[2];  -        fwcount++; -      } else { // if ants[1] < 0 then compound fws -        //cerr << "ants[1]<0" << endl; -        istringstream ossf(TD::Convert(ants[1]*-1)); string ffw; -        istringstream osse(TD::Convert(ants[2])); string efw; //projection would be mostly NULL -        int delta=ants[0]; -        while (osse >> efw && ossf >> ffw) { -          SourceFWAntsIdxs[i_ant][0]++;  -          SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3-2]=ants[0]-(delta--); -          SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3-1]=TD::Convert(ffw);   -          SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3]  =TD::Convert(efw); -          fwcount++; -        } -      } -      if (DEBUG) cerr << " fwcount=" << fwcount << endl; -      SourceFWAntsAbsIdxs[i_ant][0]=fwcount; -      for (int i=1; i<=fwcount; i++) SourceFWAntsAbsIdxs[i_ant][i]=firstfwidx++; -    } -    if (ants[3]>=0) { -      int lastfwidx = GetLastFWIdx(source(span),target(span),sourcelattice,sfw); -      if (DEBUG) cerr << "  lastfwidx = " << lastfwidx << endl; -      int fwcount=0; -      if (ants[4]>=0) { -        fwcount++; -        SourceFWAntsIdxs[i_ant][0]++;  -        SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3-2]=ants[3];  -        SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3-1]=ants[4];  -        SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3]  =ants[5]; -      } else { // if ants[4] < 0 then compound fws -        //cerr << "ants[4]<0" << endl; -        istringstream ossf(TD::Convert(ants[4]*-1)); string ffw; -        istringstream osse(TD::Convert(ants[5]));    string efw; -        int delta=0; -        while (osse >> efw && ossf >> ffw) { -          fwcount++; -          SourceFWAntsIdxs[i_ant][0]++; -          SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3-2]=ants[3]+(delta++); -          SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3-1]=TD::Convert(ffw); -          SourceFWAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][0]*3]  =TD::Convert(efw); -        } -      } -      if (DEBUG) cerr << " fwcount=" << fwcount << endl; -      for (int i=1; i<=fwcount; i++) SourceFWAntsAbsIdxs[i_ant][SourceFWAntsAbsIdxs[i_ant][0]+i]=lastfwidx-fwcount+i; -      SourceFWAntsAbsIdxs[i_ant][0]+=fwcount; -    } -    TargetFWAntsIdxs[i_ant][0]=0; -    if (ants[6]>=0) { -      if (ants[8]>=0) { // check the e part  -        TargetFWAntsIdxs[i_ant][0]++;  -        TargetFWAntsIdxs[i_ant][1]=ants[6]; -        TargetFWAntsIdxs[i_ant][2]=ants[7];  -        TargetFWAntsIdxs[i_ant][3]=ants[8]; -      } else { // if ants[8] < 0 then compound fws -        //cerr << "ants[8]<0" << endl; -        //cerr << "ants[7]=" << TD::Convert(ants[7]) << endl; -        //cerr << "ants[8]=" << TD::Convert(ants[8]*-1) << endl; -        istringstream ossf(TD::Convert(ants[7]));    string ffw; -        istringstream osse(TD::Convert(ants[8]*-1)); string efw; -        int delta=ants[6]; -        while (osse >> efw && ossf >> ffw) { -          //cerr << "efw="<< efw << ",ffw=" << ffw << endl; -          TargetFWAntsIdxs[i_ant][0]++; -          TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3-2]=ants[6]-(delta--); -          TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3-1]=TD::Convert(ffw); -          TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3]  =TD::Convert(efw); -        } -      } -    } -    if (ants[9]>=0) { -      if (ants[11]>=0) { -        TargetFWAntsIdxs[i_ant][0]++;  -        TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3-2]=ants[9]; -        TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3-1]=ants[10]; -        TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3]  =ants[11]; -      } else { -        //cerr << "ants[11]<0" << endl; -        //cerr << "ants[10]=" << TD::Convert(ants[10]) << endl; -        //cerr << "ants[11]=" << TD::Convert(ants[11]*-1) << endl; -        istringstream ossf(TD::Convert(ants[10]));    string ffw; -        istringstream osse(TD::Convert(ants[11]*-1)); string efw; -        int delta = 0; -        while (osse >> efw && ossf >> ffw) { -          //cerr << "efw="<< efw << ",ffw=" << ffw << endl; -          TargetFWAntsIdxs[i_ant][0]++; -          TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3-2]=ants[9]+(delta++); -          TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3-1]=TD::Convert(ffw); -          TargetFWAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][0]*3]  =TD::Convert(efw); -        } -      } -    } -    AntsAl[i_ant][0]=ants[12];//number of alignments -    for (int idx=1; idx<=AntsAl[i_ant][0]; idx++) { -      AntsAl[i_ant][idx*2-1] = source(ants[12+idx]); -      AntsAl[i_ant][idx*2]   = target(ants[12+idx]); -    } -  } - -  for (int i_ant=0; i_ant<_Arity; i_ant++) { -    int length = AntsAl[i_ant][0]; -    int maxs = -1000; -    int maxt = -1000; -    for (int idx=0; idx<length; idx++) { -      if (maxs<AntsAl[i_ant][2*idx+1]) maxs = AntsAl[i_ant][2*idx+1]; -      if (maxt<AntsAl[i_ant][2*idx+2]) maxt = AntsAl[i_ant][2*idx+2]; -    } -    if (DEBUG) cerr << "SourceFWAntsIdxs[" <<i_ant<<"][0]=" << SourceFWAntsIdxs[i_ant][0] << endl; -    for (int idx=1; idx<=SourceFWAntsIdxs[i_ant][0]; idx++) { -      if (DEBUG) { -        cerr << "SourceFWAntsIdxs["<<i_ant<<"]["<<(3*idx-2)<<"]="<<SourceFWAntsIdxs[i_ant][3*idx-2]; -        cerr << ","<<SourceFWAntsIdxs[i_ant][3*idx-1]<<","<<SourceFWAntsIdxs[i_ant][3*idx]<<endl; -        cerr << "SourceFWAntsAbsIdxs["<<i_ant<<"]["<<idx<<"]="<<SourceFWAntsAbsIdxs[i_ant][idx] << endl; -      } -      if (maxs<SourceFWAntsIdxs[i_ant][3*idx-2]) maxs=SourceFWAntsIdxs[i_ant][3*idx-2]; -    } -    if (DEBUG) cerr << "TargetFWAntsIdxs[" <<i_ant<<"][0]=" << TargetFWAntsIdxs[i_ant][0] << endl; -    for (int idx=1; idx<=TargetFWAntsIdxs[i_ant][0]; idx++) { -      if (DEBUG) { -        cerr << "TargetFWAntsIdxs["<<i_ant<<"]["<<(3*idx-2)<<"]="<<TargetFWAntsIdxs[i_ant][3*idx-2]; -        cerr << ","<<TargetFWAntsIdxs[i_ant][3*idx-1]<<","<<TargetFWAntsIdxs[i_ant][3*idx]<<endl; -      } -      if (maxt<TargetFWAntsIdxs[i_ant][3*idx-2]) maxt=TargetFWAntsIdxs[i_ant][3*idx-2]; -    } -    SourceAntsIdxs[i_ant][0] = maxs+1; -    if (DEBUG) cerr << "SourceAntsIdxs[" << i_ant << "][0]=" <<SourceAntsIdxs[i_ant][0] << endl; -    for (int idx=0; idx<SourceAntsIdxs[i_ant][0]; idx++) SourceAntsIdxs[i_ant][idx+1]=idx; -    TargetAntsIdxs[i_ant][0] = maxt+1; -    if (DEBUG) cerr << "TargetAntsIdxs[" << i_ant << "][0]=" <<TargetAntsIdxs[i_ant][0] << endl; -    for (int idx=0; idx<TargetAntsIdxs[i_ant][0]; idx++) TargetAntsIdxs[i_ant][idx+1]=idx; -  } -  int TotalSource = SourceRuleIdxs[0] - _Arity; -  for (int idx=0; idx<_Arity; idx++) TotalSource += SourceAntsIdxs[idx][0]; -  int TotalTarget = TargetRuleIdxs[0] - _Arity; -  for (int idx=0; idx<_Arity; idx++) TotalTarget += TargetAntsIdxs[idx][0]; -  if (DEBUG) cerr << "TotalSource = "<< TotalSource << ", TotalTarget = "<< TotalTarget << endl; -  int curr = 0; -  for (int idx=1; idx<=SourceRuleIdxs[0]; idx++) { -    if (SourceRuleIdxs[idx]>=0) { -      SourceRuleIdxs[idx]=curr++; -    } else { -      int i_ant = SourceRuleIdxs[idx]*-1-1; -      if (DEBUG) cerr << "SourceAntsIdxs[" << i_ant << "]" << endl; -      for (int idx2=1; idx2<=SourceAntsIdxs[i_ant][0]; idx2++) { -        SourceAntsIdxs[i_ant][idx2]=curr++; -        if (DEBUG) cerr << SourceAntsIdxs[i_ant][idx2] << " "; -      } -      if (DEBUG) cerr << endl; -    } -  } -  if (DEBUG) { -    cerr << "SourceRuleIdxs" << endl; -    for (int idx=1; idx<=SourceRuleIdxs[0]; idx++) cerr << SourceRuleIdxs[idx] << " "; -    cerr << endl; -  } -  curr = 0; -  for (int idx=1; idx<=TargetRuleIdxs[0]; idx++) { -    if (TargetRuleIdxs[idx]>=0) { -      TargetRuleIdxs[idx]=curr++; -    } else { -      int i_ant = TargetRuleIdxs[idx]*-1-1; -      if (DEBUG) cerr << "TargetRuleIdxs[" << i_ant << "]" << endl; -      for (int idx2=1; idx2<=TargetAntsIdxs[i_ant][0]; idx2++) { -        TargetAntsIdxs[i_ant][idx2]=curr++; -        if (DEBUG) cerr << TargetAntsIdxs[i_ant][idx2] << " "; -      } -      if (DEBUG) cerr << endl; -    } -  } -  if (DEBUG) { -    cerr << "TargetRuleIdxs" << endl; -    for (int idx=1; idx<=TargetRuleIdxs[0]; idx++) cerr << TargetRuleIdxs[idx] << " "; -    cerr << endl; -  } -  for (int idx=1; idx<=RuleAl[0]; idx++) { -    if (DEBUG) { -      cerr << RuleAl[idx*2-1] << " - " << RuleAl[idx*2] << " to "; -      cerr << SourceRuleIdxs[RuleAl[idx*2-1]+1] << " - " << TargetRuleIdxs[RuleAl[idx*2]+1] << endl; -    } -    set(SourceRuleIdxs[RuleAl[idx*2-1]+1],TargetRuleIdxs[RuleAl[idx*2]+1]); -  } -  for (int i_ant=0; i_ant<_Arity; i_ant++) { -    for (int idx=1; idx<=AntsAl[i_ant][0]; idx++) { -      if (DEBUG) { -        cerr << AntsAl[i_ant][2*idx-1] << " - " << AntsAl[i_ant][2*idx] << " to "; -        cerr << SourceAntsIdxs[i_ant][AntsAl[i_ant][2*idx-1]+1] << " - "; -        cerr << TargetAntsIdxs[i_ant][AntsAl[i_ant][2*idx]+1] << endl; -      } -      set(SourceAntsIdxs[i_ant][AntsAl[i_ant][2*idx-1]+1],TargetAntsIdxs[i_ant][AntsAl[i_ant][2*idx]+1]); -    } -  } -  SourceFWIdxs[0]=0; -  SourceFWAbsIdxs[0]=0; -  if (DEBUG) cerr << "SourceFWRuleIdxs:" << endl; -  for (int idx=1; idx<=SourceFWRuleIdxs[0]; idx++) { -    if (DEBUG) cerr << SourceFWRuleIdxs[3*idx-2] << " to " << SourceRuleIdxs[SourceFWRuleIdxs[3*idx-2]+1] << endl; -    SourceFWRuleIdxs[3*idx-2] = SourceRuleIdxs[SourceFWRuleIdxs[3*idx-2]+1]; -    SourceFWAbsIdxs[0]++; -    SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]=SourceFWRuleAbsIdxs[idx]; -    SourceFWIdxs[0]++; -    SourceFWIdxs[3*SourceFWIdxs[0]-2]=SourceFWRuleIdxs[3*idx-2]; -    SourceFWIdxs[3*SourceFWIdxs[0]-1]=SourceFWRuleIdxs[3*idx-1]; -    SourceFWIdxs[3*SourceFWIdxs[0]]  =SourceFWRuleIdxs[3*idx]; -  } -  for (int i_ant=0; i_ant<_Arity; i_ant++) { -    if (DEBUG) cerr << "SourceFWAntsIdxs[" << i_ant << "]" << endl; -    for (int idx=1; idx<=SourceFWAntsIdxs[i_ant][0]; idx++) { -      if (DEBUG)  -        cerr << SourceFWAntsIdxs[i_ant][3*idx-2] << " to " << SourceAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][3*idx-2]+1] << endl; -      SourceFWAntsIdxs[i_ant][3*idx-2] = SourceAntsIdxs[i_ant][SourceFWAntsIdxs[i_ant][3*idx-2]+1]; -      SourceFWAbsIdxs[0]++; -      SourceFWAbsIdxs[3*SourceFWAbsIdxs[0]-2]=SourceFWAntsAbsIdxs[i_ant][idx]; -      SourceFWIdxs[0]++; -      SourceFWIdxs[3*SourceFWIdxs[0]-2]=SourceFWAntsIdxs[i_ant][3*idx-2]; -      SourceFWIdxs[3*SourceFWIdxs[0]-1]=SourceFWAntsIdxs[i_ant][3*idx-1]; -      SourceFWIdxs[3*SourceFWIdxs[0]]  =SourceFWAntsIdxs[i_ant][3*idx]; -    } -  } -  sort(SourceFWIdxs); -  sort(SourceFWAbsIdxs); -  if (DEBUG) { -    cerr << "SourceFWIdxs : "; -    for (int idx=1; idx<=SourceFWIdxs[0]; idx++) { -      cerr << "idx:" << SourceFWIdxs[3*idx-2] << ","; -      cerr << "F:" << SourceFWIdxs[3*idx-1] << ","; -      cerr << "E:" << SourceFWIdxs[3*idx] << " "; -    } -    cerr << endl; -  } -  TargetFWIdxs[0]=0; -  if (DEBUG) cerr << "TargetFWRuleIdxs:" << endl; -  for (int idx=1; idx<=TargetFWRuleIdxs[0]; idx++) { -    if (DEBUG) cerr << TargetFWRuleIdxs[3*idx-2] << " to " << TargetRuleIdxs[TargetFWRuleIdxs[3*idx-2]+1] << endl; -    TargetFWRuleIdxs[3*idx-2] = TargetRuleIdxs[TargetFWRuleIdxs[3*idx-2]+1]; -    TargetFWIdxs[0]++;     -    TargetFWIdxs[3*TargetFWIdxs[0]-2]=TargetFWRuleIdxs[3*idx-2]; -    TargetFWIdxs[3*TargetFWIdxs[0]-1]=TargetFWRuleIdxs[3*idx-1]; -    TargetFWIdxs[3*TargetFWIdxs[0]]  =TargetFWRuleIdxs[3*idx]; -  } -  for (int i_ant=0; i_ant<_Arity; i_ant++) { -    if (DEBUG) cerr << "TargetFWAntsIdxs[" << i_ant << "]" << endl; -    for (int idx=1; idx<=TargetFWAntsIdxs[i_ant][0]; idx++) { -      if (DEBUG) cerr << TargetFWAntsIdxs[i_ant][3*idx-2] << " to " << TargetAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][3*idx-2]+1] << endl; -      TargetFWAntsIdxs[i_ant][3*idx-2] = TargetAntsIdxs[i_ant][TargetFWAntsIdxs[i_ant][3*idx-2]+1]; -      TargetFWIdxs[0]++; -      TargetFWIdxs[3*TargetFWIdxs[0]-2]=TargetFWAntsIdxs[i_ant][3*idx-2]; -      TargetFWIdxs[3*TargetFWIdxs[0]-1]=TargetFWAntsIdxs[i_ant][3*idx-1]; -      TargetFWIdxs[3*TargetFWIdxs[0]]  =TargetFWAntsIdxs[i_ant][3*idx]; -    } -  } -  sort(TargetFWIdxs); -  if (DEBUG) { -    cerr << "TargetFWIdxs : "; -    for (int idx=1; idx<=TargetFWIdxs[0]; idx++) { -      cerr << "idx:" << TargetFWIdxs[3*idx-2]<< ","; -      cerr << "E:" << TargetFWIdxs[3*idx-1]<< ","; -      cerr << "F:" << TargetFWIdxs[3*idx]<< " "; -    } -    cerr << endl; -    cerr << AsString() << endl; -  } -  fas = firstSourceAligned(1); las = lastSourceAligned(_J-2); -  fat = firstTargetAligned(1); lat = lastTargetAligned(_I-2);  -  if (DEBUG) cerr << "fas=" << fas << ", las=" << las << ", fat=" << fat << ", lat=" << lat << endl; -  assert(fas<=las); -  assert(fat<=lat); -  SetCurrAlVector(); -  if (DEBUG) cerr << "end prepare" << endl; -  return false; -}       - -string Alignment::AsStringSimple() { -  ostringstream stream; -  for (int j=0; j<getJ(); j++) { -    int t = targetOf(j,minTSpan(j)); -    while (t>=0) { -      stream << " " << j << "-" << t; -      t = targetOf(j,t+1); -    } -  } -  return stream.str(); -}; - - -string Alignment::AsString() { -  ostringstream stream; -  stream << "J:" << getJ() << " I:" << getI(); -  for (int j=0; j<getJ(); j++) { -    int t = targetOf(j,minTSpan(j)); -    while (t>=0) { -      stream << " " << j << "-" << t; -      t = targetOf(j,t+1); -    } -  } -  stream << " TargetSpan:"; -  for (int j=0; j<getJ(); j++) -    if (minTSpan(j)!=MINIMUM_INIT) -      stream << " " << j << "[" << minTSpan(j) << "," << maxTSpan(j) << "]"; -    else -      stream << " " << j << "[-,-]"; -  stream << " SourceSpan:"; -  for (int i=0; i<getI(); i++) -    if (minSSpan(i)!=MINIMUM_INIT) -      stream << " " << i << "[" << minSSpan(i) << "," << maxSSpan(i) << "]"; -    else -      stream << " " << i << "[-,-]"; -  return stream.str(); -}; - -void Alignment::SetCurrAlVector() { -  curr_al.clear(); -  for (int j=0; j<_J; j++) { -    int i = targetOf(j); -    while (i>=0) { -      curr_al.push_back(link(j,i)); -      i = targetOf(j,i+1); -    } -  } -} - -void CountTable::print() const { -  cerr << "+++ Model +++" << endl; -  for (map<WordID,int*>::const_iterator iter=model.begin(); iter!=model.end(); iter++) { -    cerr << TD::Convert(iter->first) << " "; -    for (int i=0; i<numColumn; i++) cerr << iter->second[i] << " "; -    cerr << endl; -  } -  cerr << "+++ Ultimate +++" << endl; -  for (int i=0; i<numColumn; i++) cerr << ultimate[i] << " "; -  cerr << endl; -} - -void Alignment::ToArrayInt(vector<int>* ret) { -  ret->clear(); -  for (int i=0; i<_J; i++) { -    int t = targetOf(i); -    while (t>=0) { -      ret->push_back(link(i,t)); -      t = targetOf(i,t+1); -    } -  } -} - -int Alignment::GetFWGlobalIdx(int idx, const Lattice& sourcelattice, vector<WordID>& sources, int spanstart, int spanend, const std::vector<const void*>& ant_contexts, const map<WordID,int>& sfw) { -  // get the index of the function word in the lattice -  if (DEBUG) cerr << "   GetFWGlobalIdx(" << idx << "," << spanstart << "," << spanend << ")" << endl; -  int curr = spanstart; int i_ant = 0; -  for (int i=1; i<sources.size() && i<idx; i++) { // sources contain <s> and </s>  -    if (sources[i]<0) { -      const int* ants = reinterpret_cast<const int *>(ant_contexts[i_ant++]); -      int antstate = ants[Dwarf::STATE_SIZE-1]; -      if (DEBUG) cerr << "    found NT[" << target(antstate) << "," << source(antstate) << "]" << endl; -      curr += target(antstate)-source(antstate); -    } else { -      curr++; -    } -  } -  if (DEBUG) cerr << "    curr = " << curr << endl; -  //compute the fw index -  int ret = 1; -  for (int i=0; i<curr; i++) { -    if (sfw.find(sourcelattice[i][0].label)!=sfw.end()) ret++; -  } -  if (DEBUG) cerr << "    ret = " << ret << endl; -  return ret; -} - -int Alignment::GetFirstFWIdx(int spanstart,int spanend, const Lattice& sourcelattice, const map<WordID,int>& sfw) { -  if (DEBUG) cerr << "   GetFirstFWIdx(" << spanstart << "," << spanend << ")" << endl;   -  int curr=0; -  for (int i=0; i<spanend; i++) { -    if (sfw.find(sourcelattice[i][0].label)!=sfw.end()) { -      curr++; -      if (i>=spanstart) return curr; -    }  -  } -//  assert(0); -  return curr; -} - -int Alignment::GetLastFWIdx(int spanstart,int spanend, const Lattice& sourcelattice, const map<WordID,int>& sfw) { -  if (DEBUG) cerr << "   GetLastFWIdx(" << spanstart << "," << spanend << ")" << endl; -  int curr=0; -  for (int i=0; i<spanend; i++) { -    if (sfw.find(sourcelattice[i][0].label)!=sfw.end()) { -      curr++; -    }  -  } -  return curr; -} - -WordID Alignment::generalize(WordID original, const map<WordID,WordID>& tags, bool pos) { -  if (!pos) { -    map<WordID,WordID>::const_iterator it = tags.find(original); -    if (it!=tags.end()) { -      return it->second; -    } -  } else { -    string key,idx; -    Dwarf::stripIndex(TD::Convert(original),&key,&idx); -    map<WordID,WordID>::const_iterator it = tags.find(TD::Convert(key));  -    if (it!=tags.end()) { -      ostringstream oss; -      oss << TD::Convert(it->second) << "/" << idx; -      return TD::Convert(oss.str());    -    } -  } -  return original; -} - -int* Alignment::SOS() { -  int* neighbor = new int[4]; -  neighbor[0]=0; neighbor[1]=0; -  neighbor[2]=0; neighbor[3]=0; -  return neighbor; -} - -int* Alignment::EOS() { -  int* neighbor = new int[4]; -  neighbor[0]=getJ()-1; neighbor[1]=neighbor[0]; -  neighbor[2]=getI()-1; neighbor[3]=neighbor[2]; -  return neighbor; -} - -int* Alignment::neighborLeft(int startidx, int endidx, bool* getit) { -  if (DEBUG) cerr << "   neighborLeft("<<startidx<<","<<endidx<<")"<<endl; -  int lborder = startidx; -  int* ret; -  while(lborder<=endidx) { -    ret = blockSource(lborder,endidx); -    if (ret[0]==lborder && ret[1]==endidx && ret[2]!=MINIMUM_INIT) { -      *getit = true; -      return ret; -    } else { -      delete[] ret; -      lborder++; -    } -  } -  ret = new int[4]; -  ret[0]=-1; ret[1]=-1; ret[2]=-1; ret[3]=-1; -  *getit = false; -  return ret; -} - -int* Alignment:: neighborRight(int startidx, int endidx, bool* getit) { -  if (DEBUG) cerr << "   neighborRight("<<startidx<<","<<endidx<<")"<<endl; -  int rborder = endidx; -  int* ret; -  while(startidx<=rborder) { -    ret = blockSource(startidx,rborder); -    if (ret[0]==startidx && ret[1]==rborder && ret[2]!=MINIMUM_INIT) { -      *getit = true; -      return ret; -    } else { -      delete[] ret; -      rborder--; -    } -  } -  ret = new int[4]; -  ret[0]=-1; ret[1]=-1; ret[2]=-1; ret[3]=-1; -  *getit = false; -  return ret; -} diff --git a/decoder/dwarf.h b/decoder/dwarf.h deleted file mode 100644 index 49d2a3b7..00000000 --- a/decoder/dwarf.h +++ /dev/null @@ -1,286 +0,0 @@ -#ifndef DWARF_H -#define DWARF_H - -#include <cstdlib> -#include <vector> -#include <map> -#include <string> -#include <ostream> -#include "wordid.h" -#include "lattice.h" -#include "trule.h" -#include "tdict.h" -#include <boost/functional/hash.hpp> -#include <tr1/unordered_map> -#include <boost/tuple/tuple.hpp> - -using namespace std; -using namespace std::tr1; -using namespace boost::tuples; -using namespace boost; - -const static bool DEBUG = false; - -class CountTable { -public: -        int* ultimate; -        map<WordID,int*> model; -        int mode; -        int numColumn; -        void print() const; -        void setup(int _numcolumn, int _mode) { -          mode = _mode; numColumn = _numcolumn; -        } -}; - -class Alignment { -/* Alignment represents an alignment object in a 2D format to support function word-based models calculation  - -   A note about model's parameter estimation: -   ========================================== -   The model is estimated as a two-level Dirichlet process.  -   For orientation model, the first tier estimation is: -   P(o|f,e) where *o* is the orientation value to estimate, *f* is the source function word aligned to *e*  -   its second tier is: P(o|f), while its third tier is P(o) -   For dominance model, the first tier estimation is: -   P(d|f1,f2,e1,e2) where *d* is a dominance value to estimate, *f1,f2* are the neighboring function words on the source -   aligned to *e1,e2* on the target side -   its second tier is: P(d|f1,f2) while its third tier is P(d) -     -   Taking orientation model as a case in point, a two level estimation proceeds as follow: -   P(o|f,e) = c(o,f,e) + alpha { c(o,f) + beta [ c (o) / c(.) ] } -                                 ------------------------------ -                                 c(f)   + beta -              ------------------------------------------------- -              c(f,e)   + alpha -   where c() is a count function, alpha and beta are the concentration parameter  -         of the first and second Dirichlet process respectively  -   To encourage or penalize the use of second and third tier statistics, bo1 and bo2 binary features are introduced  -*/ -public: -  const static int MAX_WORDS = 200;   -  const static int MINIMUM_INIT = 1000; -  const static int MAXIMUM_INIT = -1000; -  const static int MAX_ARITY = 2; -  WordID kSOS; -  WordID kEOS; -  WordID kUNK; -  double alpha_oris; // 1st concentration parameter for orientation model  -  double beta_oris;  // 2nd concentration parameter for orientation model -  double alpha_orit; // 1st concentration parameter for orientation model  -  double beta_orit;  // 2nd concentration parameter for orientation model -  double alpha_doms; // idem as above but for dominance model -  double beta_doms; -  double alpha_domt; // idem as above but for dominance model -  double beta_domt; -   -  // ACCESS to alignment -  void set(int j,int i);   // j is the source index, while i is the target index -  void reset(int j,int i); // idem as above -  inline bool at(int j, int i) { return _matrix[j][i]; }; -  inline int getJ() {return _J;}; // max source of the current alignment -  inline int getI() {return _I;}; // max target of the current alignment -  inline void setI(int I) { _I = I; }; -  inline void setJ(int J) { _J = J; }; -  inline void setF(vector<WordID> f) { _f=f;}; -  inline void setE(vector<WordID> e) { _e=e;}; -  inline WordID getF(int id) { if (id<0) return TD::Convert("<s>"); if (id>=_f.size()) return TD::Convert("</s>"); return _f[id];}; -  inline WordID getE(int id) { if (id<0) return TD::Convert("<s>"); if (id>=_e.size()) return TD::Convert("</s>"); return _e[id];}; -  void clearAls(int prevJ=200, int prevI=200); -  int sourceOf(int i, int start = -1); -  int targetOf(int j, int start = -1); -  inline int minSSpan(int i) { return _sSpan[i][0];} -  inline int maxSSpan(int i) { return _sSpan[i][1];} -  inline int minTSpan(int j) { return _tSpan[j][0];} -  inline int maxTSpan(int j) { return _tSpan[j][1];} -  static inline int link(int s, int t) { return (s << 16) | t; } -  static inline int source(int st) {return st >> 16; } -  static inline int target(int st) {return st & 0xffff; } -  inline void setAlphaOris(double val) { alpha_oris=val; } -  inline void setAlphaOrit(double val) { alpha_orit=val; } -  inline void setAlphaDoms(double val) { alpha_doms=val; } -  inline void setAlphaDomt(double val) { alpha_domt=val; } -  inline void setBetaOris(double val) { beta_oris=val; } -  inline void setBetaOrit(double val) { beta_orit=val; } -  inline void setBetaDoms(double val) { beta_doms=val; } -  inline void setBetaDomt(double val) { beta_domt=val; } -  inline void setFreqCutoff(int val) { cout << _freq_cutoff << " to " << val << endl;  _freq_cutoff=val; } -  string AsString(); -  string AsStringSimple(); -  int* SOS(); -  int* EOS(); - -  // Model related function   -  Alignment(); -  // Given the current *rule* and its antecedents, construct an alignment space and mark the function word alignments  -  // according *sfw* and *tfw* -  bool prepare(TRule& rule, const std::vector<const void*>& ant_contexts,  -               const map<WordID,int>& sfw, const map<WordID,int>& tfw, const Lattice& sourcelattice, int spanstart, int spanend); - -  // Compute orientation model score which parameters are stored in *table* and pass the values accordingly -  // will call Orientation(Source|Target) and ScoreOrientation(Source|Target) -  void computeOrientationSource(const CountTable& table, double *cost, double *bonus, double *bo1,  -                                double *bo1_bonus, double *bo2, double *bo2_bonus); -  void computeOrientationSourcePos(const CountTable& table, double *cost, double *bonus, -                double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, int maxfwidx, int maxdepth1, int maxdepth2); -  void computeOrientationSourceGen(const CountTable& table, double *cost, double *bonus, double *bo1,  -                                double *bo1_bonus, double *bo2, double *bo2_bonus, const map<WordID,WordID>& tags); -  void computeOrientationSourceBackward(const CountTable& table, double *cost, double *bonus, double *bo1,  -                                double *bo1_bonus, double *bo2, double *bo2_bonus); -  void computeOrientationSourceBackwardPos(const CountTable& table, double *cost, double *bonus, double *bo1,  -                                double *bo1_bonus, double *bo2, double *bo2_bonus, int maxfwidx, int maxdepth1, int maxdepth2); -  void computeOrientationTarget(const CountTable& table, double *cost, double *bonus, double *bo1,  -                                double *bo1_bonus, double *bo2, double *bo2_bonus); -  void computeOrientationTargetBackward(const CountTable& table, double *cost, double *bonus, double *bo1,  -                                double *bo1_bonus, double *bo2, double *bo2_bonus); -  // Get the orientation value of a function word at a particular index *fw* -  // assign the value to either *oril* or *orir* accoring to *Lcompute* and *Rcompute* -  void OrientationSource(int fw, int*oril, int* orir, bool Lcompute=true, bool Rcompute=true); -  void OrientationSource(int fw0, int fw1, int*oril, int* orir, bool Lcompute=true, bool Rcompute=true); -  int  OrientationSource(int* left, int* right); -  void OrientationTarget(int fw, int*oril, int* orir, bool Lcompute=true, bool Rcompute=true); -  void OrientationTarget(int fw0, int fw1, int*oril, int* orir, bool Lcompute=true, bool Rcompute=true); - -  vector<int> OrientationSourceLeft4Sampler(int fw0, int fw1); -  vector<int> OrientationSourceLeft4Sampler(int fw); -  vector<int> OrientationSourceRight4Sampler(int fw0, int fw1); -  vector<int> OrientationSourceRight4Sampler(int fw); -  vector<int> OrientationTargetLeft4Sampler(int fw0, int fw1); -  vector<int> OrientationTargetLeft4Sampler(int fw); -  vector<int> OrientationTargetRight4Sampler(int fw0, int fw1); -  vector<int> OrientationTargetRight4Sampler(int fw); - -  // Given an orientation value *ori*, estimate the score accoding to *cond1*, *cond2*  -  // and assign the value accordingly according to *isBonus* and whether the first or the second tier estimation -  // is used or not -  void ScoreOrientationRight(const CountTable& table, int ori, WordID cond1, WordID cond2,  -                             bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus,  -                             double *bo2, double *bo2_bonus, double alpha1, double beta1); -  void ScoreOrientationLeft(const CountTable& table, int ori, WordID cond1, WordID cond,  -                            bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus,  -                            double *bo2, double *bo2_bonus, double alpha1, double beta1); -  double ScoreOrientationRight(const CountTable& table, int ori, WordID cond1, WordID cond2);  -  double ScoreOrientationLeft(const CountTable& table, int ori, WordID cond1, WordID cond);  -  void ScoreOrientationRightBackward(const CountTable& table, int ori, WordID cond1, WordID cond2,  -                             bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus,  -                             double *bo2, double *bo2_bonus, double alpha1, double beta1); -  void ScoreOrientationLeftBackward(const CountTable& table, int ori, WordID cond1, WordID cond,  -                            bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus,  -                            double *bo2, double *bo2_bonus, double alpha1, double beta1); -  double ScoreOrientationRightBackward(const CountTable& table, int ori, WordID cond1, WordID cond2);  -  double ScoreOrientationLeftBackward(const CountTable& table, int ori, WordID cond1, WordID cond);  -  void ScoreOrientation(const CountTable& table, int offset, int ori, WordID cond1, WordID cond2,  -                            bool isBonus, double *cost, double *bonus, double *bo1, double *bo1_bonus,  -                            double *bo2, double *bo2_bonus, double alpha1, double beta1); -  double ScoreOrientation(const CountTable& table, int offset, int ori, WordID cond1, WordID cond2);  - -  // idem as above except these are for dominance model -  void computeDominanceSource(const CountTable& table, WordID lfw, WordID rfw, double *cost, double *bonus,  -                              double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus); -  void computeDominanceSourcePos(const CountTable& table, WordID lfw, WordID rfw, double *cost, double *bonus,  -                              double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus, int maxfwidx, int maxdepth1, int maxdepth2); -  void computeDominanceTarget(const CountTable& table, WordID lfw, WordID rfw, double *cost, double *bonus,  -                              double *bo1, double *bo1_bonus, double *bo2, double *bo2_bonus); -  void computeBorderDominanceSource(const CountTable& table, double *cost, double *bonus,  -        double *state_mono, double *state_nonmono, -        TRule &rule, const std::vector<const void*>& ant_contexts, const map<WordID,int>& sfw); -  int DominanceSource(int fw1, int fw2); -  int DominanceTarget(int fw1, int fw2); -  vector<int> DominanceSource4Sampler(int fw1, int fw2); -  vector<int> DominanceTarget4Sampler(int fw1, int fw2); -  void ScoreDominance(const CountTable& table, int dom, WordID s1, WordID s2, WordID t1, WordID t2,  -                      double *cost, double *bo1, double *bo2, bool isBonus, double alpha2, double beta2); -  double ScoreDominance(const CountTable& table, int dom, WordID s1, WordID s2, WordID t1, WordID t2);  - -  // Remove all function word alignments except those at the borders -  // May result in more than two function word alignments at each side, because this function  -  // will continue keeping function word alignments until the first aligned word at each side -  void BorderingSFWsOnly(); -  void BorderingTFWsOnly(); -  void simplify(int *ret); // preparing the next state -  void simplify_nofw(int *ret); // preparing the next state when no function word appears -  // set the first part of the next state, which concerns with function word -  // fas, las, fat, lat is the (f)irst or (l)ast function word alignments either on the (s)ource or (t)arget -  // these parameters to anticipate cases where there are more than two function word alignments -  void FillFWIdxsState(int *state, int fas, int las, int fat, int lat); - -  // Helper function to obtain the aligned words on the other side  -  // WARNING!!! Only to be used if the als are in sync with either source or target sentences -  WordID F2EProjectionFromExternal(int idx, const vector<AlignmentPoint>& als, const string& delimiter=" "); -  WordID E2FProjectionFromExternal(int idx, const vector<AlignmentPoint>& als, const string& delimiter=" "); -  // WARNING!!! Only to be used in dwarf_main.cc  -  // These two function words assume that the alignment contains phrase boundary  -  // but the source and target sentences do not -  WordID F2EProjection(int idx, const string& delimiter=" "); -  WordID E2FProjection(int idx, const string& delimiter=" "); -  void SetCurrAlVector(); -  int* blockSource(int fw1, int fw2); -  int* blockTarget(int fw1, int fw2); -  void ToArrayInt(vector<int>* arr); -  int* neighborLeft(int startidx, int endidx, bool* found); -  int* neighborRight(int startidx, int endidx, bool* found); -private: -  // Hash to avoid redundancy -  unordered_map<vector<int>, int, boost::hash<vector<int> > > oris_hash; -  unordered_map<vector<int>, int, boost::hash<vector<int> > > orit_hash; -  unordered_map<vector<int>, int, boost::hash<vector<int> > > doms_hash; -  unordered_map<vector<int>, int, boost::hash<vector<int> > > domt_hash; -  unordered_map<vector<int>, vector<int>, boost::hash<vector<int> > > simplify_hash; -  unordered_map<vector<int>, vector<int>, boost::hash<vector<int> > > prepare_hash; -  -  int _J; // effective source length; -  int _I; // effective target length; -  bool _matrix[MAX_WORDS][MAX_WORDS]; // true if aligned  -  short _sSpan[MAX_WORDS][2]; //the source span of a target index; 0->min, 1->max -  short _tSpan[MAX_WORDS][2]; //the target span of a source index; 0->min, 2->max -  int _freq_cutoff; -  int SourceFWRuleIdxs[40]; //the indexes of function words in the rule;  -          // The following applies to all *FW*Idxs -          // *FW*Idxs[0] = size -          // *FW*Idxs[idx*3-2] = index in the alignment, where idx starts from 1 to size -          // *FW*Idxs[idx*3-1] = source WordID -          // *FW*Idxs[idx*3]   = target WordID -  int SourceFWRuleAbsIdxs[40]; -  int TargetFWRuleIdxs[40]; //the indexes of function words in the rule; zeroth element is the count -  int ** SourceFWAntsIdxs;  //the indexes of function words in antecedents -  int ** SourceFWAntsAbsIdxs; -  int ** TargetFWAntsIdxs;  //the indexes of function words in antecedents -  int SourceRuleIdxs[40]; //the indexes of SOURCE tokens (zeroth element is the number of source tokens) -        //>0 means terminal, -i means the i-th Xs -  int TargetRuleIdxs[40]; //the indexes of TARGET tokens (zeroth element is the number of target tokens) -  int ** SourceAntsIdxs;  //the array of indexes of a particular antecedent's SOURCE tokens -  int ** TargetAntsIdxs;  //the array of indexes of a particular antecedent's TARGET tokens -  int SourceFWIdxs[40]; -  int SourceFWAbsIdxs[40]; -  int TargetFWIdxs[40]; -  // *sort* and *quickSort* are used to sort *FW*Idxs -  void sort(int* num); -  void quickSort(int arr[], int top, int bottom); - -  // *block(Source|Target)* finds the minimum block that containts two indexes (fw1 and fw2) -  inline int least(int i1, int i2) { return (i1<i2)?i1:i2; } -  inline int most(int i1, int i2) { return (i1>i2)?i1:i2; } -  void simplifyBackward(vector<int *>*blocks, int* block, const vector<int>& danglings); -  // used in simplify to check whether an atomic block according to source function words is also atomic according -  // to target function words as well, otherwise break it  -  // the resulting blocks are added into *blocks* -  int _Arity; -  std::vector<WordID> _f; // the source sentence of the **current** rule (may not consistent with the current alignment) -  std::vector<WordID> _e; // the target sentence of the **current** rule -  int RuleAl[40]; -  int **AntsAl; -  int firstSourceAligned(int start); -  int firstTargetAligned(int start); -  int lastSourceAligned(int end); -  int lastTargetAligned(int end); -  int fas, las, fat, lat; // first aligned source, last aligned source, first aligned target, last aligned target -  bool MemberOf(int* FWIdxs, int pos1, int pos2); // whether FWIdxs contains pos1 and pos2 consecutively -  // Convert the alignment to vector form, will be used for hashing purposes -  vector<int> curr_al; -  int GetFWGlobalIdx(int idx, const Lattice& sourcelattice, vector<WordID>& sources, int spanstart, int spanend, const std::vector<const void*>& ant_contexts, const map<WordID,int>& sfw); -  int GetFirstFWIdx(int spanstart,int spanend, const Lattice& sourcelattice, const map<WordID,int>& sfw); -  int GetLastFWIdx(int spanstart,int spanend, const Lattice& sourcelattice, const map<WordID,int>& sfw); -  WordID generalize(WordID original, const map<WordID,WordID>& tags, bool pos=false); -}; - -#endif diff --git a/decoder/earley_composer.cc b/decoder/earley_composer.cc index efce70a6..d47a6969 100644 --- a/decoder/earley_composer.cc +++ b/decoder/earley_composer.cc @@ -4,8 +4,14 @@  #include <fstream>  #include <map>  #include <queue> -#include <tr1/unordered_map> -#include <tr1/unordered_set> +#ifndef HAVE_OLD_CPP +# include <unordered_map> +# include <unordered_set> +#else +# include <tr1/unordered_map> +# include <tr1/unordered_set> +namespace std { using std::tr1::unordered_map; using std::tr1::unordered_multiset; using std::tr1::unordered_set; } +#endif  #include <boost/shared_ptr.hpp>  #include <boost/program_options.hpp> @@ -19,7 +25,6 @@  #include "hg_remove_eps.h"  using namespace std; -using namespace std::tr1;  // Define the following macro if you want to see lots of debugging output  // when you run the chart parser diff --git a/decoder/factored_lexicon_helper.cc b/decoder/factored_lexicon_helper.cc index 7203b325..e7899215 100644 --- a/decoder/factored_lexicon_helper.cc +++ b/decoder/factored_lexicon_helper.cc @@ -2,6 +2,7 @@  #include "filelib.h"  #include "stringlib.h" +#include "sentence_metadata.h"  using namespace std; diff --git a/decoder/factored_lexicon_helper.h b/decoder/factored_lexicon_helper.h index 81c75275..7fedc517 100644 --- a/decoder/factored_lexicon_helper.h +++ b/decoder/factored_lexicon_helper.h @@ -6,7 +6,8 @@  #include <string>  #include <map>  #include "tdict.h" -#include "sentence_metadata.h" + +struct SentenceMetadata;  // when computing features, it can be advantageous to:  //   1) back off to less specific forms (e.g., less highly inflected forms, POS tags, etc) diff --git a/decoder/ff_context.cc b/decoder/ff_context.cc index f2b0e67c..e56f6f1f 100644 --- a/decoder/ff_context.cc +++ b/decoder/ff_context.cc @@ -46,7 +46,7 @@ void RuleContextFeatures::ReplaceMacroWithString(    macro << relative_location << "]";    int macro_index = feature_instance.find(macro.str());    if (macro_index == string::npos) { -    cerr << "Can't find macro " << macro << " in feature template "  +    cerr << "Can't find macro " << macro.str() << " in feature template "   	 << feature_instance;      abort();    } diff --git a/decoder/ff_dwarf.cc b/decoder/ff_dwarf.cc deleted file mode 100644 index fe7a472e..00000000 --- a/decoder/ff_dwarf.cc +++ /dev/null @@ -1,894 +0,0 @@ -#include <vector> -#include <sstream> -#include <fstream> -#include <string> -#include <iostream> -#include <map> -#include "hg.h" -#include "ff_dwarf.h" -#include "dwarf.h" -#include "wordid.h" -#include "tdict.h" -#include "filelib.h" -#include "sentence_metadata.h" -#include "stringlib.h" - -using namespace std; - -Dwarf::Dwarf(const std::string& param) {  -/* Param is a space separated string which contains any or all of the following: -   oris|orit|doms|domt=filename  -   e.g. oris=/fs/clip-galep3eval/hendra/z2e/oris128.gz -*/ -  sSOS="<s>"; -  sEOS="</s>"; -  kSOS=TD::Convert(sSOS); -  kEOS=TD::Convert(sEOS); -  kGOAL=TD::Convert("S")*-1; -  _sent_id = (int *)malloc(sizeof(int)); -  *_sent_id = -1; -  if (DEBUG) cerr << "here = " << *_sent_id << endl; -  _fwcount = (int *)malloc(sizeof(int)); -  *_fwcount = -1; -  cerr << "initializing dwarf" << endl; -  flag_oris=false; flag_orit=false; flag_doms=false; flag_domt=false; flag_tfw_count=false; -  flag_bdoms=false; flag_porislr=false, flag_porisrl=false, flag_goris=false; flag_pgorislr=false, flag_pgorisrl=false; -  flag_pdomslr=false; flag_pdomsrl=false; flag_pgdomslr=false; flag_pgdomsrl=false; flag_gdoms=false; -  flag_oris_backward=false; flag_orit_backward=false;  -  explicit_soseos=false; -  SetStateSize(STATE_SIZE*sizeof(int));    -  als = new Alignment();   -  als->clearAls(Alignment::MAX_WORDS,Alignment::MAX_WORDS); -  istringstream iss(param); string w; -  while(iss >> w) { -    int equal = w.find_first_of("="); -    if (equal!=string::npos) { -      string model = w.substr(0,equal); -      vector<string> params;  -      Tokenize(w.substr(equal+1),',',¶ms); -      string fn = params[0]; -      if (model == "minfreq") { -        cerr << "model minfreq " << fn << endl; -        als->setFreqCutoff(atoi(fn.c_str())); -      } else if (model == "oris") { -        flag_oris = readOrientation(&toris,fn,&sfw);  -        if (flag_oris) { -          oris_ = FD::Convert("OrientationSource"); -          //oris_bo1_ = FD::Convert("OrientationSource_BO1"); -          //oris_bo2_ = FD::Convert("OrientationSource_BO2"); -        } -        if (params.size()>1) als->setAlphaOris(atof(params[1].c_str())); -        if (params.size()>2) als->setBetaOris(atof(params[2].c_str())); -      } else if (model == "porislr") { -        flag_porislr = readOrientation(&tporislr,fn,&sfw,true); -        poris_nlr = 0; -        if (flag_porislr) { -          porislr_ = FD::Convert("OrientationSourcePositionfulLeftRight"); -        } -        if (params.size()>1) poris_nlr = atoi(params[1].c_str()); -        if (DEBUG) cerr << "  maximum poris depth=" << poris_nlr  << endl; -      } else if (model == "porisrl") { -        flag_porisrl = readOrientation(&tporisrl,fn,&sfw,true); -        poris_nrl = 0; -        if (flag_porisrl) { -          porisrl_ = FD::Convert("OrientationSourcePositionfulRightLeft"); -        } -        if (params.size()>1) poris_nrl = atoi(params[1].c_str()); -        if (DEBUG) cerr << "  maximum poris depth=" << poris_nrl  << endl; -      } else if (model=="goris") { -        flag_goris = readOrientation(&tgoris,fn,&sfw); -        if (flag_goris) { -          goris_ = FD::Convert("OrientationSourceGeneralized"); -        } -        if (params.size()>1) { -          readTags(params[1],&tags); -          generalizeOrientation(&tgoris,tags);           -        } -      } else if (model=="pgorislr") { -        flag_pgorislr = readOrientation(&tpgorislr,fn,&sfw,true); -        pgoris_nlr = 0; -        if (flag_pgorislr) { -          pgorislr_ = FD::Convert("OrientationSourceGeneralizedPositionfulLeftRight"); -        } -        if (DEBUG) { -          cerr << "BEFORE GENERALIZATION" << endl; -          tpgorislr.print(); -        } -        if (params.size()>1) pgoris_nlr = atoi(params[1].c_str()); -        if (params.size()>2) { -          readTags(params[2],&tags); -          generalizeOrientation(&tpgorislr,tags,true); -        } -        if (DEBUG) { -          cerr << "AFTER GENERALIZATION" << endl; -          tpgorislr.print(); -        } -      } else if (model=="pgorisrl") { -        flag_pgorisrl = readOrientation(&tpgorisrl,fn,&sfw,true); -        pgoris_nrl = 0; -        if (flag_pgorisrl) { -          pgorisrl_ = FD::Convert("OrientationSourceGeneralizedPositionfulLeftRight"); -        }  -        if (params.size()>1) pgoris_nrl = atoi(params[1].c_str()); -        if (params.size()>2) { -          readTags(params[2],&tags); -          generalizeOrientation(&tpgorisrl,tags,true); -        } -      } else if (model == "oris_backward") { -        flag_oris_backward = true; -        if (!flag_oris) readOrientation(&toris,fn,&sfw); -        oris_backward_ = FD::Convert("OrientationSourceBackward"); -        if (params.size()>1) als->setAlphaOris(atof(params[1].c_str())); -        if (params.size()>2) als->setBetaOris(atof(params[2].c_str())); -      } else if (model == "orit") { -        flag_orit = readOrientation(&torit,fn,&tfw);  -        if (flag_orit) { -          orit_ = FD::Convert("OrientationTarget"); -          //orit_bo1_ = FD::Convert("OrientationTarget_BO1"); -          //orit_bo2_ = FD::Convert("OrientationTarget_BO2"); -        } -        if (params.size()>1) als->setAlphaOrit(atof(params[1].c_str())); -        if (params.size()>2) als->setBetaOrit(atof(params[2].c_str())); -      } else if (model == "orit_backward") { -        flag_orit_backward = true; -        if (!flag_orit) readOrientation(&torit,fn,&tfw); -        orit_backward_ = FD::Convert("OrientationTargetBackward"); -        if (params.size()>1) als->setAlphaOrit(atof(params[1].c_str())); -        if (params.size()>2) als->setBetaOrit(atof(params[2].c_str())); -      } else if (model == "doms") { -        flag_doms = readDominance(&tdoms,fn,&sfw);  -        if (flag_doms) { -          doms_ = FD::Convert("DominanceSource"); -          //doms_bo1_ = FD::Convert("DominanceSource_BO1"); -          //doms_bo2_ = FD::Convert("DominanceSource_BO2"); -        } -        if (params.size()>1) als->setAlphaDoms(atof(params[1].c_str())); -        if (params.size()>2) als->setBetaDoms(atof(params[2].c_str())); -      } else if (model == "pdomsrl") { -        flag_pdomsrl = readDominance(&tpdomsrl,fn,&sfw,true); -        if (flag_pdomsrl) { -          pdomsrl_ = FD::Convert("DominanceSourcePositionfulRightLeft"); -        } -        if (params.size()>1) pdoms_nrl = atoi(params[1].c_str()); -      } else if (model == "pdomslr") { -        flag_pdomslr = readDominance(&tpdomslr,fn,&sfw,true); -        tpdomslr.print(); -        if (flag_pdomslr) { -          pdomslr_ = FD::Convert("DominanceSourcePositionfulLeftRight"); -        } -        if (params.size()>1) pdoms_nlr = atoi(params[1].c_str()); -      } else if (model == "pgdomsrl") { -        flag_pgdomsrl = readDominance(&tpgdomsrl,fn,&sfw,true); -        if (flag_pgdomsrl) { -          pgdomsrl_ = FD::Convert("DominanceSourceGeneralizedPositionfulRightLeft"); -        } -        if (params.size()>1) pgdoms_nrl = atoi(params[1].c_str()); -        if (params.size()>2) { -          readTags(params[2],&tags); -          generalizeDominance(&tpgdomsrl,tags,true); -        } -      } else if (model == "pgdomslr") { -        flag_pgdomslr = readDominance(&tpgdomslr,fn,&sfw,true); -        if (flag_pgdomslr) { -          pgdomslr_ = FD::Convert("DominanceSourceGeneralizedPositionfulLeftRight"); -        } -        if (params.size()>1) pgdoms_nlr = atoi(params[1].c_str()); -        if (params.size()>2) { -          readTags(params[2],&tags); -          if (DEBUG) { -            for (map<WordID,WordID>::const_iterator it=tags.begin(); it!=tags.end(); it++) { -              cerr << "tags = " << TD::Convert(it->first) << ", " << TD::Convert(it->second) << endl; -            } -          }  -          generalizeDominance(&tpgdomslr,tags,true); -        } -        if (DEBUG) tpgdomslr.print(); -      } else if (model == "bdoms") { -        flag_bdoms = readDominance(&tbdoms,fn,&sfw); -        if (flag_bdoms) { -          bdoms_ = FD::Convert("BorderDominanceSource"); -        } -      } else if (model == "domt") { -        flag_domt = readDominance(&tdomt,fn,&tfw);  -        if (flag_domt) { -          domt_ = FD::Convert("DominanceTarget"); -          //domt_bo1_ = FD::Convert("DominanceTarget_BO1"); -          //domt_bo2_ = FD::Convert("DominanceTarget_BO2"); -        } -        if (params.size()>1) als->setAlphaDomt(atof(params[1].c_str())); -        if (params.size()>2) als->setBetaDomt(atof(params[2].c_str())); -      } else if (model== "tfw_count") { -        flag_tfw_count = readList(fn,&tfw); -        tfw_count_ = FD::Convert("TargetFunctionWordsCount");         -      } else { -        cerr << "DWARF doesn't understand this model: " << model << endl; -      } -    } else { -      if (w=="tfw_count") { -        flag_tfw_count = true; -        tfw_count_ = FD::Convert("TargetFunctionWordsCount"); -      } else if (w=="oris_backward") { -        flag_oris_backward = true; -        oris_backward_ = FD::Convert("OrientationSourceBackward");  -      } else if (w=="orit_backward") { -        flag_orit_backward = true; -        orit_backward_ = FD::Convert("OrientationTargetBackward"); -      } else if (w=="explicit_soseos") { -        explicit_soseos=true; -      } else { -        cerr << "DWARF doesn't need this param: " << param << endl;  -      } -    }   -  } -  for (map<WordID,int>::const_iterator it=sfw.begin(); it!=sfw.end() && DEBUG; it++) { -    cerr << "   FW:" << TD::Convert(it->first) << endl; -  } -} - -void Dwarf::TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, -                                     const std::vector<const void*>& ant_contexts, -                                     SparseVector<double>* features, -                                     SparseVector<double>* estimated_features, -                                     void* context) const { -  if (DEBUG) cerr << "TraversalFeaturesImpl" << endl; -  double cost, bonus, bo1, bo2, bo1_bonus, bo2_bonus; -  double bdoms_state_mono= 0; double bdoms_state_nonmono = 0; -  TRule r = *edge.rule_; -  if (DEBUG) cerr << " sent_id=" << *_sent_id << ", " << smeta.GetSentenceID() << endl; -  if (DEBUG) cerr << "rule = " << r.AsString() << endl;  -  if (DEBUG) cerr << "rule[i,j] = " << edge.i_ << "," << edge.j_ << endl; -  if (*_sent_id != smeta.GetSentenceID()) { //new sentence -    *_sent_id = smeta.GetSentenceID(); -    const Lattice l = smeta.GetSourceLattice(); -    *_fwcount=0; -    for (int i=0; i<smeta.GetSourceLength(); i++) { -      if (sfw.find(l[i][0].label)!=sfw.end()) { -        *_fwcount+=1; -      } -    } -    if (DEBUG) cerr << "new sentence[" << *_sent_id << "]="<<*_fwcount<<endl; -  } -  bool nofw = als->prepare(*edge.rule_, ant_contexts, sfw, tfw,smeta.GetSourceLattice(),edge.i_,edge.j_);  -  bool isFinal = (edge.i_==0 && edge.j_==smeta.GetSourceLength() && r.GetLHS()==kGOAL); -  // prepare *nofw* outputs whether the resulting alignment, contains function words or not -  // if not, the models do not have to be calcualted and *simplify* is very simple -  if (DEBUG) cerr << "nofw = " << nofw << endl; -  if (flag_tfw_count) { -    double count = 0; -    for (int i=0; i<r.e_.size(); i++) { -      if (tfw.find(r.e_[i])!=tfw.end()) count++; -    } -    features->set_value(tfw_count_,count);   -  } -  if (flag_oris) { -    cost=0; bonus=0; bo1=0; bo2=0; bo1_bonus=0; bo2_bonus=0; -    if (!nofw) als->computeOrientationSource(toris,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus);  -    if (isFinal&&!explicit_soseos) { -      cost += bonus; -      bonus = 0; -    } -    features->set_value(oris_,cost);  -    //features->set_value(oris_bo1_,bo1);  -    //features->set_value(oris_bo2_,bo2); -    estimated_features->set_value(oris_,bonus);  -    //estimated_features->set_value(oris_bo1_,bo1_bonus);  -    //estimated_features->set_value(oris_bo2_,bo2_bonus); -  } -  if (flag_porislr) { -    cost=0; bonus=0; bo1=0; bo2=0; bo1_bonus=0; bo2_bonus=0; -    if (!nofw)  -      als->computeOrientationSourcePos(tporislr,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,poris_nlr,0); -    if (isFinal&&!explicit_soseos) { -      cost += bonus; -      bonus = 0; -    } -    features->set_value(porislr_,cost); -    estimated_features->set_value(porislr_,bonus); -  } -  if (flag_porisrl) { -    cost=0; bonus=0; bo1=0; bo2=0; bo1_bonus=0; bo2_bonus=0; -    if (!nofw) -      als->computeOrientationSourcePos(tporisrl,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,0,poris_nrl); -    if (isFinal&&!explicit_soseos) { -      cost += bonus; -      bonus = 0; -    } -    features->set_value(porisrl_,cost); -    estimated_features->set_value(porisrl_,bonus); -  } -  if (flag_pgorislr) { -    cost=0; bonus=0; bo1=0; bo2=0; bo1_bonus=0; bo2_bonus=0; -    if (!nofw) -      als->computeOrientationSourcePos(tpgorislr,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,pgoris_nlr,0); -    if (isFinal&&!explicit_soseos) { -      cost += bonus; -      bonus = 0; -    } -    features->set_value(pgorislr_,cost); -    estimated_features->set_value(pgorislr_,bonus); -  } -  if (flag_pgorisrl) { -    cost=0; bonus=0; bo1=0; bo2=0; bo1_bonus=0; bo2_bonus=0; -    if (!nofw) -      als->computeOrientationSourcePos(tpgorisrl,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,0,pgoris_nrl); -    if (isFinal&&!explicit_soseos) { -      cost += bonus; -      bonus = 0; -    } -    features->set_value(pgorisrl_,cost); -    estimated_features->set_value(pgorisrl_,bonus); -  } -  if (flag_goris) { -    cost=0; bonus=0; -    if (!nofw) als->computeOrientationSource(tgoris,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus); -    if (isFinal&&!explicit_soseos) { -      cost += bonus; -      bonus = 0; -    } -    features->set_value(goris_,cost); -    estimated_features->set_value(goris_,bonus); -  } -  if (flag_oris_backward) { -    cost=0; bonus=0; -    if (!nofw)  -      als->computeOrientationSourceBackward(toris,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus); -    if (isFinal&&!explicit_soseos) { -      cost += bonus; -      bonus = 0; -    } -    features->set_value(oris_backward_,cost); -    estimated_features->set_value(oris_backward_,bonus); -  } -  WordID _lfw = kSOS; -  WordID _rfw = kEOS;  -  if (flag_doms || flag_pdomslr || flag_pdomsrl || flag_pgdomslr || flag_pgdomsrl) { -    if (DEBUG) cerr << "   seeking lfw and rfw" << endl; -    int start = edge.i_; -    int end   = edge.j_; -    if (DEBUG) cerr << "   start=" << start << ", end=" << end << endl; -    const Lattice l = smeta.GetSourceLattice(); -    for (int idx=start-1; idx>=0; idx--) { -      if (DEBUG) cerr << "  checking idx=" << idx << ", label=" << l[idx][0].label << "-" << TD::Convert(l[idx][0].label) << endl; -      if (sfw.find(l[idx][0].label) !=sfw.end()) { -        if (DEBUG) cerr << "+"; -        _lfw=l[idx][0].label; break; -      } -    } -    for (int idx=end; idx<l.size(); idx++) { // end or end+1 -      if (DEBUG) cerr << "  checking idx=" << idx << ", label=" << l[idx][0].label << "-" << TD::Convert(l[idx][0].label) << endl; -      if (sfw.find(l[idx][0].label)!=sfw.end()) { -        if (DEBUG) cerr << "."; -        _rfw=l[idx][0].label; break; -      } -    } -    if (isFinal&&!explicit_soseos) { -      _lfw=kSOS; _rfw=kEOS; -    } -  } -  if (flag_doms) { -    cost=0; bonus=0; bo1=0; bo2=0; bo1_bonus=0; bo2_bonus=0; -    if (!nofw) als->computeDominanceSource(tdoms,_lfw,_rfw,&cost,&bonus, -                                           &bo1,&bo1_bonus,&bo2,&bo2_bonus);  -    if (DEBUG) cerr << "   COST=" << cost << ", BONUS=" << bonus << endl; -    if (isFinal&&!explicit_soseos) { -      cost += bonus; -      if (DEBUG) cerr << "    final and !explicit_soseos, thus cost = " << cost <<  endl; -      bonus = 0; -    } -    features->set_value(doms_,cost);  -    estimated_features->set_value(doms_,bonus); -  } -  if (flag_pdomslr) { -   if (DEBUG) cerr << " flag_pdomslr true, nofw=" << nofw << endl; -   if (DEBUG) cerr << "   lfw=" << _lfw << ", rfw=" << _rfw << endl; -   if (DEBUG) cerr << "   kSOS=" << kSOS << ", kEOS=" << kEOS << endl; -    cost=0; bonus=0; bo1=0; bo2=0; bo1_bonus=0; bo2_bonus=0; -    if (!nofw) als->computeDominanceSourcePos(tpdomslr,_lfw,_rfw,&cost,&bonus, -                                           &bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,pdoms_nlr,0); -    if (isFinal&&!explicit_soseos) { -      cost += bonus; -      bonus = 0; -    } -    features->set_value(pdomslr_,cost); -    estimated_features->set_value(pdomslr_,bonus);   -  } -  if (flag_pdomsrl) { -    cost=0; bonus=0; bo1=0; bo2=0; bo1_bonus=0; bo2_bonus=0; -    if (!nofw) als->computeDominanceSourcePos(tpdomsrl,_lfw,_rfw,&cost,&bonus, -                                           &bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,0,pdoms_nrl); -    if (isFinal&&!explicit_soseos) { -      cost += bonus; -      bonus = 0; -    } -    features->set_value(pdomsrl_,cost); -    estimated_features->set_value(pdomsrl_,bonus);  -  } -  if (flag_pgdomslr) { -    cost=0; bonus=0; bo1=0; bo2=0; bo1_bonus=0; bo2_bonus=0; -    if (!nofw) als->computeDominanceSourcePos(tpgdomslr,_lfw,_rfw,&cost,&bonus, -                                           &bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,pgdoms_nlr,0); -    if (isFinal&&!explicit_soseos) { -      cost += bonus; -      bonus = 0; -    } -    features->set_value(pgdomslr_,cost); -    estimated_features->set_value(pgdomslr_,bonus);   -  } -  if (flag_pgdomsrl) {    cost=0; bonus=0; bo1=0; bo2=0; bo1_bonus=0; bo2_bonus=0; -    if (!nofw) als->computeDominanceSourcePos(tpgdomsrl,_lfw,_rfw,&cost,&bonus, -                                           &bo1,&bo1_bonus,&bo2,&bo2_bonus,*_fwcount,0,pgdoms_nrl); -    if (isFinal&&!explicit_soseos) { -      cost += bonus; -      bonus = 0; -    } -    features->set_value(pgdomsrl_,cost); -    estimated_features->set_value(pgdomsrl_,bonus);  -  } - - -  if (flag_bdoms) { -    cost=0; bonus=0; bdoms_state_mono=0; bdoms_state_nonmono=0;  -    if (!nofw) -      als->computeBorderDominanceSource(tbdoms,&cost,&bonus, -        &bdoms_state_mono, &bdoms_state_nonmono,*edge.rule_, ant_contexts, sfw); -    features->set_value(bdoms_,cost); -    estimated_features->set_value(bdoms_,bonus);  -  } -  if (flag_orit) { -    cost=0; bonus=0; bo1=0; bo2=0; bo1_bonus=0; bo2_bonus=0; -    if (!nofw) als->computeOrientationTarget(torit,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus);  -    if (DEBUG) cerr << "cost=" << cost << ", bonus=" << bonus << ", bo1=" << bo1 << ", bo1_bonus=" << bo1_bonus << ", bo2=" << bo2 << ", bo2_bonus=" << bo2_bonus << endl; -    features->set_value(orit_,cost);  -    //features->set_value(orit_bo1_,bo1);  -    //features->set_value(orit_bo2_,bo2); -    estimated_features->set_value(orit_,bonus); -    //estimated_features->set_value(orit_bo1_,bo1_bonus); -    //estimated_features->set_value(orit_bo2_,bo2_bonus); -  } -  if (flag_orit_backward) { -    cost=0; bonus=0; -    if (!nofw) als->computeOrientationTargetBackward(torit,&cost,&bonus,&bo1,&bo1_bonus,&bo2,&bo2_bonus); -    features->set_value(orit_backward_,cost); -    estimated_features->set_value(orit_backward_,bonus); -  } -  if (flag_domt) { -    cost=0; bonus=0; bo1=0; bo2=0; bo1_bonus=0; bo2_bonus=0; -    WordID _lfw=-1; int start = edge.i_; -    WordID _rfw=-1; int end   = edge.j_; -    if (smeta.HasReference()) { -      const Lattice l = smeta.GetReference(); -      for (int idx=start-1; idx>=0; idx--) { -        if (l.size()>0) -          if (tfw.find(l[idx][0].label) !=tfw.end()) { -            _lfw=l[idx][0].label; break; -          } -      } -      for (int idx=end; idx<l.size(); idx++) { // end or end+1 -        if (l[idx].size()>0) -          if (tfw.find(l[idx][0].label)!=tfw.end()) { -            _rfw=l[idx][0].label; break; -          } -      } -    } -    //neighboringFWs(smeta.GetReference(),edge.i_,edge.j_,tfw,&_lfw,&_rfw); -    if (!nofw) als->computeDominanceTarget(tdomt,_lfw,_rfw,&cost,&bonus, -                                           &bo1,&bo1_bonus,&bo2,&bo2_bonus); -    features->set_value(domt_,cost);  -    //features->set_value(domt_bo1_,bo1);  -    //features->set_value(domt_bo2_,bo2); -    estimated_features->set_value(domt_,bonus); -    //estimated_features->set_value(domt_bo1_,bo1_bonus); -    //estimated_features->set_value(domt_bo2_,bo2_bonus); -  } -  int* vcontext = reinterpret_cast<int *>(context); -  if (!nofw) { -    als->BorderingSFWsOnly(); -    als->BorderingTFWsOnly(); -    als->simplify(vcontext);   -  } else { -    als->simplify_nofw(vcontext); -  } -  vcontext[50] = DoubleToInteger(bdoms_state_mono); -  vcontext[51] = DoubleToInteger(bdoms_state_nonmono); -  vcontext[STATE_SIZE-1] = Alignment::link(edge.i_,edge.j_);  -  if (DEBUG) { -    cerr << "state@traverse = "; -    for (int idx=0; idx<STATE_SIZE; idx++) cerr << idx << "." << vcontext[idx] << " "; -    cerr << endl; -    cerr << "bdoms_state_mono=" << bdoms_state_mono << ", state[50]=" << IntegerToDouble(vcontext[50]) << endl; -    cerr << "bdoms_state_nonmono=" << bdoms_state_nonmono << ", state[51]=" << IntegerToDouble(vcontext[51]) << endl; -  } -} - -int Dwarf::DoubleToInteger(double val) { -  float x = (float)val; -  float* px = &x; -  int* pix = reinterpret_cast<int *>(px); -  return *pix;  -} - -double Dwarf::IntegerToDouble(int val) { -  int *py = &val; -  float* pd = reinterpret_cast<float *>(py); -  return (double)*pd; -} - -void Dwarf::neighboringFWs(const Lattice& l, const int& i, const int& j, const map<WordID,int>& fw_hash, int* lfw, int* rfw) { -  *lfw=0; *rfw=0; -  int idx=i-l[i][0].dist2next; -  while (idx>=0) { -    if (l[idx].size()>0) {  -      if (fw_hash.find(l[idx][0].label)!=fw_hash.end()) { -        lfw++;   -      } -    } -    idx-=l[idx][0].dist2next; -  } -  idx=j+l[j][0].dist2next; -  while (idx<l.size()) { -    if (l[idx].size()>0) {  -      if (fw_hash.find(l[idx][0].label)!=fw_hash.end()) { -        rfw++; -      } -    } -    idx+=l[idx][0].dist2next; -  } -} - -bool Dwarf::readOrientation(CountTable* table, const std::string& filename, std::map<WordID,int> *fw, bool pos) { -  // the input format is -  // source target 0 1 2 3 4 0 1 2 3 4 -  // 0 -> MA, 1 -> RA, 2 -> MG, 3 -> RG, 4 -> NO_NEIGHBOR -  // first 01234 corresponds to the left neighbor, the second 01234 corresponds to the right neighbor -  // append 2 more at the end as precomputed total -   -  // TONS of hack here. CountTable should be wrapped as a class   -  // TODO: check whether the file exists or not, return false if not -  if (DEBUG) cerr << "  readOrientation(" << filename << ", pos=" << pos << ")" << endl; -  ReadFile rf(filename);   -  istream& in = *rf.stream(); -  table->setup(24,pos); -  table->ultimate = new int[24]; -  for (int i=0; i<24; i++) table->ultimate[i]=0; -  ostringstream oss; -  while (in) { -    string line; -    getline(in,line); -    if (line=="") break; -    istringstream tokenizer(line); -    string sourceidx, source, target, word; -    tokenizer >> source >> target;  -    if (pos) { -      sourceidx = source; -      source = sourceidx.substr(0,sourceidx.find_last_of("/")); -    } -    if (fw->find(TD::Convert(source))==fw->end()) fw->insert(pair<WordID,int>(TD::Convert(source),1)); - - -    int* element = new int[24]; -    element[5] = 0; -    for (int i=0; i<5; i++) { -      element[i] = 0; -      if (tokenizer >> word) element[i] = atoi(word.c_str()); -      element[5] += element[i]; -    } -    element[11] = 0; -    for (int i=6; i<11; i++) { -      element[i] = 0; -      if (tokenizer >> word) element[i] = atoi(word.c_str()); -      element[11] += element[i]; -    } -    element[17] = 0; -    for (int i=12; i<17; i++) { -      element[i] = 0; -      if (tokenizer >> word) element[i] = atoi(word.c_str()); -      element[17] += element[i]; -    } -    element[23] = 0; -    for (int i=18; i<23; i++) { -      element[i] = 0; -      if (tokenizer >> word) element[i] = atoi(word.c_str()); -      element[23] += element[i]; -    } -    for (int i=0; i<24; i++) table->ultimate[i] += element[i]; -    oss << source << " " << target; -    WordID key_id = TD::Convert(oss.str()); -    oss.str(""); -    if (table->model.find(key_id)!=table->model.end()) {   -      for (int i=0; i<24; i++) table->model[key_id][i]+=element[i]; -    } else { -      int* el2 = new int[24]; -      for (int i=0; i<24; i++) el2[i] = element[i]; -      table->model.insert(pair<WordID,int*>(key_id,el2)); -    } -     -    oss << source; -    key_id = TD::Convert(oss.str()); -    oss.str(""); -    if (table->model.find(key_id)!=table->model.end()) {     -      for (int i=0; i<24; i++) table->model[key_id][i]+=element[i]; -    } else { -      int* el2 = new int[24]; -      for (int i=0; i<24; i++) el2[i] = element[i]; -      table->model.insert(pair<WordID,int*>(key_id,el2)); -    } - -    if (pos) { -      oss << sourceidx << " " << target; -      key_id = TD::Convert(oss.str()); -      oss.str("");  -      if (table->model.find(key_id)!=table->model.end()) { -        for (int i=0; i<24; i++) table->model[key_id][i]+=element[i]; -      } else { -        int* el2 = new int[24]; -        for (int i=0; i<24; i++) el2[i] = element[i]; -        table->model.insert(pair<WordID,int*>(key_id,el2)); -      } -    } -    delete[] element; -  }   -  return true;     -} - -bool Dwarf::readList(const std::string& filename, std::map<WordID,int>* fw) { -  ReadFile rf(filename); -  istream& in = *rf.stream(); -  while (in) { -    string word; -    getline(in,word); -    if (fw->find(TD::Convert(word))==fw->end()) fw->insert(pair<WordID,int>(TD::Convert(word),1));  -  } -  return true; -} - -bool Dwarf::readDominance(CountTable* table, const std::string& filename, std::map<WordID,int>* fw, bool pos) { -  // the input format is  -  // source1 source2 target1 target2 0 1 2 3 -  // 0 -> dontcase 1->leftfirst 2->rightfirst 3->neither  -  if (DEBUG) cerr << "readDominance(" << filename << ",pos="<< pos << ")" << endl; -  ReadFile rf(filename); -  istream& in = *rf.stream(); -  table->ultimate = new int[5]; -  table->setup(5,pos); -  for (int i=0; i<5; i++) table->ultimate[i]=0; -  while (in) { -    string line, word; -    getline(in,line); -    if (line=="") break; -    string source1idx, source2idx, target1, target2, source1, source2; -    ostringstream oss;  -    WordID key_id; -    istringstream tokenizer(line); -    tokenizer >> source1 >> source2 >> target1 >> target2;  -    if (pos) { -      source1idx = source1; -      source2idx = source2; -      source1 = source1idx.substr(0,source1idx.find_last_of("/")); -      source2 = source2idx.substr(0,source2idx.find_last_of("/")); -    } -    if (fw->find(TD::Convert(source1))==fw->end()) fw->insert(pair<WordID,int>(TD::Convert(source1),1)); -    if (fw->find(TD::Convert(source2))==fw->end()) fw->insert(pair<WordID,int>(TD::Convert(source2),1)); - -    int* element = new int[5]; -    element[4]=0; -    for (int i=0; i<4; i++) { -      element[i]  = 0; -      if (tokenizer >> word) element[i] = atoi(word.c_str()); -      element[4]+=element[i]; -    } -    for (int i=0; i<5; i++) table->ultimate[i] += element[i]; - -    oss << source1 << " " << source2 << " " << target1 << " " << target2; -    key_id = TD::Convert(oss.str()); -    oss.str(""); -    if (table->model.find(key_id)!=table->model.end()) {  -      for (int i=0; i<5; i++) table->model[key_id][i]+=element[i]; -    } else { -      int* el2 = new int[5];  -      for (int i=0; i<5; i++) el2[i]=element[i]; -      table->model.insert(pair<WordID,int*>(key_id,el2)); -    } - -    oss << source1 << " " << source2; -    key_id = TD::Convert(oss.str()); -    oss.str(""); -    if (table->model.find(key_id)!=table->model.end()) {   -      for (int i=0; i<5; i++) table->model[key_id][i]+=element[i]; -    } else { -      int* el2 = new int[5];  -      for (int i=0; i<5; i++) el2[i]=element[i]; -      table->model.insert(pair<WordID,int*>(key_id,el2)); -    } - -    if (pos) { -      oss << source1idx << " " << source2idx << " " << target1 << " " << target2; -      key_id = TD::Convert(oss.str()); -      oss.str(""); -      if (table->model.find(key_id)!=table->model.end()) {   -        for (int i=0; i<5; i++) table->model[key_id][i]+=element[i]; -      } else { -        int* el2 = new int[5];  -        for (int i=0; i<5; i++) el2[i]=element[i]; -        table->model.insert(pair<WordID,int*>(key_id,el2)); -      } -    } -    delete element; -  } - -  return true;     -} - -bool Dwarf::readTags(const std::string& filename, std::map<WordID,WordID>* tags) { -  ReadFile rf(filename); -  istream& in = *rf.stream(); -  while(in) { -    string line, word, tag; -    getline(in,line); -    if (line=="") break; -    istringstream tokenizer(line); -    tokenizer >> tag >> word; -    tags->insert(pair<WordID,WordID>(TD::Convert(word),TD::Convert(tag))); -  } -  return true; -} - -bool Dwarf::generalizeOrientation(CountTable* table, const std::map<WordID,WordID>& tags, bool pos) { -  map<string,int*> generalized; -  for (map<WordID,int*>::iterator it=table->model.begin(); it!=table->model.end(); it++) { -    string source, target; -    istringstream tokenizer(TD::Convert(it->first)); -    tokenizer >> source >> target; -    string idx = ""; -    if (pos) { -      int found = source.find_last_of("/"); -      if (found!=string::npos && found>0) {  -        idx = source.substr(found+1); -        source = source.substr(0,found); -      } -    } -    map<WordID,WordID>::const_iterator tags_iter = tags.find(TD::Convert(source)); -    if (tags_iter!=tags.end()) { -      ostringstream genkey; -      genkey << TD::Convert(tags_iter->second); -      if (idx!="") genkey << "/" << idx; -      if (target!="") genkey << " " << target; -      int* model; -      if (generalized.find(genkey.str())!=generalized.end()) { -        model = generalized[genkey.str()]; -        for (int i=0; i<24; i++) model[i] += it->second[i]; -      } else { -        int* el = new int[24]; -        for (int i=0; i<24; i++) el[i] = it->second[i]; -        generalized.insert(pair<string,int*>(genkey.str(),el)); -      } -    } -  } -  for (map<WordID,int*>::iterator it=table->model.begin(); it!=table->model.end(); it++) { -    string source, target; -    istringstream tokenizer(TD::Convert(it->first)); -    tokenizer >> source >> target; -    string idx = ""; -    if (pos) { -      int found = source.find_last_of("/"); -      if (found!=string::npos && found>0) { -        idx = source.substr(found+1); -        source = source.substr(0,found); -      } -    } -    map<WordID,WordID>::const_iterator tags_iter = tags.find(TD::Convert(source)); -    if (tags_iter!=tags.end()) { -      ostringstream genkey; -      genkey << TD::Convert(tags_iter->second); -      if (idx!="") genkey << "/" << idx; -      if (target!="") genkey << " " << target; -      if (generalized.find(genkey.str())!=generalized.end()) { -        delete it->second; -        it->second = generalized[genkey.str()]; -      } -    } -  } -  return false; // no idea if this is right -} -  - - -bool Dwarf::generalizeDominance(CountTable* table, const std::map<WordID,WordID>& tags, bool pos) { -  map<string,int*> generalized; -  ostringstream oss; -  for (map<WordID,int*>::iterator it=table->model.begin(); it!=table->model.end(); it++) { -    string source1, source2, target1, target2; -    string idx1 = ""; string idx2 = ""; -    istringstream tokenizer(TD::Convert(it->first)); -    tokenizer >> source1 >> source2 >> target1 >> target2; -    if (DEBUG) cerr << "source1=|" << source1 << "|, source2=|" << source2 << "|, target1=|" << target1 << "|, target2=|" << target2 << "|" << endl; -    if (pos) { -      int found1 = source1.find_last_of("/"); -      int found2 = source2.find_last_of("/"); -      if (found1!=string::npos && found2!=string::npos && found1>0 && found2>0) { -        idx1 = source1.substr(found1+1); -        source1 = source1.substr(0,found1); -        idx2 = source2.substr(found2+1); -        source2 = source2.substr(0,found2); -      } -    } -    if (DEBUG)  -      cerr << "[U]source1='" << source1 << "', idx1='"<< idx1 << "', source2='" << source2 << "', idx2='"<< idx2 << "', target1='" << target1 << "', target2='" << target2 << "'" << endl; -    map<WordID,WordID>::const_iterator tags_iter1 = tags.find(TD::Convert(source1)); -    map<WordID,WordID>::const_iterator tags_iter2 = tags.find(TD::Convert(source2)); -    if (tags_iter1!=tags.end())  -      source1 = TD::Convert(tags_iter1->second); -    oss << source1; -    if (idx1!="") oss << "/" << idx1; -    if (tags_iter2!=tags.end()) -      source2 = TD::Convert(tags_iter2->second); -    oss << " " << source2; -    if (idx2!="") oss << "/" << idx2; -    if (target1!="" && target2!="") oss << " " << target1 << " " << target2; -     -    if (DEBUG) cerr << "generalized key = '" << oss.str() << "'" << endl;  -    if (generalized.find(oss.str())!=generalized.end()) { -      int* model = generalized[oss.str()]; -      for (int i=0; i<5; i++) model[i] += it->second[i]; -    } else { -      int* model = new int[5]; -      for (int i=0; i<5; i++) model[i] = it->second[i]; -      generalized.insert(pair<string,int*>(oss.str(),model)); -    }     -    oss.str(""); -  } -   -  if (DEBUG) { -    for (map<string,int*>::const_iterator it=generalized.begin(); it!=generalized.end(); it++) { -      cerr << "GENERALIZED = " << it->first << ", "; -      for (int i=0; i<5; i++) cerr << it->second[i] << " "; -      cerr << endl; -    } -  } - -  for (map<WordID,int*>::iterator it=table->model.begin(); it!=table->model.end(); it++) { -    string source1, source2, target1, target2; -    string idx1 = ""; string idx2 = ""; -    istringstream tokenizer(TD::Convert(it->first)); -    tokenizer >> source1 >> source2 >> target1 >> target2; -    if (pos) { -      int found1 = source1.find_last_of("/"); -      int found2 = source2.find_last_of("/"); -      if (found1!=string::npos && found2!=string::npos && found1>0 && found2>0) { -        idx1 = source1.substr(found1+1); -        source1 = source1.substr(0,found1); -        idx2 = source2.substr(found2+1); -        source2 = source2.substr(0,found2); -      } -    } -    map<WordID,WordID>::const_iterator tags_iter1 = tags.find(TD::Convert(source1)); -    map<WordID,WordID>::const_iterator tags_iter2 = tags.find(TD::Convert(source2)); -    if (tags_iter1!=tags.end()) -      source1 = TD::Convert(tags_iter1->second); -    oss << source1; -    if (idx1!="") oss << "/" << idx1; -    if (tags_iter2!=tags.end()) -      source2 = TD::Convert(tags_iter2->second); -    oss << " " << source2; -    if (idx2!="") oss << "/" << idx2; -    if (target1!="" && target2!="") oss << " " << target1 << " " << target2; -     -    if (generalized.find(oss.str())!=generalized.end()) { -      if (DEBUG) cerr << " generalizing "<< TD::Convert(it->first) << " into " << oss.str() << endl;  -      if (DEBUG) { -        cerr << "  model from "; -        for (int i=0; i<5; i++) cerr << it->second[i] << " ";  -        cerr << endl; -      } -      delete it->second; -      it->second = generalized[oss.str()]; -      if (DEBUG) { -        cerr << "  into "; -        for (int i=0; i<5; i++) cerr << it->second[i] << " ";  -        cerr << endl; -      } -    }     -    oss.str(""); -  } - -} diff --git a/decoder/ff_dwarf.h b/decoder/ff_dwarf.h deleted file mode 100644 index 3d6a7da6..00000000 --- a/decoder/ff_dwarf.h +++ /dev/null @@ -1,100 +0,0 @@ -#include <vector> -#include <map> -#include <string> -#include "ff.h" -#include "dwarf.h" -#include "lattice.h" - -using namespace std; - -class Dwarf : public FeatureFunction { - public: -  Dwarf(const std::string& param); -  /* State-related param -     STATE_SIZE: the number of ints  -     MAXIMUM_ALIGNMENTS: the maximum number of alignments in the states,  -                         each alignment point is encoded in one int  -                         (the first two bytes for source, and the remaining one for target) -  */ -  static const int STATE_SIZE=53;  -  static const int IMPOSSIBLY_LARGE_POS = 9999999; -  static const int MAXIMUM_ALIGNMENTS=37; -  /* Read from file the Orientation(Source|Target model parameter. */  -  static bool readOrientation(CountTable* table, const std::string& filename, std::map<WordID,int> *fw, bool pos=false); -  /* Read from file the Dominance(Source|Target) model parameter. */  -  static bool readDominance(CountTable* table, const std::string& filename, std::map<WordID,int> *fw, bool pos=false); -  static bool readList(const std::string& filename, std::map<WordID,int>* fw);      -  static double IntegerToDouble(int val); -  static int DoubleToInteger(double val); -  bool readTags(const std::string& filename, std::map<WordID,WordID>* tags); -  bool generalizeOrientation(CountTable* table, const std::map<WordID,WordID>& tags, bool pos=false);   -  bool generalizeDominance(CountTable* table, const std::map<WordID,WordID>& tags, bool pos=false);   -  static void stripIndex(const string& source, string* pkey, string* pidx) { -    if (DEBUG) cerr << "    stripIndex(" << source << ")" << endl; -    int found = source.find_last_of("/"); -    string idx = source.substr(found+1); -    string key = source.substr(0,found); -    if (DEBUG) cerr << "      found=" << found << "," << key << "," << idx << endl; -    pkey = &key; -    pidx = &idx; -  } - - - protected: -  /* The high-level workflow is as follow: -     1. call *als->prepare*, which constructs the full alignment of the edge while taking into account the antecedents -        also in this call, function words are identified. Most of the work in this call is to make sure the indexes  -        of the alignments (including the function words) are consistent with the newly created alignment -     2. call *als->computeOrientationSource*, *als->computeOrientationTarget*,  -        *als->computeDominanceSource*, or *als->computeDominanceTarget* -        and pass the resulting score to either *features* or to *estimated_features* -     3. call *als->BorderingSFWsOnly()* and *als->BorderingTFWsOnly()*, which removes records of all function word -        alignments except those at the borders. Note that fw alignments kept may be more than two on each side -        for examples if there are a number of unaligned fw alignments before the leftmost alignment or the rightmost one -     4. call *als->simplify()*, which assigns the state of this edge (*context*). It simplifies the alignment space to  -        its most compact representation, enough to compute the unscored models. This is done by observing the surviving -        function word alignments set by 3. -  */  -  void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const HG::Edge& edge, -                                     const std::vector<const void*>& ant_contexts, -                                     SparseVector<double>* features, -                                     SparseVector<double>* estimated_features, -                                     void* context) const; - private: -  Alignment* als; -  /* Feature IDs set by calling FD::Convert(model's string) */ -  int oris_, oris_bo1_, oris_bo2_, orit_, orit_bo1_, orit_bo2_; -  int oris_backward_, orit_backward_, porislr_, porisrl_, goris_, pgorislr_, pgorisrl_; -  int pdomslr_, pdomsrl_, pgdomslr_, pgdomsrl_; -  int doms_, doms_bo1_, doms_bo2_, domt_, domt_bo1_, domt_bo2_; -  int tfw_count_; -  int bdoms_;  -  int poris_count; -  int pgoris_count; -  int poris_nlr, poris_nrl; // maximum depth (1->from the beginning of the sentence, 2-> from the end of the sentence) -  int pgoris_nlr, pgoris_nrl; -  int pdoms_nlr, pdoms_nrl; -  int pgdoms_nlr, pgdoms_nrl; -  int* _sent_id; -  int* _fwcount; -  WordID kSOS; -  WordID kEOS; -  string sSOS; -  string sEOS; -  WordID kGOAL; -  /* model's flag, if set true will invoke the model scoring */ -  bool flag_oris, flag_orit, flag_doms, flag_domt, flag_tfw_count, flag_oris_backward, flag_orit_backward, flag_bdoms; -  bool flag_porislr, flag_porisrl, flag_goris, flag_pgorislr, flag_pgorisrl; -  bool explicit_soseos; -  bool flag_pdomslr, flag_pdomsrl, flag_pgdomslr, flag_pgdomsrl, flag_gdoms; -  /* a collection of Source function words (sfw) and Target function words (tfw) */ -  std::map<WordID,int> sfw; -  std::map<WordID,int> tfw; -  std::map<WordID,WordID> tags; -  /* a collection of model's parameter */ -  CountTable toris, torit, tdoms, tbdoms, tdomt, tporislr, tporisrl, tgoris, tpgorislr, tpgorisrl; -  CountTable tpdomslr, tpdomsrl, tpgdomslr, tpgdomsrl; -  void neighboringFWs(const Lattice& l, const int& i, const int& j, const map<WordID,int>& fw_hash, int* lfw, int* rfw); -}; - diff --git a/decoder/ff_external.cc b/decoder/ff_external.cc index dea0e20f..6ee4b2cf 100644 --- a/decoder/ff_external.cc +++ b/decoder/ff_external.cc @@ -19,7 +19,7 @@ ExternalFeature::ExternalFeature(const string& param) {      cerr << "External requires a path to a dynamic library!\n";      abort();    } -  lib_handle = dlopen(file.c_str(), RTLD_LAZY); +  lib_handle = dlopen(file.c_str(), RTLD_LAZY | RTLD_GLOBAL);    if (!lib_handle) {      cerr << "dlopen reports: " << dlerror() << endl;      cerr << "Did you provide a full path to the dynamic library?\n"; diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc index 6ec7b4f3..bc51076f 100644 --- a/decoder/ff_lm.cc +++ b/decoder/ff_lm.cc @@ -61,11 +61,6 @@ char const* usage_verbose="-n determines the name of the feature (and its weight  #include "hg.h"  #include "stringlib.h" -#ifdef HAVE_RANDLM -// http://randlm.sourceforge.net/ -#include "RandLM.h" -#endif -  using namespace std;  string LanguageModel::usage(bool param,bool verbose) { @@ -542,99 +537,3 @@ void LanguageModel::FinalTraversalFeatures(const void* ant_state,    features->set_value(fid_, imp().FinalTraversalCost(ant_state));  } -#ifdef HAVE_RANDLM -struct RandLMImpl : public LanguageModelImpl { -  RandLMImpl(int order, randlm::RandLM* rlm) : -      LanguageModelImpl(order), -      rlm_(rlm), -      oov_(rlm->getWordID(rlm->getOOV())), -      rb_(1000, oov_) { -    map<int, randlm::WordID> map_cdec2randlm; -    int max_wordid = 0; -    for(map<randlm::Word, randlm::WordID>::const_iterator it = rlm->vocabStart(); -        it != rlm->vocabEnd(); ++it) { -      const int cur = TD::Convert(it->first); -      map_cdec2randlm[TD::Convert(it->first)] = it->second; -      if (cur > max_wordid) max_wordid = cur; -    } -    cdec2randlm_.resize(max_wordid + 1, oov_); -    for (map<int, randlm::WordID>::iterator it = map_cdec2randlm.begin(); -         it != map_cdec2randlm.end(); ++it) -      cdec2randlm_[it->first] = it->second; -    map_cdec2randlm.clear(); -  } - -  inline randlm::WordID Convert2RandLM(int w) { -    return (w < cdec2randlm_.size() ? cdec2randlm_[w] : oov_); -  } - -  virtual double WordProb(int word, int* context) { -    int i = order_; -    int c = 1; -    rb_[i] = Convert2RandLM(word); -    while (i > 1 && *context > 0) { -      --i; -      rb_[i] = Convert2RandLM(*context); -      ++context; -      ++c; -    } -    const void* finalState = 0; -    int found; -    //cerr << "I = " << i << endl; -    return rlm_->getProb(&rb_[i], c, &found, &finalState); -  } - private: -  boost::shared_ptr<randlm::RandLM> rlm_; -  randlm::WordID oov_; -  vector<randlm::WordID> cdec2randlm_; -  vector<randlm::WordID> rb_; -}; - -LanguageModelRandLM::LanguageModelRandLM(const string& param) : -    fid_(FD::Convert("RandLM")) { -  vector<string> argv; -  int argc = SplitOnWhitespace(param, &argv); -  int order = 3; -  // TODO add support for -n FeatureName -  string filename; -  if (argc < 1) { cerr << "RandLM requires a filename, minimally!\n"; abort(); } -  else if (argc == 1) { filename = argv[0]; } -  else if (argc == 2 || argc > 3) { cerr << "Don't understand 'RandLM " << param << "'\n"; } -  else if (argc == 3) { -    if (argv[0] == "-o") { -      order = atoi(argv[1].c_str()); -      filename = argv[2]; -    } else if (argv[1] == "-o") { -      order = atoi(argv[2].c_str()); -      filename = argv[0]; -    } -  } -//  set_order(order); -  int cache_MB = 200; // increase cache size -  randlm::RandLM* rlm = randlm::RandLM::initRandLM(filename, order, cache_MB); -  assert(rlm != NULL); -  pimpl_ = new RandLMImpl(order, rlm); -} - -LanguageModelRandLM::~LanguageModelRandLM() { -  delete pimpl_; -} - -void LanguageModelRandLM::TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                          const Hypergraph::Edge& edge, -                                          const vector<const void*>& ant_states, -                                          SparseVector<double>* features, -                                          SparseVector<double>* estimated_features, -                                          void* state) const { -  (void) smeta; -  features->set_value(fid_, imp().LookupWords(*edge.rule_, ant_states, state)); -  estimated_features->set_value(fid_, imp().EstimateProb(state)); -} - -void LanguageModelRandLM::FinalTraversalFeatures(const void* ant_state, -                                           SparseVector<double>* features) const { -  features->set_value(fid_, imp().FinalTraversalCost(ant_state)); -} - -#endif - diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h index 94e18f00..85e79704 100644 --- a/decoder/ff_lm.h +++ b/decoder/ff_lm.h @@ -69,26 +69,4 @@ class LanguageModel : public FeatureFunction {    /* mutable */ LanguageModelInterface* pimpl_;  }; -#ifdef HAVE_RANDLM -class LanguageModelRandLM : public FeatureFunction { - public: -  // param = "filename.lm [-o n]" -  LanguageModelRandLM(const std::string& param); -  ~LanguageModelRandLM(); -  virtual void FinalTraversalFeatures(const void* context, -                                      SparseVector<double>* features) const; -  std::string DebugStateToString(const void* state) const; - protected: -  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const HG::Edge& edge, -                                     const std::vector<const void*>& ant_contexts, -                                     SparseVector<double>* features, -                                     SparseVector<double>* estimated_features, -                                     void* out_context) const; - private: -  const int fid_; -  mutable LanguageModelImpl* pimpl_; -}; -#endif -  #endif diff --git a/decoder/ff_parse_match.cc b/decoder/ff_parse_match.cc index ed556b91..58026975 100644 --- a/decoder/ff_parse_match.cc +++ b/decoder/ff_parse_match.cc @@ -42,10 +42,8 @@ struct ParseMatchFeaturesImpl {    void InitializeGrids(const string& tree, unsigned src_len) {      assert(tree.size() > 0); -    //fids_cat.clear();      fids_ef.clear();      src_tree.clear(); -    //fids_cat.resize(src_len, src_len + 1);      fids_ef.resize(src_len, src_len + 1);      src_tree.resize(src_len, src_len + 1, TD::Convert("XX"));      ParseTreeString(tree, src_len); @@ -112,7 +110,7 @@ struct ParseMatchFeaturesImpl {      int fid_ef = FD::Convert("PM");      int min_dist; // minimal distance to next syntactic constituent of this rule's LHS      int summed_min_dists; // minimal distances of LHS and NTs summed up -    if (TD::Convert(lhs).compare("XX") != 0)  +    if (TD::Convert(lhs).compare("XX") != 0)          min_dist= 0;      // compute the distance to the next syntactical constituent      else { @@ -131,7 +129,7 @@ struct ParseMatchFeaturesImpl {              ok = 1;              break;            } -          // check if removing k words from the rule span will  +          // check if removing k words from the rule span will            // lead to a syntactical constituent            else {              //cerr << "Hilfe...!" << endl; @@ -144,7 +142,7 @@ struct ParseMatchFeaturesImpl {                ok = 1;                break;              } -          }     +          }          }          if (ok) break;        } @@ -183,9 +181,9 @@ struct ParseMatchFeaturesImpl {      return min_dist;    } -  Array2D<WordID> src_tree;  // src_tree(i,j) NT = type +  Array2D<WordID> src_tree; // src_tree(i,j) NT = type    unsigned int src_sent_len; -  mutable Array2D<map<const TRule*, int> > fids_ef;    // fires for fully lexicalized +  mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized    int scoring_method;  }; @@ -214,5 +212,9 @@ void ParseMatchFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,  }  void ParseMatchFeatures::PrepareForInput(const SentenceMetadata& smeta) { -  impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); +  ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree")); +  string tree; +  f.ReadAll(tree); +  impl->InitializeGrids(tree, smeta.GetSourceLength());  } + diff --git a/decoder/ff_parse_match.h b/decoder/ff_parse_match.h index fa73481a..7820b418 100644 --- a/decoder/ff_parse_match.h +++ b/decoder/ff_parse_match.h @@ -23,3 +23,4 @@ class ParseMatchFeatures : public FeatureFunction {  };  #endif + diff --git a/decoder/ff_soft_syntax.cc b/decoder/ff_soft_syntax.cc index 9981fa45..23fe87bd 100644 --- a/decoder/ff_soft_syntax.cc +++ b/decoder/ff_soft_syntax.cc @@ -13,16 +13,15 @@  using namespace std; -// Implements the soft syntactic features described in  +// Implements the soft syntactic features described in  // Marton and Resnik (2008): "Soft Syntacitc Constraints for Hierarchical Phrase-Based Translation".  // Source trees must be represented in Penn Treebank format,  // e.g. (S (NP John) (VP (V left))). -struct SoftSyntacticFeaturesImpl { -  SoftSyntacticFeaturesImpl(const string& param) { +struct SoftSyntaxFeaturesImpl { +  SoftSyntaxFeaturesImpl(const string& param) {      vector<string> labels = SplitOnWhitespace(param); -	for (unsigned int i = 0; i < labels.size(); i++)  -      //cerr << "Labels: " << labels.at(i) << endl; +  	//for (unsigned int i = 0; i < labels.size(); i++) { cerr << "Labels: " << labels.at(i) << endl; }      for (unsigned int i = 0; i < labels.size(); i++) {        string label = labels.at(i);        pair<string, string> feat_label; @@ -34,10 +33,8 @@ struct SoftSyntacticFeaturesImpl {    void InitializeGrids(const string& tree, unsigned src_len) {      assert(tree.size() > 0); -    //fids_cat.clear();      fids_ef.clear();      src_tree.clear(); -    //fids_cat.resize(src_len, src_len + 1);      fids_ef.resize(src_len, src_len + 1);      src_tree.resize(src_len, src_len + 1, TD::Convert("XX"));      ParseTreeString(tree, src_len); @@ -99,7 +96,7 @@ struct SoftSyntacticFeaturesImpl {      const WordID lhs = src_tree(i,j);  	string lhs_str = TD::Convert(lhs);      //cerr << "LHS: " << lhs_str << " from " << i << " to " << j << endl; -	//cerr << "RULE :"<< rule << endl; +	  //cerr << "RULE :"<< rule << endl;      int& fid_ef = fids_ef(i,j)[&rule];      for (unsigned int i = 0; i < feat_labels.size(); i++) {        ostringstream os; @@ -110,10 +107,10 @@ struct SoftSyntacticFeaturesImpl {        switch(feat_type) {          case '2':            if (lhs_str.compare(label) == 0) { -            os << "SYN:" << label << "_conform"; +            os << "SOFT:" << label << "_conform";            }            else { -            os << "SYN:" << label << "_cross"; +            os << "SOFT:" << label << "_cross";            }            fid_ef = FD::Convert(os.str());            if (fid_ef > 0) { @@ -122,11 +119,11 @@ struct SoftSyntacticFeaturesImpl {            }            break;          case '_': -          os << "SYN:" << label; +          os << "SOFT:" << label;            fid_ef = FD::Convert(os.str());            if (lhs_str.compare(label) == 0) {              if (fid_ef > 0) { -            //cerr << "Feature: " << os.str() << endl; +              //cerr << "Feature: " << os.str() << endl;                feats->set_value(fid_ef, 1.0);              }            } @@ -139,7 +136,7 @@ struct SoftSyntacticFeaturesImpl {            break;          case '+':            if (lhs_str.compare(label) == 0) { -            os << "SYN:" << label << "_conform"; +            os << "SOFT:" << label << "_conform";              fid_ef = FD::Convert(os.str());              if (fid_ef > 0) {                //cerr << "Feature: " << os.str() << endl; @@ -147,10 +144,10 @@ struct SoftSyntacticFeaturesImpl {              }            }            break; -        case '-':  -          //cerr << "-" << endl;            +        case '-': +          //cerr << "-" << endl;            if (lhs_str.compare(label) != 0) { -            os << "SYN:" << label << "_cross"; +            os << "SOFT:" << label << "_cross";              fid_ef = FD::Convert(os.str());              if (fid_ef > 0) {                //cerr << "Feature :" << os.str() << endl; @@ -167,22 +164,22 @@ struct SoftSyntacticFeaturesImpl {      return lhs;    } -  Array2D<WordID> src_tree;  // src_tree(i,j) NT = type -  mutable Array2D<map<const TRule*, int> > fids_ef;    // fires for fully lexicalized +  Array2D<WordID> src_tree; // src_tree(i,j) NT = type +  mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized    vector<pair<string, string> > feat_labels;  }; -SoftSyntacticFeatures::SoftSyntacticFeatures(const string& param) : +SoftSyntaxFeatures::SoftSyntaxFeatures(const string& param) :      FeatureFunction(sizeof(WordID)) { -  impl = new SoftSyntacticFeaturesImpl(param); +  impl = new SoftSyntaxFeaturesImpl(param);  } -SoftSyntacticFeatures::~SoftSyntacticFeatures() { +SoftSyntaxFeatures::~SoftSyntaxFeatures() {    delete impl;    impl = NULL;  } -void SoftSyntacticFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void SoftSyntaxFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,                                       const Hypergraph::Edge& edge,                                       const vector<const void*>& ant_contexts,                                       SparseVector<double>* features, @@ -196,6 +193,10 @@ void SoftSyntacticFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,       impl->FireFeatures(*edge.rule_, edge.i_, edge.j_, ants, features);  } -void SoftSyntacticFeatures::PrepareForInput(const SentenceMetadata& smeta) { -  impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); +void SoftSyntaxFeatures::PrepareForInput(const SentenceMetadata& smeta) { +  ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree")); +  string tree; +  f.ReadAll(tree); +  impl->InitializeGrids(tree, smeta.GetSourceLength());  } + diff --git a/decoder/ff_soft_syntax.h b/decoder/ff_soft_syntax.h index 79352f49..e71825d5 100644 --- a/decoder/ff_soft_syntax.h +++ b/decoder/ff_soft_syntax.h @@ -1,15 +1,15 @@ -#ifndef _FF_SOFTSYNTAX_H_ -#define _FF_SOFTSYNTAX_H_ +#ifndef _FF_SOFT_SYNTAX_H_ +#define _FF_SOFT_SYNTAX_H_  #include "ff.h"  #include "hg.h" -struct SoftSyntacticFeaturesImpl; +struct SoftSyntaxFeaturesImpl; -class SoftSyntacticFeatures : public FeatureFunction { +class SoftSyntaxFeatures : public FeatureFunction {   public: -  SoftSyntacticFeatures(const std::string& param); -  ~SoftSyntacticFeatures(); +  SoftSyntaxFeatures(const std::string& param); +  ~SoftSyntaxFeatures();   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,                                       const Hypergraph::Edge& edge, @@ -19,9 +19,9 @@ class SoftSyntacticFeatures : public FeatureFunction {                                       void* context) const;    virtual void PrepareForInput(const SentenceMetadata& smeta);   private: -  SoftSyntacticFeaturesImpl* impl; +  SoftSyntaxFeaturesImpl* impl;  }; -  #endif + diff --git a/decoder/ff_soft_syntax2.cc b/decoder/ff_soft_syntax_mindist.cc index 121bc39b..a23f70f8 100644 --- a/decoder/ff_soft_syntax2.cc +++ b/decoder/ff_soft_syntax_mindist.cc @@ -1,4 +1,4 @@ -#include "ff_soft_syntax2.h" +#include "ff_soft_syntax_mindist.h"  #include <cstdio>  #include <sstream> @@ -13,16 +13,18 @@  using namespace std; -// Implements the soft syntactic features described in  +// Implements the soft syntactic features described in  // Marton and Resnik (2008): "Soft Syntacitc Constraints for Hierarchical Phrase-Based Translation".  // Source trees must be represented in Penn Treebank format,  // e.g. (S (NP John) (VP (V left))). +// +// This variant accepts fuzzy matches, choosing the constituent with +// minimum distance. -struct SoftSyntacticFeatures2Impl { -  SoftSyntacticFeatures2Impl(const string& param) { +struct SoftSyntaxFeaturesMindistImpl { +  SoftSyntaxFeaturesMindistImpl(const string& param) {      vector<string> labels = SplitOnWhitespace(param); -	//for (unsigned int i = 0; i < labels.size(); i++)  -      //cerr << "Labels: " << labels.at(i) << endl; +	  //for (unsigned int i = 0; i < labels.size(); i++) { cerr << "Labels: " << labels.at(i) << endl; }      for (unsigned int i = 0; i < labels.size(); i++) {        string label = labels.at(i);        pair<string, string> feat_label; @@ -30,14 +32,12 @@ struct SoftSyntacticFeatures2Impl {        feat_label.second = label.at(label.size() - 1);        feat_labels.push_back(feat_label);      } -  }  +  }    void InitializeGrids(const string& tree, unsigned src_len) {      assert(tree.size() > 0); -    //fids_cat.clear();      fids_ef.clear();      src_tree.clear(); -    //fids_cat.resize(src_len, src_len + 1);      fids_ef.resize(src_len, src_len + 1);      src_tree.resize(src_len, src_len + 1, TD::Convert("XX"));      ParseTreeString(tree, src_len); @@ -99,14 +99,14 @@ struct SoftSyntacticFeatures2Impl {      const WordID lhs = src_tree(i,j);  	string lhs_str = TD::Convert(lhs);      //cerr << "LHS: " << lhs_str << " from " << i << " to " << j << endl; -	//cerr << "RULE :"<< rule << endl; +	  //cerr << "RULE :"<< rule << endl;      int& fid_ef = fids_ef(i,j)[&rule];      string lhs_to_str = TD::Convert(lhs);      int min_dist;      string min_dist_label;      if (lhs_to_str.compare("XX") != 0) {        min_dist = 0; -      min_dist_label = lhs_to_str;       +      min_dist_label = lhs_to_str;      }      else {        int ok = 0; @@ -128,7 +128,7 @@ struct SoftSyntacticFeatures2Impl {                min_dist_label = (TD::Convert(src_tree(l_rem, r_rem)));                break;              } -          }     +          }          }          if (ok) break;        } @@ -146,10 +146,10 @@ struct SoftSyntacticFeatures2Impl {          case '2':            if (min_dist_label.compare(label) == 0) {              if (min_dist == 0) { -              os << "SYN:" << label << "_conform"; +              os << "SOFTM:" << label << "_conform";              }              else { -              os << "SYN:" << label << "_cross"; +              os << "SOFTM:" << label << "_cross";              }              fid_ef = FD::Convert(os.str());              //cerr << "Feature :" << os.str() << endl; @@ -157,7 +157,7 @@ struct SoftSyntacticFeatures2Impl {            }            break;          case '_': -          os << "SYN:" << label; +          os << "SOFTM:" << label;            fid_ef = FD::Convert(os.str());            if (min_dist_label.compare(label) == 0) {              //cerr << "Feature: " << os.str() << endl; @@ -172,7 +172,7 @@ struct SoftSyntacticFeatures2Impl {            break;          case '+':            if (min_dist_label.compare(label) == 0) { -            os << "SYN:" << label << "_conform"; +            os << "SOFTM:" << label << "_conform";              fid_ef = FD::Convert(os.str());              if (min_dist == 0) {                //cerr << "Feature: " << os.str() << endl; @@ -180,10 +180,10 @@ struct SoftSyntacticFeatures2Impl {              }            }            break; -        case '-':  -          //cerr << "-" << endl;            +        case '-': +          //cerr << "-" << endl;            if (min_dist_label.compare(label) != 0) { -            os << "SYN:" << label << "_cross"; +            os << "SOFTM:" << label << "_cross";              fid_ef = FD::Convert(os.str());              if (min_dist > 0) {                //cerr << "Feature :" << os.str() << endl; @@ -200,22 +200,22 @@ struct SoftSyntacticFeatures2Impl {      return lhs;    } -  Array2D<WordID> src_tree;  // src_tree(i,j) NT = type -  mutable Array2D<map<const TRule*, int> > fids_ef;    // fires for fully lexicalized +  Array2D<WordID> src_tree; // src_tree(i,j) NT = type +  mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized    vector<pair<string, string> > feat_labels;  }; -SoftSyntacticFeatures2::SoftSyntacticFeatures2(const string& param) : +SoftSyntaxFeaturesMindist::SoftSyntaxFeaturesMindist(const string& param) :      FeatureFunction(sizeof(WordID)) { -  impl = new SoftSyntacticFeatures2Impl(param); +  impl = new SoftSyntaxFeaturesMindistImpl(param);  } -SoftSyntacticFeatures2::~SoftSyntacticFeatures2() { +SoftSyntaxFeaturesMindist::~SoftSyntaxFeaturesMindist() {    delete impl;    impl = NULL;  } -void SoftSyntacticFeatures2::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void SoftSyntaxFeaturesMindist::TraversalFeaturesImpl(const SentenceMetadata& smeta,                                       const Hypergraph::Edge& edge,                                       const vector<const void*>& ant_contexts,                                       SparseVector<double>* features, @@ -229,6 +229,10 @@ void SoftSyntacticFeatures2::TraversalFeaturesImpl(const SentenceMetadata& smeta       impl->FireFeatures(*edge.rule_, edge.i_, edge.j_, ants, features);  } -void SoftSyntacticFeatures2::PrepareForInput(const SentenceMetadata& smeta) { -  impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); +void SoftSyntaxFeaturesMindist::PrepareForInput(const SentenceMetadata& smeta) { +  ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree")); +  string tree; +  f.ReadAll(tree); +  impl->InitializeGrids(tree, smeta.GetSourceLength());  } + diff --git a/decoder/ff_soft_syntax2.h b/decoder/ff_soft_syntax_mindist.h index 4de91d86..bf938b38 100644 --- a/decoder/ff_soft_syntax2.h +++ b/decoder/ff_soft_syntax_mindist.h @@ -1,15 +1,15 @@ -#ifndef _FF_SOFTSYNTAX2_H_ -#define _FF_SOFTSYNTAX2_H_ +#ifndef _FF_SOFT_SYNTAX_MINDIST_H_ +#define _FF_SOFT_SYNTAX_MINDIST_H_  #include "ff.h"  #include "hg.h" -struct SoftSyntacticFeatures2Impl; +struct SoftSyntaxFeaturesMindistImpl; -class SoftSyntacticFeatures2 : public FeatureFunction { +class SoftSyntaxFeaturesMindist : public FeatureFunction {   public: -  SoftSyntacticFeatures2(const std::string& param); -  ~SoftSyntacticFeatures2(); +  SoftSyntaxFeaturesMindist(const std::string& param); +  ~SoftSyntaxFeaturesMindist();   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,                                       const Hypergraph::Edge& edge, @@ -19,9 +19,9 @@ class SoftSyntacticFeatures2 : public FeatureFunction {                                       void* context) const;    virtual void PrepareForInput(const SentenceMetadata& smeta);   private: -  SoftSyntacticFeatures2Impl* impl; +  SoftSyntaxFeaturesMindistImpl* impl;  }; -  #endif + diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc index a1997695..6b183863 100644 --- a/decoder/ff_source_syntax.cc +++ b/decoder/ff_source_syntax.cc @@ -2,8 +2,13 @@  #include <sstream>  #include <stack> +#ifndef HAVE_OLD_CPP +# include <unordered_set> +#else +# include <tr1/unordered_set> +namespace std { using std::tr1::unordered_set; } +#endif -#include "hg.h"  #include "sentence_metadata.h"  #include "array2d.h"  #include "filelib.h" @@ -24,6 +29,17 @@ inline int SpanSizeTransform(unsigned span_size) {  struct SourceSyntaxFeaturesImpl {    SourceSyntaxFeaturesImpl() {} +  SourceSyntaxFeaturesImpl(const string& param) { +    if (!(param.compare("") == 0)) { +      string triggered_features_fn = param; +      ReadFile triggered_features(triggered_features_fn); +      string in; +      while(getline(*triggered_features, in)) { +        feature_filter.insert(FD::Convert(in)); +      } +    } +  } +    void InitializeGrids(const string& tree, unsigned src_len) {      assert(tree.size() > 0);      //fids_cat.clear(); @@ -93,7 +109,7 @@ struct SourceSyntaxFeaturesImpl {      if (fid_ef <= 0) {        ostringstream os;        //ostringstream os2; -      os << "SYN:" << TD::Convert(lhs); +      os << "SSYN:" << TD::Convert(lhs);        //os2 << "SYN:" << TD::Convert(lhs) << '_' << SpanSizeTransform(j - i);        //fid_cat = FD::Convert(os2.str());        os << ':'; @@ -118,21 +134,28 @@ struct SourceSyntaxFeaturesImpl {        }        fid_ef = FD::Convert(os.str());      } -    //if (fid_cat > 0) -    //  feats->set_value(fid_cat, 1.0); -    if (fid_ef > 0) -      feats->set_value(fid_ef, 1.0); +    if (fid_ef > 0) { +      if (feature_filter.size()>0) { +        if (feature_filter.find(fid_ef) != feature_filter.end()) { +          feats->set_value(fid_ef, 1.0); +        } +      } else { +        feats->set_value(fid_ef, 1.0); +      } +    } +    cerr << FD::Convert(fid_ef) << endl;      return lhs;    } -  Array2D<WordID> src_tree;  // src_tree(i,j) NT = type -  // mutable Array2D<int> fids_cat;   // this tends to overfit baddly -  mutable Array2D<map<const TRule*, int> > fids_ef;    // fires for fully lexicalized +  Array2D<WordID> src_tree; // src_tree(i,j) NT = type +  // mutable Array2D<int> fids_cat; // this tends to overfit baddly +  mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized +  unordered_set<int> feature_filter;  };  SourceSyntaxFeatures::SourceSyntaxFeatures(const string& param) :      FeatureFunction(sizeof(WordID)) { -  impl = new SourceSyntaxFeaturesImpl; +  impl = new SourceSyntaxFeaturesImpl(param);  }  SourceSyntaxFeatures::~SourceSyntaxFeatures() { @@ -155,7 +178,10 @@ void SourceSyntaxFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,  }  void SourceSyntaxFeatures::PrepareForInput(const SentenceMetadata& smeta) { -  impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); +  ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree")); +  string tree; +  f.ReadAll(tree); +  impl->InitializeGrids(tree, smeta.GetSourceLength());  }  struct SourceSpanSizeFeaturesImpl { @@ -230,4 +256,3 @@ void SourceSpanSizeFeatures::PrepareForInput(const SentenceMetadata& smeta) {    impl->InitializeGrids(smeta.GetSourceLength());  } - diff --git a/decoder/ff_source_syntax.h b/decoder/ff_source_syntax.h index a8c7150a..bdd638c1 100644 --- a/decoder/ff_source_syntax.h +++ b/decoder/ff_source_syntax.h @@ -1,7 +1,8 @@ -#ifndef _FF_SOURCE_TOOLS_H_ -#define _FF_SOURCE_TOOLS_H_ +#ifndef _FF_SOURCE_SYNTAX_H_ +#define _FF_SOURCE_SYNTAX_H_  #include "ff.h" +#include "hg.h"  struct SourceSyntaxFeaturesImpl; @@ -11,7 +12,7 @@ class SourceSyntaxFeatures : public FeatureFunction {    ~SourceSyntaxFeatures();   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const HG::Edge& edge, +                                     const Hypergraph::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -28,7 +29,7 @@ class SourceSpanSizeFeatures : public FeatureFunction {    ~SourceSpanSizeFeatures();   protected:    virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const HG::Edge& edge, +                                     const Hypergraph::Edge& edge,                                       const std::vector<const void*>& ant_contexts,                                       SparseVector<double>* features,                                       SparseVector<double>* estimated_features, @@ -39,3 +40,4 @@ class SourceSpanSizeFeatures : public FeatureFunction {  };  #endif + diff --git a/decoder/ff_source_syntax2.cc b/decoder/ff_source_syntax2.cc index 08ece917..a97e31d8 100644 --- a/decoder/ff_source_syntax2.cc +++ b/decoder/ff_source_syntax2.cc @@ -3,7 +3,6 @@  #include <sstream>  #include <stack>  #include <string> -#include <tr1/unordered_set>  #include "sentence_metadata.h"  #include "array2d.h" @@ -17,7 +16,7 @@ using namespace std;  struct SourceSyntaxFeatures2Impl {    SourceSyntaxFeatures2Impl(const string& param) { -    if (!(param.compare("") == 0)) { +    if (param.compare("") != 0) {        string triggered_features_fn = param;        ReadFile triggered_features(triggered_features_fn);        string in; @@ -29,10 +28,8 @@ struct SourceSyntaxFeatures2Impl {    void InitializeGrids(const string& tree, unsigned src_len) {      assert(tree.size() > 0); -    //fids_cat.clear();      fids_ef.clear();      src_tree.clear(); -    //fids_cat.resize(src_len, src_len + 1);      fids_ef.resize(src_len, src_len + 1);      src_tree.resize(src_len, src_len + 1, TD::Convert("XX"));      ParseTreeString(tree, src_len); @@ -40,7 +37,7 @@ struct SourceSyntaxFeatures2Impl {    void ParseTreeString(const string& tree, unsigned src_len) {      //cerr << "TREE: " << tree << endl; -    stack<pair<int, WordID> > stk;  // first = i, second = category +    stack<pair<int, WordID> > stk; // first = i, second = category      pair<int, WordID> cur_cat; cur_cat.first = -1;      unsigned i = 0;      unsigned p = 0; @@ -92,7 +89,7 @@ struct SourceSyntaxFeatures2Impl {      const WordID lhs = src_tree(i,j);      int& fid_ef = fids_ef(i,j)[&rule];      ostringstream os; -    os << "SYN:" << TD::Convert(lhs); +    os << "SSYN2:" << TD::Convert(lhs);      os << ':';      unsigned ntc = 0;      for (unsigned k = 0; k < rule.f_.size(); ++k) { @@ -100,7 +97,7 @@ struct SourceSyntaxFeatures2Impl {        if (k > 0 && fj <= 0) os << '_';        if (fj <= 0) {          os << '[' << TD::Convert(ants[ntc++]) << ']'; -      } /*else { +      }/*else {          os << TD::Convert(fj);        }*/      } @@ -116,18 +113,23 @@ struct SourceSyntaxFeatures2Impl {      fid_ef = FD::Convert(os.str());      //cerr << "FEATURE: " << os.str() << endl;      //cerr << "FID_EF: " << fid_ef << endl; -    if (feature_filter.find(fid_ef) != feature_filter.end()) { -      cerr << "SYN-Feature was trigger more than once on training set." << endl; +    if (feature_filter.size() > 0) { +      if (feature_filter.find(fid_ef) != feature_filter.end()) { +        //cerr << "SYN-Feature was trigger more than once on training set." << endl; +        feats->set_value(fid_ef, 1.0); +      } +      //else cerr << "SYN-Feature was triggered less than once on training set." << endli; +    } +    else {        feats->set_value(fid_ef, 1.0);      } -    else cerr << "SYN-Feature was triggered less than once on training set." << endl; +    cerr << FD::Convert(fid_ef) << endl;      return lhs;    } -  Array2D<WordID> src_tree;  // src_tree(i,j) NT = type -  mutable Array2D<map<const TRule*, int> > fids_ef;    // fires for fully lexicalized -  tr1::unordered_set<int> feature_filter; - +  Array2D<WordID> src_tree; // src_tree(i,j) NT = type +  mutable Array2D<map<const TRule*, int> > fids_ef; // fires for fully lexicalized +  unordered_set<int> feature_filter;  };  SourceSyntaxFeatures2::SourceSyntaxFeatures2(const string& param) : @@ -155,5 +157,9 @@ void SourceSyntaxFeatures2::TraversalFeaturesImpl(const SentenceMetadata& smeta,  }  void SourceSyntaxFeatures2::PrepareForInput(const SentenceMetadata& smeta) { -  impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); +  ReadFile f = ReadFile(smeta.GetSGMLValue("src_tree")); +  string tree; +  f.ReadAll(tree); +  impl->InitializeGrids(tree, smeta.GetSourceLength());  } + diff --git a/decoder/ff_source_syntax2.h b/decoder/ff_source_syntax2.h index b6b7dc3d..f606c2bf 100644 --- a/decoder/ff_source_syntax2.h +++ b/decoder/ff_source_syntax2.h @@ -1,5 +1,5 @@ -#ifndef _FF_SOURCE_TOOLS2_H_ -#define _FF_SOURCE_TOOLS2_H_ +#ifndef _FF_SOURCE_SYNTAX2_H_ +#define _FF_SOURCE_SYNTAX2_H_  #include "ff.h"  #include "hg.h" @@ -23,3 +23,4 @@ class SourceSyntaxFeatures2 : public FeatureFunction {  };  #endif + diff --git a/decoder/ff_source_syntax2_p.cc b/decoder/ff_source_syntax2_p.cc deleted file mode 100644 index dfa791ea..00000000 --- a/decoder/ff_source_syntax2_p.cc +++ /dev/null @@ -1,166 +0,0 @@ -#include "ff_source_syntax2_p.h" - -#include <sstream> -#include <stack> -#include <string> -#include <tr1/unordered_set> - -#include "sentence_metadata.h" -#include "array2d.h" -#include "filelib.h" - -using namespace std; - -// implements the source side syntax features described in Blunsom et al. (EMNLP 2008) -// source trees must be represented in Penn Treebank format, e.g. -//     (S (NP John) (VP (V left))) - -struct PSourceSyntaxFeatures2Impl { -  PSourceSyntaxFeatures2Impl(const string& param) { -    if (param.compare("") != 0) { -      string triggered_features_fn = param; -      ReadFile triggered_features(triggered_features_fn); -      string in; -      while(getline(*triggered_features, in)) { -        feature_filter.insert(FD::Convert(in)); -      } -    } -    /*cerr << "find(\"One\") == " << boolalpha << (table.find("One") != table.end()) << endl;  -    cerr << "find(\"Three\") == " << boolalpha << (table.find("Three") != table.end()) << endl;*/ -  } - -  void InitializeGrids(const string& tree, unsigned src_len) { -    assert(tree.size() > 0); -    //fids_cat.clear(); -    fids_ef.clear(); -    src_tree.clear(); -    //fids_cat.resize(src_len, src_len + 1); -    fids_ef.resize(src_len, src_len + 1); -    src_tree.resize(src_len, src_len + 1, TD::Convert("XX")); -    ParseTreeString(tree, src_len); -  } - -  void ParseTreeString(const string& tree, unsigned src_len) { -    //cerr << "TREE: " << tree << endl; -    stack<pair<int, WordID> > stk;  // first = i, second = category -    pair<int, WordID> cur_cat; cur_cat.first = -1; -    unsigned i = 0; -    unsigned p = 0; -    while(p < tree.size()) { -      const char cur = tree[p]; -      if (cur == '(') { -        stk.push(cur_cat); -        ++p; -        unsigned k = p + 1; -        while (k < tree.size() && tree[k] != ' ') { ++k; } -        cur_cat.first = i; -        cur_cat.second = TD::Convert(tree.substr(p, k - p)); -        // cerr << "NT: '" << tree.substr(p, k-p) << "' (i=" << i << ")\n"; -        p = k + 1; -      } else if (cur == ')') { -        unsigned k = p; -        while (k < tree.size() && tree[k] == ')') { ++k; } -        const unsigned num_closes = k - p; -        for (unsigned ci = 0; ci < num_closes; ++ci) { -          src_tree(cur_cat.first, i) = cur_cat.second; -          cur_cat = stk.top(); -          stk.pop(); -        } -        p = k; -        while (p < tree.size() && (tree[p] == ' ' || tree[p] == '\t')) { ++p; } -      } else if (cur == ' ' || cur == '\t') { -        cerr << "Unexpected whitespace in: " << tree << endl; -        abort(); -      } else { // terminal symbol -        unsigned k = p + 1; -        do { -          while (k < tree.size() && tree[k] != ')' && tree[k] != ' ') { ++k; } -          // cerr << "TERM: '" << tree.substr(p, k-p) << "' (i=" << i << ")\n"; -          ++i; -          assert(i <= src_len); -          while (k < tree.size() && tree[k] == ' ') { ++k; } -          p = k; -        } while (p < tree.size() && tree[p] != ')'); -      } -    //cerr << "i=" << i << "  src_len=" << src_len << endl; -    } -    //cerr << "i=" << i << "  src_len=" << src_len << endl; -    assert(i == src_len);  // make sure tree specified in src_tree is -                           // the same length as the source sentence -  } - -  WordID FireFeatures(const TRule& rule, const int i, const int j, const WordID* ants, SparseVector<double>* feats) { -    //cerr << "fire features: " << rule.AsString() << " for " << i << "," << j << endl; -    const WordID lhs = src_tree(i,j); -    int& fid_ef = fids_ef(i,j)[&rule]; -    ostringstream os; -    os << "SYN:" << TD::Convert(lhs); -    os << ':'; -    unsigned ntc = 0; -    for (unsigned k = 0; k < rule.f_.size(); ++k) { -      int fj = rule.f_[k]; -      if (k > 0 && fj <= 0) os << '_'; -      if (fj <= 0) { -        os << '[' << TD::Convert(ants[ntc++]) << ']'; -      } /*else { -        os << TD::Convert(fj); -      }*/ -    } -    os << ':'; -    for (unsigned k = 0; k < rule.e_.size(); ++k) { -      const int ei = rule.e_[k]; -      if (k > 0) os << '_'; -      if (ei <= 0) -        os << '[' << (1-ei) << ']'; -      else -        os << TD::Convert(ei); -    } -    fid_ef = FD::Convert(os.str()); -    //cerr << "FEATURE: " << os.str() << endl; -    //cerr << "FID_EF: " << fid_ef << endl; -    if (feature_filter.size() > 0) { -      if (feature_filter.find(fid_ef) != feature_filter.end()) { -        //cerr << "SYN-Feature was trigger more than once on training set." << endl; -        feats->set_value(fid_ef, 1.0); -      }   -      //else cerr << "SYN-Feature was triggered less than once on training set." << endli; -    } -    else { -      feats->set_value(fid_ef, 1.0); -    } -    return lhs; -  } - -  Array2D<WordID> src_tree;  // src_tree(i,j) NT = type -  mutable Array2D<map<const TRule*, int> > fids_ef;    // fires for fully lexicalized -  tr1::unordered_set<int> feature_filter; - -}; - -PSourceSyntaxFeatures2::PSourceSyntaxFeatures2(const string& param) : -    FeatureFunction(sizeof(WordID)) { -  impl = new PSourceSyntaxFeatures2Impl(param); -} - -PSourceSyntaxFeatures2::~PSourceSyntaxFeatures2() { -  delete impl; -  impl = NULL; -} - -void PSourceSyntaxFeatures2::TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, -                                     const vector<const void*>& ant_contexts, -                                     SparseVector<double>* features, -                                     SparseVector<double>* estimated_features, -                                     void* context) const { -  WordID ants[8]; -  for (unsigned i = 0; i < ant_contexts.size(); ++i) -    ants[i] = *static_cast<const WordID*>(ant_contexts[i]); - -  *static_cast<WordID*>(context) = -     impl->FireFeatures(*edge.rule_, edge.i_, edge.j_, ants, features); -} - -void PSourceSyntaxFeatures2::PrepareForInput(const SentenceMetadata& smeta) { -  impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); -} diff --git a/decoder/ff_source_syntax2_p.h b/decoder/ff_source_syntax2_p.h deleted file mode 100644 index d56ecab0..00000000 --- a/decoder/ff_source_syntax2_p.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _FF_SOURCE_TOOLS2_H_ -#define _FF_SOURCE_TOOLS2_H_ - -#include "ff.h" -#include "hg.h" - -struct PSourceSyntaxFeatures2Impl; - -class PSourceSyntaxFeatures2 : public FeatureFunction { - public: -  PSourceSyntaxFeatures2(const std::string& param); -  ~PSourceSyntaxFeatures2(); - protected: -  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, -                                     const std::vector<const void*>& ant_contexts, -                                     SparseVector<double>* features, -                                     SparseVector<double>* estimated_features, -                                     void* context) const; -  virtual void PrepareForInput(const SentenceMetadata& smeta); - private: -  PSourceSyntaxFeatures2Impl* impl; -}; - -#endif diff --git a/decoder/ff_source_syntax_p.cc b/decoder/ff_source_syntax_p.cc deleted file mode 100644 index cd081544..00000000 --- a/decoder/ff_source_syntax_p.cc +++ /dev/null @@ -1,245 +0,0 @@ -#include "ff_source_syntax_p.h" - -#include <sstream> -#include <stack> -#include <tr1/unordered_set> - -#include "sentence_metadata.h" -#include "array2d.h" -#include "filelib.h" - -using namespace std; - -// implements the source side syntax features described in Blunsom et al. (EMNLP 2008) -// source trees must be represented in Penn Treebank format, e.g. -//     (S (NP John) (VP (V left))) - -// log transform to make long spans cluster together -// but preserve differences -inline int SpanSizeTransform(unsigned span_size) { -  if (!span_size) return 0; -  return static_cast<int>(log(span_size+1) / log(1.39)) - 1; -} - -struct PSourceSyntaxFeaturesImpl { -  PSourceSyntaxFeaturesImpl() {} - -  PSourceSyntaxFeaturesImpl(const string& param) { -    if (!(param.compare("") == 0)) { -      string triggered_features_fn = param; -      ReadFile triggered_features(triggered_features_fn); -      string in; -      while(getline(*triggered_features, in)) { -        feature_filter.insert(FD::Convert(in)); -      } -    } -  } - -  void InitializeGrids(const string& tree, unsigned src_len) { -    assert(tree.size() > 0); -    //fids_cat.clear(); -    fids_ef.clear(); -    src_tree.clear(); -    //fids_cat.resize(src_len, src_len + 1); -    fids_ef.resize(src_len, src_len + 1); -    src_tree.resize(src_len, src_len + 1, TD::Convert("XX")); -    ParseTreeString(tree, src_len); -  } - -  void ParseTreeString(const string& tree, unsigned src_len) { -    stack<pair<int, WordID> > stk;  // first = i, second = category -    pair<int, WordID> cur_cat; cur_cat.first = -1; -    unsigned i = 0; -    unsigned p = 0; -    while(p < tree.size()) { -      const char cur = tree[p]; -      if (cur == '(') { -        stk.push(cur_cat); -        ++p; -        unsigned k = p + 1; -        while (k < tree.size() && tree[k] != ' ') { ++k; } -        cur_cat.first = i; -        cur_cat.second = TD::Convert(tree.substr(p, k - p)); -        // cerr << "NT: '" << tree.substr(p, k-p) << "' (i=" << i << ")\n"; -        p = k + 1; -      } else if (cur == ')') { -        unsigned k = p; -        while (k < tree.size() && tree[k] == ')') { ++k; } -        const unsigned num_closes = k - p; -        for (unsigned ci = 0; ci < num_closes; ++ci) { -          // cur_cat.second spans from cur_cat.first to i -          // cerr << TD::Convert(cur_cat.second) << " from " << cur_cat.first << " to " << i << endl; -          // NOTE: unary rule chains end up being labeled with the top-most category -          src_tree(cur_cat.first, i) = cur_cat.second; -          cur_cat = stk.top(); -          stk.pop(); -        } -        p = k; -        while (p < tree.size() && (tree[p] == ' ' || tree[p] == '\t')) { ++p; } -      } else if (cur == ' ' || cur == '\t') { -        cerr << "Unexpected whitespace in: " << tree << endl; -        abort(); -      } else { // terminal symbol -        unsigned k = p + 1; -        do { -          while (k < tree.size() && tree[k] != ')' && tree[k] != ' ') { ++k; } -          // cerr << "TERM: '" << tree.substr(p, k-p) << "' (i=" << i << ")\n"; -          ++i; -          assert(i <= src_len); -          while (k < tree.size() && tree[k] == ' ') { ++k; } -          p = k; -        } while (p < tree.size() && tree[p] != ')'); -      } -    } -    // cerr << "i=" << i << "  src_len=" << src_len << endl; -    assert(i == src_len);  // make sure tree specified in src_tree is -                           // the same length as the source sentence -  } - -  WordID FireFeatures(const TRule& rule, const int i, const int j, const WordID* ants, SparseVector<double>* feats) { -    //cerr << "fire features: " << rule.AsString() << " for " << i << "," << j << endl; -    const WordID lhs = src_tree(i,j); -    //int& fid_cat = fids_cat(i,j); -    int& fid_ef = fids_ef(i,j)[&rule]; -    if (fid_ef <= 0) { -      ostringstream os; -      //ostringstream os2; -      os << "SYN:" << TD::Convert(lhs); -      //os2 << "SYN:" << TD::Convert(lhs) << '_' << SpanSizeTransform(j - i); -      //fid_cat = FD::Convert(os2.str()); -      os << ':'; -      unsigned ntc = 0; -      for (unsigned k = 0; k < rule.f_.size(); ++k) { -        if (k > 0) os << '_'; -        int fj = rule.f_[k]; -        if (fj <= 0) { -          os << '[' << TD::Convert(ants[ntc++]) << ']'; -        } else { -          os << TD::Convert(fj); -        } -      } -      os << ':'; -      for (unsigned k = 0; k < rule.e_.size(); ++k) { -        const int ei = rule.e_[k]; -        if (k > 0) os << '_'; -        if (ei <= 0) -          os << '[' << (1-ei) << ']'; -        else -          os << TD::Convert(ei); -      } -      fid_ef = FD::Convert(os.str()); -    } -    //if (fid_cat > 0) -    //  feats->set_value(fid_cat, 1.0); -    if (fid_ef > 0 && (feature_filter.find(fid_ef) != feature_filter.end())) -      feats->set_value(fid_ef, 1.0); -    return lhs; -  } - -  Array2D<WordID> src_tree;  // src_tree(i,j) NT = type -  // mutable Array2D<int> fids_cat;   // this tends to overfit baddly -  mutable Array2D<map<const TRule*, int> > fids_ef;    // fires for fully lexicalized -  tr1::unordered_set<int> feature_filter; -}; - -PSourceSyntaxFeatures::PSourceSyntaxFeatures(const string& param) : -    FeatureFunction(sizeof(WordID)) { -  impl = new PSourceSyntaxFeaturesImpl(param); -} - -PSourceSyntaxFeatures::~PSourceSyntaxFeatures() { -  delete impl; -  impl = NULL; -} - -void PSourceSyntaxFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, -                                     const vector<const void*>& ant_contexts, -                                     SparseVector<double>* features, -                                     SparseVector<double>* estimated_features, -                                     void* context) const { -  WordID ants[8]; -  for (unsigned i = 0; i < ant_contexts.size(); ++i) -    ants[i] = *static_cast<const WordID*>(ant_contexts[i]); - -  *static_cast<WordID*>(context) = -     impl->FireFeatures(*edge.rule_, edge.i_, edge.j_, ants, features); -} - -void PSourceSyntaxFeatures::PrepareForInput(const SentenceMetadata& smeta) { -  impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); -} - -struct PSourceSpanSizeFeaturesImpl { -  PSourceSpanSizeFeaturesImpl() {} - -  void InitializeGrids(unsigned src_len) { -    fids.clear(); -    fids.resize(src_len, src_len + 1); -  } - -  int FireFeatures(const TRule& rule, const int i, const int j, const WordID* ants, SparseVector<double>* feats) { -    if (rule.Arity() > 0) { -      int& fid = fids(i,j)[&rule]; -      if (fid <= 0) { -        ostringstream os; -        os << "SSS:"; -        unsigned ntc = 0; -        for (unsigned k = 0; k < rule.f_.size(); ++k) { -          if (k > 0) os << '_'; -          int fj = rule.f_[k]; -          if (fj <= 0) { -            os << '[' << TD::Convert(-fj) << ants[ntc++] << ']'; -          } else { -            os << TD::Convert(fj); -          } -        } -        os << ':'; -        for (unsigned k = 0; k < rule.e_.size(); ++k) { -          const int ei = rule.e_[k]; -          if (k > 0) os << '_'; -          if (ei <= 0) -            os << '[' << (1-ei) << ']'; -          else -            os << TD::Convert(ei); -        } -        fid = FD::Convert(os.str()); -      } -      if (fid > 0) -        feats->set_value(fid, 1.0); -    } -    return SpanSizeTransform(j - i); -  } - -  mutable Array2D<map<const TRule*, int> > fids; -}; - -PSourceSpanSizeFeatures::PSourceSpanSizeFeatures(const string& param) : -    FeatureFunction(sizeof(char)) { -  impl = new PSourceSpanSizeFeaturesImpl; -} - -PSourceSpanSizeFeatures::~PSourceSpanSizeFeatures() { -  delete impl; -  impl = NULL; -} - -void PSourceSpanSizeFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, -                                     const vector<const void*>& ant_contexts, -                                     SparseVector<double>* features, -                                     SparseVector<double>* estimated_features, -                                     void* context) const { -  int ants[8]; -  for (unsigned i = 0; i < ant_contexts.size(); ++i) -    ants[i] = *static_cast<const char*>(ant_contexts[i]); - -  *static_cast<char*>(context) = -     impl->FireFeatures(*edge.rule_, edge.i_, edge.j_, ants, features); -} - -void PSourceSpanSizeFeatures::PrepareForInput(const SentenceMetadata& smeta) { -  impl->InitializeGrids(smeta.GetSourceLength()); -} - - diff --git a/decoder/ff_source_syntax_p.h b/decoder/ff_source_syntax_p.h deleted file mode 100644 index 2dd9094a..00000000 --- a/decoder/ff_source_syntax_p.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef _FF_SOURCE_TOOLS_H_ -#define _FF_SOURCE_TOOLS_H_ - -#include "ff.h" -#include "hg.h" - -struct PSourceSyntaxFeaturesImpl; - -class PSourceSyntaxFeatures : public FeatureFunction { - public: -  PSourceSyntaxFeatures(const std::string& param); -  ~PSourceSyntaxFeatures(); - protected: -  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, -                                     const std::vector<const void*>& ant_contexts, -                                     SparseVector<double>* features, -                                     SparseVector<double>* estimated_features, -                                     void* context) const; -  virtual void PrepareForInput(const SentenceMetadata& smeta); - private: -  PSourceSyntaxFeaturesImpl* impl; -}; - -struct PSourceSpanSizeFeaturesImpl; -class PSourceSpanSizeFeatures : public FeatureFunction { - public: -  PSourceSpanSizeFeatures(const std::string& param); -  ~PSourceSpanSizeFeatures(); - protected: -  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, -                                     const std::vector<const void*>& ant_contexts, -                                     SparseVector<double>* features, -                                     SparseVector<double>* estimated_features, -                                     void* context) const; -  virtual void PrepareForInput(const SentenceMetadata& smeta); - private: -  PSourceSpanSizeFeaturesImpl* impl; -}; - -#endif diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc index 1491819d..dcb80110 100644 --- a/decoder/ff_wordalign.cc +++ b/decoder/ff_wordalign.cc @@ -7,7 +7,12 @@  #include <string>  #include <cmath>  #include <bitset> -#include <tr1/unordered_map> +#ifndef HAVE_OLD_CPP +# include <unordered_map> +#else +# include <tr1/unordered_map> +namespace std { using std::tr1::unordered_map; } +#endif  #include <boost/tuple/tuple.hpp>  #include "boost/tuple/tuple_comparison.hpp" @@ -249,7 +254,7 @@ void NewJump::FireFeature(const SentenceMetadata& smeta,    if (fp1_)   get<6>(key) = GetSourceWord(id, cur_src_index + 1);    if (fprev_) get<7>(key) = GetSourceWord(id, prev_src_index); -  static std::tr1::unordered_map<NewJumpFeatureKey, int, KeyHash> fids; +  static std::unordered_map<NewJumpFeatureKey, int, KeyHash> fids;    int& fid = fids[key];    if (!fid) {      ostringstream os; diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h index ba3d0b9b..0161f603 100644 --- a/decoder/ff_wordalign.h +++ b/decoder/ff_wordalign.h @@ -5,8 +5,16 @@  #include "array2d.h"  #include "factored_lexicon_helper.h" +#include <boost/functional/hash.hpp> +#include <cassert>  #include <boost/scoped_ptr.hpp>  #include <boost/multi_array.hpp> +#ifndef HAVE_OLD_CPP +# include <unordered_map> +#else +# include <tr1/unordered_map> +namespace std { using std::tr1::unordered_map; } +#endif  class RelativeSentencePosition : public FeatureFunction {   public: @@ -124,9 +132,6 @@ class LexicalTranslationTrigger : public FeatureFunction {    std::vector<std::vector<WordID> > triggers_;  }; -#include <tr1/unordered_map> -#include <boost/functional/hash.hpp> -#include <cassert>  class BlunsomSynchronousParseHack : public FeatureFunction {   public:    BlunsomSynchronousParseHack(const std::string& param); @@ -196,7 +201,7 @@ class BlunsomSynchronousParseHack : public FeatureFunction {    const int fid_;    mutable int cur_sent_; -  typedef std::tr1::unordered_map<std::vector<WordID>, int, boost::hash<std::vector<WordID> > > Vec2Int; +  typedef std::unordered_map<std::vector<WordID>, int, boost::hash<std::vector<WordID> > > Vec2Int;    mutable Vec2Int cur_map_;    const std::vector<WordID> mutable * cur_ref_;    mutable std::vector<std::vector<WordID> > refs_; diff --git a/decoder/ff_wordset.cc b/decoder/ff_wordset.cc index 70cea7de..9be6f2e0 100644 --- a/decoder/ff_wordset.cc +++ b/decoder/ff_wordset.cc @@ -2,21 +2,67 @@  #include "hg.h"  #include "fdict.h" +#include "filelib.h" +#include <boost/algorithm/string.hpp>  #include <sstream>  #include <iostream>  using namespace std; +void WordSet::parseArgs(const string& args, string* featName, string* vocabFile, bool* oovMode) { +  vector<string> toks(10); +  boost::split(toks, args, boost::is_any_of(" ")); + +  *oovMode = false; + +  // skip initial feature name +  for(vector<string>::const_iterator it = toks.begin(); it != toks.end(); ++it) { +    if(*it == "-v") { +      *vocabFile = *++it; // copy + +    } else if(*it == "-N") { +      *featName = *++it; +    } else if(*it == "--oov") { +       *oovMode = true; +    } else { +       cerr << "Unrecognized argument: " << *it << endl; +       exit(1); +    } +  } + +  if(*featName == "") { +    cerr << "featName (-N) not specified for WordSet" << endl; +    exit(1); +  } +  if(*vocabFile == "") { +    cerr << "vocabFile (-v) not specified for WordSet" << endl; +    exit(1); +  } +} + +void WordSet::loadVocab(const string& vocabFile, unordered_set<WordID>* vocab) { +  ReadFile rf(vocabFile); +  if (!rf) { +    cerr << "Unable to open file: " << vocabFile;  +    abort(); +  } +  string line; +  while (getline(*rf.stream(), line)) { +    boost::trim(line); +    if(line.empty()) continue; +    WordID vocabId = TD::Convert(line); +    vocab->insert(vocabId); +  } +} +  void WordSet::TraversalFeaturesImpl(const SentenceMetadata& /*smeta*/ ,  				    const Hypergraph::Edge& edge,  				    const vector<const void*>& /* ant_contexts */,  				    SparseVector<double>* features,  				    SparseVector<double>* /* estimated_features */,  				    void* /* context */) const { -    double addScore = 0.0; -  for(std::vector<WordID>::const_iterator it = edge.rule_->e_.begin(); it != edge.rule_->e_.end(); ++it) { -     +  for(vector<WordID>::const_iterator it = edge.rule_->e_.begin(); it != edge.rule_->e_.end(); ++it) {      bool inVocab = (vocab_.find(*it) != vocab_.end());      if(oovMode_ && !inVocab) {        addScore += 1.0; diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h index 639e1514..e78cd2fb 100644 --- a/decoder/ff_wordset.h +++ b/decoder/ff_wordset.h @@ -4,14 +4,18 @@  #include "ff.h"  #include "tdict.h" -#include <tr1/unordered_set> -#include <boost/algorithm/string.hpp> -  #include <vector>  #include <string>  #include <iostream>  #include <fstream> +#ifndef HAVE_OLD_CPP +# include <unordered_set> +#else +# include <tr1/unordered_set> +namespace std { using std::tr1::unordered_set; } +#endif +  class WordSet : public FeatureFunction {   public:  // we depend on the order of the initializer list @@ -42,69 +46,12 @@ class WordSet : public FeatureFunction {                                       void* context) const;   private: -  static void loadVocab(const std::string& vocabFile, std::tr1::unordered_set<WordID>* vocab) { - -      std::ifstream file; -      std::string line; - -      file.open(vocabFile.c_str(), std::fstream::in); -      if (file.is_open()) { -	unsigned lineNum = 0; -	while (!file.eof()) { -	  ++lineNum; -	  getline(file, line); -	  boost::trim(line); -	  if(line.empty()) { -	    continue; -	  } -	   -	  WordID vocabId = TD::Convert(line); -	  vocab->insert(vocabId); -	} -	file.close(); -      } else { -	std::cerr << "Unable to open file: " << vocabFile;  -	exit(1); -      } -  } - -  static void parseArgs(const std::string& args, std::string* featName, std::string* vocabFile, bool* oovMode) { - -    std::vector<std::string> toks(10); -    boost::split(toks, args, boost::is_any_of(" ")); - -    *oovMode = false; - -    // skip initial feature name -    for(std::vector<std::string>::const_iterator it = toks.begin(); it != toks.end(); ++it) { -      if(*it == "-v") { -	*vocabFile = *++it; // copy - -      } else if(*it == "-N") { -	*featName = *++it; - -      } else if(*it == "--oov") { -	*oovMode = true; - -      } else { -	std::cerr << "Unrecognized argument: " << *it << std::endl; -	exit(1); -      } -    } - -    if(*featName == "") { -      std::cerr << "featName (-N) not specified for WordSet" << std::endl; -      exit(1); -    } -    if(*vocabFile == "") { -      std::cerr << "vocabFile (-v) not specified for WordSet" << std::endl; -      exit(1); -    } -  } +  static void parseArgs(const std::string& args, std::string* featName, std::string* vocabFile, bool* oovMode); +  static void loadVocab(const std::string& vocabFile, std::unordered_set<WordID>* vocab);    int fid_;    bool oovMode_; -  std::tr1::unordered_set<WordID> vocab_; +  std::unordered_set<WordID> vocab_;  };  #endif diff --git a/decoder/grammar.cc b/decoder/grammar.cc index ee43f537..160d00e6 100644 --- a/decoder/grammar.cc +++ b/decoder/grammar.cc @@ -3,15 +3,20 @@  #include <algorithm>  #include <utility>  #include <map> -#include <tr1/unordered_map> -#include <tr1/unordered_set> +#ifndef HAVE_OLD_CPP +# include <unordered_map> +# include <unordered_set> +#else +# include <tr1/unordered_map> +# include <tr1/unordered_set> +namespace std { using std::tr1::unordered_map; using std::tr1::unordered_set; } +#endif  #include "rule_lexer.h"  #include "filelib.h"  #include "tdict.h"  using namespace std; -using namespace std::tr1;  const vector<TRulePtr> Grammar::NO_RULES; diff --git a/decoder/hg_intersect.cc b/decoder/hg_intersect.cc index ad5b701a..31a9a1ce 100644 --- a/decoder/hg_intersect.cc +++ b/decoder/hg_intersect.cc @@ -1,7 +1,12 @@  #include "hg_intersect.h"  #include <vector> -#include <tr1/unordered_map> +#ifndef HAVE_OLD_CPP +# include <unordered_map> +#else +# include <tr1/unordered_map> +namespace std { using std::tr1::unordered_map; } +#endif  #include "fast_lexical_cast.hpp"  #include <boost/functional/hash.hpp> @@ -13,7 +18,6 @@  #include "bottom_up_parser.h"  using boost::lexical_cast; -using namespace std::tr1;  using namespace std;  struct RuleFilter { diff --git a/decoder/kbest.h b/decoder/kbest.h index 44c23151..c7194c7e 100644 --- a/decoder/kbest.h +++ b/decoder/kbest.h @@ -3,7 +3,12 @@  #include <vector>  #include <utility> -#include <tr1/unordered_set> +#ifndef HAVE_OLD_CPP +# include <unordered_set> +#else +# include <tr1/unordered_set> +namespace std { using std::tr1::unordered_set; } +#endif  #include <boost/shared_ptr.hpp>  #include <boost/type_traits.hpp> @@ -22,7 +27,7 @@ namespace KBest {    // optional, filter unique yield strings    struct FilterUnique { -    std::tr1::unordered_set<std::vector<WordID>, boost::hash<std::vector<WordID> > > unique; +    std::unordered_set<std::vector<WordID>, boost::hash<std::vector<WordID> > > unique;      bool operator()(const std::vector<WordID>& yield) {        return !unique.insert(yield).second; @@ -111,7 +116,7 @@ namespace KBest {      };      typedef std::vector<Derivation*> CandidateHeap;      typedef std::vector<Derivation*> DerivationList; -    typedef std::tr1::unordered_set< +    typedef std::unordered_set<         const Derivation*, DerivationUniquenessHash, DerivationUniquenessEquals> UniqueDerivationSet;      struct NodeDerivationState { diff --git a/decoder/maxtrans_blunsom.cc b/decoder/maxtrans_blunsom.cc index 774e4170..a9f65fab 100644 --- a/decoder/maxtrans_blunsom.cc +++ b/decoder/maxtrans_blunsom.cc @@ -2,8 +2,14 @@  #include <vector>  #include <algorithm> -#include <tr1/unordered_map> -#include <tr1/unordered_set> +#ifndef HAVE_OLD_CPP +# include <unordered_map> +# include <unordered_set> +#else +# include <tr1/unordered_map> +# include <tr1/unordered_set> +namespace std { using std::tr1::unordered_map; using std::tr1::unordered_set; } +#endif  #include <boost/tuple/tuple.hpp>  #include <boost/functional/hash.hpp> @@ -14,7 +20,6 @@  using boost::tuple;  using namespace std; -using namespace std::tr1;  namespace Hack { diff --git a/decoder/phrasebased_translator.cc b/decoder/phrasebased_translator.cc index d65e44d1..8048248e 100644 --- a/decoder/phrasebased_translator.cc +++ b/decoder/phrasebased_translator.cc @@ -2,8 +2,14 @@  #include <queue>  #include <iostream> -#include <tr1/unordered_map> -#include <tr1/unordered_set> +#ifndef HAVE_OLD_CPP +# include <unordered_map> +# include <unordered_set> +#else +# include <tr1/unordered_map> +# include <tr1/unordered_set> +namespace std { using std::tr1::unordered_map; using std::tr1::unordered_set; } +#endif  #include <boost/tuple/tuple.hpp>  #include <boost/functional/hash.hpp> @@ -17,7 +23,6 @@  #include "array2d.h"  using namespace std; -using namespace std::tr1;  using namespace boost::tuples;  struct Coverage : public vector<bool> { @@ -49,10 +54,13 @@ struct Coverage : public vector<bool> {  };  struct CoverageHash {    size_t operator()(const Coverage& cov) const { -    return hasher_(static_cast<const vector<bool>&>(cov)); +    int seed = 131; +    size_t res = 0; +    for (vector<bool>::const_iterator it = cov.begin(); it != cov.end(); ++it) { +      res = (res * seed) + (*it + 1); +    } +    return res;    } - private: -  boost::hash<vector<bool> > hasher_;  };  ostream& operator<<(ostream& os, const Coverage& cov) {    os << '['; diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index 6f0b003b..a506c591 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -1,13 +1,9 @@ -//TODO: bottom-up pruning, with actual final models' (appropriately weighted) heuristics and local scores. - -//TODO: grammar heuristic (min cost of reachable rule set) for binarizations (active edges) if we wish to prune those also - -#include "hash.h" -#include "translator.h"  #include <algorithm>  #include <vector>  #include <boost/foreach.hpp>  #include <boost/functional/hash.hpp> +#include "hash.h" +#include "translator.h"  #include "hg.h"  #include "grammar.h"  #include "bottom_up_parser.h" @@ -16,13 +12,11 @@  #include "tdict.h"  #include "viterbi.h"  #include "verbose.h" -#include <tr1/unordered_map>  #define foreach         BOOST_FOREACH  #define reverse_foreach BOOST_REVERSE_FOREACH  using namespace std; -using namespace std::tr1;  static bool printGrammarsUsed = false;  struct GlueGrammar : public TextGrammar { diff --git a/decoder/sentence_metadata.h b/decoder/sentence_metadata.h index eab9f15d..52586331 100644 --- a/decoder/sentence_metadata.h +++ b/decoder/sentence_metadata.h @@ -5,7 +5,9 @@  #include <map>  #include <cassert>  #include "lattice.h" -#include "scorer.h" + +struct DocScorer;  // deprecated, will be removed +struct Score;     // deprecated, will be removed  struct SentenceMetadata {    friend class DecoderImpl; diff --git a/decoder/tromble_loss.cc b/decoder/tromble_loss.cc deleted file mode 100644 index 24cfef5f..00000000 --- a/decoder/tromble_loss.cc +++ /dev/null @@ -1,309 +0,0 @@ -#include "tromble_loss.h" -#include "fast_lexical_cast.hpp" - -#include <boost/algorithm/string/predicate.hpp> -#include <boost/circular_buffer.hpp> -#include <boost/functional/hash.hpp> -#include <boost/range/iterator_range.hpp> -#include <boost/tokenizer.hpp> -#include <boost/unordered_map.hpp> - -#include <cmath> -#include <fstream> -#include <vector> - -#include "sentence_metadata.h" -#include "trule.h" -#include "tdict.h" - -using namespace std; - -namespace { - -typedef unsigned char GramCount; - -struct RefCounts { -  GramCount max; -  std::vector<GramCount> refs; -  size_t length; -}; - -typedef boost::unordered_map<std::vector<WordID>, size_t, boost::hash<std::vector<WordID> > > NGramMap; - -// Take all the n-grams in the references and stuff them into ngrams. -void MakeNGramMapFromReferences(const vector<vector<WordID> > &references, -                                int n, -                                vector<RefCounts> *counts, -                                NGramMap *ngrams) { -  ngrams->clear(); -  std::pair<vector<WordID>, size_t> insert_me; -  vector<WordID> &ngram = insert_me.first; -  ngram.reserve(n); -  size_t &id = insert_me.second; -  id = 0; -  for (int refi = 0; refi < references.size(); ++refi) { -    const vector<WordID>& ref = references[refi]; -    const int s = ref.size(); -    for (int j=0; j<s; ++j) { -      const int remaining = s-j; -      const int k = (n < remaining ? n : remaining); -      ngram.clear(); -      for (unsigned int i = 0; i < k; ++i) { -        ngram.push_back(ref[j + i]); -        std::pair<NGramMap::iterator, bool> ret(ngrams->insert(insert_me)); -        if (ret.second) { -          counts->resize(id + 1); -          RefCounts &ref_counts = counts->back(); -          ref_counts.max = 1; -          ref_counts.refs.resize(references.size()); -          ref_counts.refs[refi] = 1; -          ref_counts.length = ngram.size(); -          ++id; -        } else { -          RefCounts &ref_counts = (*counts)[ret.first->second]; -          ref_counts.max = std::max(ref_counts.max, ++ref_counts.refs[refi]); -        } -      } -    } -  } -} - -struct MutableState { -  MutableState(void *from, size_t n) : length(reinterpret_cast<size_t*>(from)), left(reinterpret_cast<WordID *>(length + 1)), right(left + n - 1), counts(reinterpret_cast<GramCount *>(right + n - 1)) {} -  size_t *length; -  WordID *left, *right; -  GramCount *counts; -  static size_t Size(size_t n, size_t bound_ngram_id) { return sizeof(size_t) + (n - 1) * 2 * sizeof(WordID) + bound_ngram_id * sizeof(GramCount); } -}; - -struct ConstState { -  ConstState(const void *from, size_t n) : length(reinterpret_cast<const size_t*>(from)), left(reinterpret_cast<const WordID *>(length + 1)), right(left + n - 1), counts(reinterpret_cast<const GramCount *>(right + n - 1)) {} -  const size_t *length; -  const WordID *left, *right; -  const GramCount *counts; -  static size_t Size(size_t n, size_t bound_ngram_id) { return sizeof(size_t) + (n - 1) * 2 * sizeof(WordID) + bound_ngram_id * sizeof(GramCount); } -}; - -template <class T> struct CompatibleHashRange : public std::unary_function<const boost::iterator_range<T> &, size_t> { -  size_t operator()(const boost::iterator_range<T> &range) const { -    return boost::hash_range(range.begin(), range.end()); -  } -}; - -template <class T> struct CompatibleEqualsRange : public std::binary_function<const boost::iterator_range<T> &, const std::vector<WordID> &, size_t> { -  size_t operator()(const boost::iterator_range<T> &range, const std::vector<WordID> &vec) const { -    return boost::algorithm::equals(range, vec); -  } -  size_t operator()(const std::vector<WordID> &vec, const boost::iterator_range<T> &range) const { -    return boost::algorithm::equals(range, vec); -  } -}; - -void AddWord(const boost::circular_buffer<WordID> &segment, size_t min_length, const NGramMap &ref_grams, GramCount *counters) { -  typedef boost::circular_buffer<WordID>::const_iterator BufferIt; -  typedef boost::iterator_range<BufferIt> SegmentRange; -  if (segment.size() < min_length) return; -#if 0 -  CompatibleHashRange<BufferIt> hasher; -  CompatibleEqualsRange<BufferIt> equals; -  for (BufferIt seg_start(segment.end() - min_length); ; --seg_start) { -    NGramMap::const_iterator found = ref_grams.find(SegmentRange(seg_start, segment.end())); -    if (found == ref_grams.end()) break; -    ++counters[found->second]; -    if (seg_start == segment.begin()) break; -  } -#endif -} - -} // namespace - -class TrombleLossComputerImpl { - public: -  explicit TrombleLossComputerImpl(const std::string ¶ms) : star_(TD::Convert("<{STAR}>")) { -    typedef boost::tokenizer<boost::char_separator<char> > Tokenizer; -    // Argument parsing -    std::string ref_file_name; -    Tokenizer tok(params, boost::char_separator<char>(" ")); -    Tokenizer::iterator i = tok.begin(); -    if (i == tok.end()) { -      std::cerr << "TrombleLossComputer needs a reference file name." << std::endl; -      exit(1); -    } -    ref_file_name = *i++; -    if (i == tok.end()) { -      std::cerr << "TrombleLossComputer needs to know how many references." << std::endl; -      exit(1); -    } -    num_refs_ = boost::lexical_cast<unsigned int>(*i++); -    for (; i != tok.end(); ++i) { -     thetas_.push_back(boost::lexical_cast<double>(*i)); -    } -    if (thetas_.empty()) { -      std::cerr << "TrombleLossComputer is pointless with no weight on n-grams." << std::endl; -      exit(1); -    } - -    // Read references file. -    std::ifstream ref_file(ref_file_name.c_str()); -    if (!ref_file) { -      std::cerr << "Could not open TrombleLossComputer file " << ref_file_name << std::endl; -      exit(1); -    } -    std::string ref; -    vector<vector<WordID> > references(num_refs_); -    bound_ngram_id_ = 0; -    for (unsigned int sentence = 0; ref_file; ++sentence) { -      for (unsigned int refidx = 0; refidx < num_refs_; ++refidx) { -        if (!getline(ref_file, ref)) { -          if (refidx == 0) break; -          std::cerr << "Short read of " << refidx << " references for sentence " << sentence << std::endl; -          exit(1); -        } -        TD::ConvertSentence(ref, &references[refidx]); -      } -      ref_ids_.resize(sentence + 1); -      ref_counts_.resize(sentence + 1); -      MakeNGramMapFromReferences(references, thetas_.size(), &ref_counts_.back(), &ref_ids_.back()); -      bound_ngram_id_ = std::max(bound_ngram_id_, ref_ids_.back().size()); -    } -  } - -  size_t StateSize() const { -    // n-1 boundary words plus counts for n-grams currently rendered as bytes even though most would fit in bits. -    // Also, this is cached by higher up classes so no need to cache here. -    return MutableState::Size(thetas_.size(), bound_ngram_id_); -  } - -  double Traversal( -      const SentenceMetadata &smeta, -      const TRule &rule, -      const vector<const void*> &ant_contexts, -      void *out_context) const { -    // TODO: get refs from sentence metadata. -    // This will require resizable features. -    if (smeta.GetSentenceID() >= ref_ids_.size()) { -      std::cerr << "Sentence ID " << smeta.GetSentenceID() << " doesn't have references; there are only " << ref_ids_.size() << " references." << std::endl; -      exit(1); -    } -    const NGramMap &ngrams = ref_ids_[smeta.GetSentenceID()]; -    MutableState out_state(out_context, thetas_.size()); -    memset(out_state.counts, 0, bound_ngram_id_ * sizeof(GramCount)); -    boost::circular_buffer<WordID> history(thetas_.size()); -    std::vector<const void*>::const_iterator ant_context = ant_contexts.begin(); -    *out_state.length = 0; -    size_t pushed = 0; -    const size_t keep = thetas_.size() - 1; -    for (vector<WordID>::const_iterator rhs = rule.e().begin(); rhs != rule.e().end(); ++rhs) { -      if (*rhs < 1) { -        assert(ant_context != ant_contexts.end()); -        // Constituent -        ConstState rhs_state(*ant_context, thetas_.size()); -        *out_state.length += *rhs_state.length; -        { -          GramCount *accum = out_state.counts; -          for (const GramCount *c = rhs_state.counts; c != rhs_state.counts + ngrams.size(); ++c, ++accum) { -            *accum += *c; -          } -        } -        const WordID *w = rhs_state.left; -        bool long_constit = true; -        for (size_t i = 1; i <= keep; ++i, ++w) { -          if (*w == star_) { -            long_constit = false; -            break; -          } -          history.push_back(*w); -          if (++pushed == keep) { -            std::copy(history.begin(), history.end(), out_state.left); -          } -          // Now i is the length of the history coming from this constituent.  So it needs at least i+1 words to have a cross-child add. -          AddWord(history, i + 1, ngrams, out_state.counts); -        } -        // If the consituent is shorter than thetas_.size(), then the -        // constituent's left is the entire constituent, so history is already -        // correct.  Otherwise, the entire right hand side is the entire -        // history. -        if (long_constit) { -          history.assign(thetas_.size(), rhs_state.right, rhs_state.right + keep); -        } -        ++ant_context; -      } else { -        // Word -        ++*out_state.length; -        history.push_back(*rhs); -        if (++pushed == keep) { -          std::copy(history.begin(), history.end(), out_state.left); -        } -        AddWord(history, 1, ngrams, out_state.counts); -      } -    } -    // Fill in left and right constituents. -    if (pushed < keep) { -      std::copy(history.begin(), history.end(), out_state.left); -      for (WordID *i = out_state.left + pushed; i != out_state.left + keep; ++i) { -        *i = star_; -      } -      std::copy(out_state.left, out_state.left + keep, out_state.right); -    } else if(pushed == keep) { -      std::copy(history.begin(), history.end(), out_state.right); -    } else if ((pushed > keep) && !history.empty()) { -      std::copy(history.begin() + 1, history.end(), out_state.right); -    } -    std::vector<RefCounts>::const_iterator ref_info = ref_counts_[smeta.GetSentenceID()].begin(); -    // Clip the counts and count matches. -    // Indexed by reference then by length. -    std::vector<std::vector<unsigned int> > matches(num_refs_, std::vector<unsigned int>(thetas_.size())); -    for (GramCount *c = out_state.counts; c != out_state.counts + ngrams.size(); ++c, ++ref_info) { -      *c = std::min(*c, ref_info->max); -      if (*c) { -        for (unsigned int refidx = 0; refidx < num_refs_; ++refidx) { -          assert(ref_info->length >= 1); -          assert(ref_info->length - 1 < thetas_.size()); -          matches[refidx][ref_info->length - 1] += std::min(*c, ref_info->refs[refidx]); -        } -      } -    } -    double best_score = 0.0; -    for (unsigned int refidx = 0; refidx < num_refs_; ++refidx) { -      double score = 0.0; -      for (unsigned int j = 0; j < std::min(*out_state.length, thetas_.size()); ++j) { -        score += thetas_[j] * static_cast<double>(matches[refidx][j]) / static_cast<double>(*out_state.length - j); -      } -      best_score = std::max(best_score, score); -    } -    return best_score; -  } - - private: -  unsigned int num_refs_; -  // Indexed by sentence id. -  std::vector<NGramMap> ref_ids_; -  // Then by id from ref_ids_. -  std::vector<std::vector<RefCounts> > ref_counts_; - -  // thetas_[0] is the weight for 1-grams -  std::vector<double> thetas_; - -  // All ngram ids in ref_ids_ are < this value. -  size_t bound_ngram_id_; - -  const WordID star_; -}; - -TrombleLossComputer::TrombleLossComputer(const std::string ¶ms) : -    boost::base_from_member<PImpl>(new TrombleLossComputerImpl(params)), -    FeatureFunction(boost::base_from_member<PImpl>::member->StateSize()), -    fid_(FD::Convert("TrombleLossComputer")) {} - -TrombleLossComputer::~TrombleLossComputer() {} - -void TrombleLossComputer::TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const Hypergraph::Edge& edge, -                                     const vector<const void*>& ant_contexts, -                                     SparseVector<double>* features, -                                     SparseVector<double>* estimated_features, -                                     void* out_context) const { -  (void) estimated_features; -  const double loss = boost::base_from_member<PImpl>::member->Traversal(smeta, *edge.rule_, ant_contexts, out_context); -  features->set_value(fid_, loss); -} diff --git a/decoder/tromble_loss.h b/decoder/tromble_loss.h deleted file mode 100644 index fde33100..00000000 --- a/decoder/tromble_loss.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef _TROMBLE_LOSS_H_ -#define _TROMBLE_LOSS_H_ - -#include <vector> -#include <boost/scoped_ptr.hpp> -#include <boost/utility/base_from_member.hpp> - -#include "ff.h" -#include "wordid.h" - -// this may not be the most elegant way to implement this computation, but since we -// may need cube pruning and state splitting, we reuse the feature detector framework. -// the loss is then stored in a feature #0 (which is guaranteed to have weight 0 and -// never be a "real" feature). -class TrombleLossComputerImpl; -class TrombleLossComputer : private boost::base_from_member<boost::scoped_ptr<TrombleLossComputerImpl> >, public FeatureFunction { - private: -  typedef boost::scoped_ptr<TrombleLossComputerImpl> PImpl; -  typedef FeatureFunction Base; - - public: -  // String parameters are ref.txt num_ref weight1 weight2 ... weightn -  // where ref.txt contains references on per line, with num_ref references per sentence -  // The weights are the weight on each length n-gram. -  explicit TrombleLossComputer(const std::string ¶ms); - -  ~TrombleLossComputer(); - - protected: -  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -                                     const HG::Edge& edge, -                                     const std::vector<const void*>& ant_contexts, -                                     SparseVector<double>* features, -                                     SparseVector<double>* estimated_features, -                                     void* out_context) const; - private: -  const int fid_; -}; - -#endif | 
