Merge remote-tracking branch 'upstream/master'

author: Patrick Simianer <p@simianer.de> 2014-10-13 19:03:48 +0100
committer: Patrick Simianer <p@simianer.de> 2014-10-13 19:03:48 +0100
commit: cb9fb7088dde35881516c088db402abe747d49fa (patch)
tree: a91e4935a7941f1b261f76d88ab41fa3078a1891 /decoder
parent: 0a00e57e921c8eca8e02364db7d2e6607bfdcebc (diff)
parent: b1ed81ef3216b212295afa76c5d20a56fb647204 (diff)
73 files changed, 184 insertions, 155 deletions
diff --git a/decoder/aligner.cc b/decoder/aligner.cc
index 232e022a..fd648370 100644
--- a/decoder/aligner.cc
+++ b/decoder/aligner.cc
@@ -198,13 +198,13 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice,
   }
   const Hypergraph* g = &in_g;
   HypergraphP new_hg;
-  if (!src_lattice.IsSentence() ||
-      !trg_lattice.IsSentence()) {
+  if (!IsSentence(src_lattice) ||
+      !IsSentence(trg_lattice)) {
     if (map_instead_of_viterbi) {
       cerr << "  Lattice alignment: using Viterbi instead of MAP alignment\n";
     }
     map_instead_of_viterbi = false;
-    fix_up_src_spans = !src_lattice.IsSentence();
+    fix_up_src_spans = !IsSentence(src_lattice);
   }
 
   KBest::KBestDerivations<vector<Hypergraph::Edge const*>, ViterbiPathTraversal> kbest(in_g, k_best);
diff --git a/decoder/apply_models.h b/decoder/apply_models.h
index 19a4c7be..f03c973a 100644
--- a/decoder/apply_models.h
+++ b/decoder/apply_models.h
@@ -1,5 +1,5 @@
-#ifndef _APPLY_MODELS_H_
-#define _APPLY_MODELS_H_
+#ifndef APPLY_MODELS_H_
+#define APPLY_MODELS_H_
 
 #include <iostream>
 
diff --git a/decoder/bottom_up_parser.h b/decoder/bottom_up_parser.h
index 546bfb54..628bb96d 100644
--- a/decoder/bottom_up_parser.h
+++ b/decoder/bottom_up_parser.h
@@ -1,5 +1,5 @@
-#ifndef _BOTTOM_UP_PARSER_H_
-#define _BOTTOM_UP_PARSER_H_
+#ifndef BOTTOM_UP_PARSER_H_
+#define BOTTOM_UP_PARSER_H_
 
 #include <vector>
 #include <string>
diff --git a/decoder/csplit.cc b/decoder/csplit.cc
index 4a723822..7ee4092e 100644
--- a/decoder/csplit.cc
+++ b/decoder/csplit.cc
@@ -151,6 +151,7 @@ bool CompoundSplit::TranslateImpl(const string& input,
   smeta->SetSourceLength(in.size());  // TODO do utf8 or somethign
   for (int i = 0; i < in.size(); ++i)
     smeta->src_lattice_.push_back(vector<LatticeArc>(1, LatticeArc(TD::Convert(in[i]), 0.0, 1)));
+  smeta->ComputeInputLatticeType();
   pimpl_->BuildTrellis(in, forest);
   forest->Reweight(weights);
   return true;
diff --git a/decoder/csplit.h b/decoder/csplit.h
index 82ed23fc..83d457b8 100644
--- a/decoder/csplit.h
+++ b/decoder/csplit.h
@@ -1,5 +1,5 @@
-#ifndef _CSPLIT_H_
-#define _CSPLIT_H_
+#ifndef CSPLIT_H_
+#define CSPLIT_H_
 
 #include "translator.h"
 #include "lattice.h"
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index c384c33f..9e8d692a 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -86,7 +86,7 @@ struct ELengthWeightFunction {
   }
 };
 inline void ShowBanner() {
-  cerr << "cdec (c) 2009--2014 by Chris Dyer\n";
+  cerr << "cdec (c) 2009--2014 by Chris Dyer" << endl;
 }
 
 inline string str(char const* name,po::variables_map const& conf) {
diff --git a/decoder/decoder.h b/decoder/decoder.h
index 8039a42b..a545206b 100644
--- a/decoder/decoder.h
+++ b/decoder/decoder.h
@@ -1,5 +1,5 @@
-#ifndef _DECODER_H_
-#define _DECODER_H_
+#ifndef DECODER_H_
+#define DECODER_H_
 
 #include <iostream>
 #include <string>
diff --git a/decoder/earley_composer.h b/decoder/earley_composer.h
index 9f786bf6..31602f67 100644
--- a/decoder/earley_composer.h
+++ b/decoder/earley_composer.h
@@ -1,5 +1,5 @@
-#ifndef _EARLEY_COMPOSER_H_
-#define _EARLEY_COMPOSER_H_
+#ifndef EARLEY_COMPOSER_H_
+#define EARLEY_COMPOSER_H_
 
 #include <iostream>
 
diff --git a/decoder/factored_lexicon_helper.h b/decoder/factored_lexicon_helper.h
index 7fedc517..460bdebb 100644
--- a/decoder/factored_lexicon_helper.h
+++ b/decoder/factored_lexicon_helper.h
@@ -1,5 +1,5 @@
-#ifndef _FACTORED_LEXICON_HELPER_
-#define _FACTORED_LEXICON_HELPER_
+#ifndef FACTORED_LEXICON_HELPER_
+#define FACTORED_LEXICON_HELPER_
 
 #include <cassert>
 #include <vector>
diff --git a/decoder/ff.h b/decoder/ff.h
index 3280592e..eed1e3fb 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -1,5 +1,5 @@
-#ifndef _FF_H_
-#define _FF_H_
+#ifndef FF_H_
+#define FF_H_
 
 #include <string>
 #include <vector>
diff --git a/decoder/ff_basic.cc b/decoder/ff_basic.cc
index f9404d24..f960418a 100644
--- a/decoder/ff_basic.cc
+++ b/decoder/ff_basic.cc
@@ -49,9 +49,7 @@ void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
   features->set_value(fid_, edge.rule_->FWords() * value_);
 }
 
-
-ArityPenalty::ArityPenalty(const std::string& param) :
-    value_(-1.0 / log(10)) {
+ArityPenalty::ArityPenalty(const std::string& param) {
   string fname = "Arity_";
   unsigned MAX=DEFAULT_MAX_ARITY;
   using namespace boost;
@@ -61,7 +59,8 @@ ArityPenalty::ArityPenalty(const std::string& param) :
     WordID fid=FD::Convert(fname+lexical_cast<string>(i));
     fids_.push_back(fid);
   }
-  while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen.  doesn't change anything
+  // pretty up features vector in case FD was frozen.  doesn't change anything
+  while (!fids_.empty() && fids_.back()==0) fids_.pop_back();
 }
 
 void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
@@ -75,6 +74,6 @@ void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
   (void) state;
   (void) estimated_features;
   unsigned a=edge.Arity();
-  features->set_value(a<fids_.size()?fids_[a]:0, value_);
+  if (a < fids_.size()) features->set_value(fids_[a], 1.0);
 }
 
diff --git a/decoder/ff_basic.h b/decoder/ff_basic.h
index 901c0110..c63daf0f 100644
--- a/decoder/ff_basic.h
+++ b/decoder/ff_basic.h
@@ -1,5 +1,5 @@
-#ifndef _FF_BASIC_H_
-#define _FF_BASIC_H_
+#ifndef FF_BASIC_H_
+#define FF_BASIC_H_
 
 #include "ff.h"
 
@@ -41,7 +41,7 @@ class SourceWordPenalty : public FeatureFunction {
   const double value_;
 };
 
-#define DEFAULT_MAX_ARITY 9
+#define DEFAULT_MAX_ARITY 50
 #define DEFAULT_MAX_ARITY_STRINGIZE(x) #x
 #define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x)
 #define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY)
@@ -62,7 +62,6 @@ class ArityPenalty : public FeatureFunction {
                                      void* context) const;
  private:
   std::vector<WordID> fids_;
-  const double value_;
 };
 
 #endif
diff --git a/decoder/ff_bleu.h b/decoder/ff_bleu.h
index 344dc788..8ca2c095 100644
--- a/decoder/ff_bleu.h
+++ b/decoder/ff_bleu.h
@@ -1,5 +1,5 @@
-#ifndef _BLEU_FF_H_
-#define _BLEU_FF_H_
+#ifndef BLEU_FF_H_
+#define BLEU_FF_H_
 
 #include <vector>
 #include <string>
diff --git a/decoder/ff_charset.h b/decoder/ff_charset.h
index 267ef65d..e22ece2b 100644
--- a/decoder/ff_charset.h
+++ b/decoder/ff_charset.h
@@ -1,5 +1,5 @@
-#ifndef _FFCHARSET_H_
-#define _FFCHARSET_H_
+#ifndef FFCHARSET_H_
+#define FFCHARSET_H_
 
 #include <string>
 #include <map>
diff --git a/decoder/ff_context.h b/decoder/ff_context.h
index 19198ec3..ed1aea2b 100644
--- a/decoder/ff_context.h
+++ b/decoder/ff_context.h
@@ -1,6 +1,5 @@
-
-#ifndef _FF_CONTEXT_H_
-#define _FF_CONTEXT_H_
+#ifndef FF_CONTEXT_H_
+#define FF_CONTEXT_H_
 
 #include <vector>
 #include <boost/xpressive/xpressive.hpp>
diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc
index a0e538d3..550ff69a 100644
--- a/decoder/ff_csplit.cc
+++ b/decoder/ff_csplit.cc
@@ -2,6 +2,8 @@
 
 #include <set>
 #include <cstring>
+#include <unordered_set>
+#include <unordered_map>
 
 #include "klm/lm/model.hh"
 
@@ -14,12 +16,6 @@
 #include "stringlib.h"
 #include "tdict.h"
 
-#ifndef HAVE_OLD_CPP
-# include <unordered_set>
-#else
-# include <tr1/unordered_set>
-namespace std { using std::tr1::unordered_set; }
-#endif
 using namespace std;
 
 struct BasicCSplitFeaturesImpl {
diff --git a/decoder/ff_csplit.h b/decoder/ff_csplit.h
index 79bf2886..227f2a14 100644
--- a/decoder/ff_csplit.h
+++ b/decoder/ff_csplit.h
@@ -1,5 +1,5 @@
-#ifndef _FF_CSPLIT_H_
-#define _FF_CSPLIT_H_
+#ifndef FF_CSPLIT_H_
+#define FF_CSPLIT_H_
 
 #include <boost/shared_ptr.hpp>
 
diff --git a/decoder/ff_external.h b/decoder/ff_external.h
index 3e2bee51..fd12a37c 100644
--- a/decoder/ff_external.h
+++ b/decoder/ff_external.h
@@ -1,5 +1,5 @@
-#ifndef _FFEXTERNAL_H_
-#define _FFEXTERNAL_H_
+#ifndef FFEXTERNAL_H_
+#define FFEXTERNAL_H_
 
 #include "ff.h"
 
diff --git a/decoder/ff_factory.h b/decoder/ff_factory.h
index 1aa8e55f..ba9be9ac 100644
--- a/decoder/ff_factory.h
+++ b/decoder/ff_factory.h
@@ -1,5 +1,5 @@
-#ifndef _FF_FACTORY_H_
-#define _FF_FACTORY_H_
+#ifndef FF_FACTORY_H_
+#define FF_FACTORY_H_
 
 //TODO: use http://www.boost.org/doc/libs/1_43_0/libs/functional/factory/doc/html/index.html ?
 
diff --git a/decoder/ff_klm.h b/decoder/ff_klm.h
index db4032f7..c8350623 100644
--- a/decoder/ff_klm.h
+++ b/decoder/ff_klm.h
@@ -1,5 +1,5 @@
-#ifndef _KLM_FF_H_
-#define _KLM_FF_H_
+#ifndef KLM_FF_H_
+#define KLM_FF_H_
 
 #include <vector>
 #include <string>
diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h
index 85e79704..83a2e186 100644
--- a/decoder/ff_lm.h
+++ b/decoder/ff_lm.h
@@ -1,5 +1,5 @@
-#ifndef _LM_FF_H_
-#define _LM_FF_H_
+#ifndef LM_FF_H_
+#define LM_FF_H_
 
 #include <vector>
 #include <string>
diff --git a/decoder/ff_ngrams.h b/decoder/ff_ngrams.h
index 4965d235..5dea9a7d 100644
--- a/decoder/ff_ngrams.h
+++ b/decoder/ff_ngrams.h
@@ -1,5 +1,5 @@
-#ifndef _NGRAMS_FF_H_
-#define _NGRAMS_FF_H_
+#ifndef NGRAMS_FF_H_
+#define NGRAMS_FF_H_
 
 #include <vector>
 #include <map>
diff --git a/decoder/ff_parse_match.h b/decoder/ff_parse_match.h
index 7820b418..188c406a 100644
--- a/decoder/ff_parse_match.h
+++ b/decoder/ff_parse_match.h
@@ -1,5 +1,5 @@
-#ifndef _FF_PARSE_MATCH_H_
-#define _FF_PARSE_MATCH_H_
+#ifndef FF_PARSE_MATCH_H_
+#define FF_PARSE_MATCH_H_
 
 #include "ff.h"
 #include "hg.h"
diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h
index f210dc65..5c4cf45e 100644
--- a/decoder/ff_rules.h
+++ b/decoder/ff_rules.h
@@ -1,5 +1,5 @@
-#ifndef _FF_RULES_H_
-#define _FF_RULES_H_
+#ifndef FF_RULES_H_
+#define FF_RULES_H_
 
 #include <vector>
 #include <map>
diff --git a/decoder/ff_ruleshape.h b/decoder/ff_ruleshape.h
index 488cfd84..66914f5d 100644
--- a/decoder/ff_ruleshape.h
+++ b/decoder/ff_ruleshape.h
@@ -1,5 +1,5 @@
-#ifndef _FF_RULESHAPE_H_
-#define _FF_RULESHAPE_H_
+#ifndef FF_RULESHAPE_H_
+#define FF_RULESHAPE_H_
 
 #include <vector>
 #include <map>
diff --git a/decoder/ff_soft_syntax.h b/decoder/ff_soft_syntax.h
index e71825d5..da51df7f 100644
--- a/decoder/ff_soft_syntax.h
+++ b/decoder/ff_soft_syntax.h
@@ -1,5 +1,5 @@
-#ifndef _FF_SOFT_SYNTAX_H_
-#define _FF_SOFT_SYNTAX_H_
+#ifndef FF_SOFT_SYNTAX_H_
+#define FF_SOFT_SYNTAX_H_
 
 #include "ff.h"
 #include "hg.h"
diff --git a/decoder/ff_soft_syntax_mindist.h b/decoder/ff_soft_syntax_mindist.h
index bf938b38..205eff4b 100644
--- a/decoder/ff_soft_syntax_mindist.h
+++ b/decoder/ff_soft_syntax_mindist.h
@@ -1,5 +1,5 @@
-#ifndef _FF_SOFT_SYNTAX_MINDIST_H_
-#define _FF_SOFT_SYNTAX_MINDIST_H_
+#ifndef FF_SOFT_SYNTAX_MINDIST_H_
+#define FF_SOFT_SYNTAX_MINDIST_H_
 
 #include "ff.h"
 #include "hg.h"
diff --git a/decoder/ff_source_path.h b/decoder/ff_source_path.h
index 03126412..fc309264 100644
--- a/decoder/ff_source_path.h
+++ b/decoder/ff_source_path.h
@@ -1,5 +1,5 @@
-#ifndef _FF_SOURCE_PATH_H_
-#define _FF_SOURCE_PATH_H_
+#ifndef FF_SOURCE_PATH_H_
+#define FF_SOURCE_PATH_H_
 
 #include <vector>
 #include <map>
diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc
index 6b183863..f6f673d2 100644
--- a/decoder/ff_source_syntax.cc
+++ b/decoder/ff_source_syntax.cc
@@ -2,12 +2,7 @@
 
 #include <sstream>
 #include <stack>
-#ifndef HAVE_OLD_CPP
-# include <unordered_set>
-#else
-# include <tr1/unordered_set>
-namespace std { using std::tr1::unordered_set; }
-#endif
+#include <unordered_set>
 
 #include "sentence_metadata.h"
 #include "array2d.h"
diff --git a/decoder/ff_source_syntax.h b/decoder/ff_source_syntax.h
index bdd638c1..6316e881 100644
--- a/decoder/ff_source_syntax.h
+++ b/decoder/ff_source_syntax.h
@@ -1,5 +1,5 @@
-#ifndef _FF_SOURCE_SYNTAX_H_
-#define _FF_SOURCE_SYNTAX_H_
+#ifndef FF_SOURCE_SYNTAX_H_
+#define FF_SOURCE_SYNTAX_H_
 
 #include "ff.h"
 #include "hg.h"
diff --git a/decoder/ff_source_syntax2.cc b/decoder/ff_source_syntax2.cc
index a97e31d8..48991920 100644
--- a/decoder/ff_source_syntax2.cc
+++ b/decoder/ff_source_syntax2.cc
@@ -3,6 +3,7 @@
 #include <sstream>
 #include <stack>
 #include <string>
+#include <unordered_set>
 
 #include "sentence_metadata.h"
 #include "array2d.h"
diff --git a/decoder/ff_source_syntax2.h b/decoder/ff_source_syntax2.h
index f606c2bf..bbfa9eb6 100644
--- a/decoder/ff_source_syntax2.h
+++ b/decoder/ff_source_syntax2.h
@@ -1,5 +1,5 @@
-#ifndef _FF_SOURCE_SYNTAX2_H_
-#define _FF_SOURCE_SYNTAX2_H_
+#ifndef FF_SOURCE_SYNTAX2_H_
+#define FF_SOURCE_SYNTAX2_H_
 
 #include "ff.h"
 #include "hg.h"
diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h
index d2f5e84c..e2475491 100644
--- a/decoder/ff_spans.h
+++ b/decoder/ff_spans.h
@@ -1,5 +1,5 @@
-#ifndef _FF_SPANS_H_
-#define _FF_SPANS_H_
+#ifndef FF_SPANS_H_
+#define FF_SPANS_H_
 
 #include <vector>
 #include <map>
diff --git a/decoder/ff_tagger.h b/decoder/ff_tagger.h
index 46418b0c..0cb8c648 100644
--- a/decoder/ff_tagger.h
+++ b/decoder/ff_tagger.h
@@ -1,5 +1,5 @@
-#ifndef _FF_TAGGER_H_
-#define _FF_TAGGER_H_
+#ifndef FF_TAGGER_H_
+#define FF_TAGGER_H_
 
 #include <map>
 #include <boost/scoped_ptr.hpp>
diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h
index 0161f603..ec454621 100644
--- a/decoder/ff_wordalign.h
+++ b/decoder/ff_wordalign.h
@@ -1,5 +1,5 @@
-#ifndef _FF_WORD_ALIGN_H_
-#define _FF_WORD_ALIGN_H_
+#ifndef FF_WORD_ALIGN_H_
+#define FF_WORD_ALIGN_H_
 
 #include "ff.h"
 #include "array2d.h"
diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h
index e78cd2fb..94f5ff8a 100644
--- a/decoder/ff_wordset.h
+++ b/decoder/ff_wordset.h
@@ -1,5 +1,5 @@
-#ifndef _FF_WORDSET_H_
-#define _FF_WORDSET_H_
+#ifndef FF_WORDSET_H_
+#define FF_WORDSET_H_
 
 #include "ff.h"
 #include "tdict.h"
diff --git a/decoder/ffset.h b/decoder/ffset.h
index 28aef667..b7322ee2 100644
--- a/decoder/ffset.h
+++ b/decoder/ffset.h
@@ -1,5 +1,5 @@
-#ifndef _FFSET_H_
-#define _FFSET_H_
+#ifndef FFSET_H_
+#define FFSET_H_
 
 #include <vector>
 #include "value_array.h"
diff --git a/decoder/forest_writer.h b/decoder/forest_writer.h
index 819a8940..4d28de77 100644
--- a/decoder/forest_writer.h
+++ b/decoder/forest_writer.h
@@ -1,5 +1,5 @@
-#ifndef _FOREST_WRITER_H_
-#define _FOREST_WRITER_H_
+#ifndef FOREST_WRITER_H_
+#define FOREST_WRITER_H_
 
 #include <string>
 
diff --git a/decoder/freqdict.h b/decoder/freqdict.h
index 4e03fadd..07d797e2 100644
--- a/decoder/freqdict.h
+++ b/decoder/freqdict.h
@@ -1,5 +1,5 @@
-#ifndef _FREQDICT_H_
-#define _FREQDICT_H_
+#ifndef FREQDICT_H_
+#define FREQDICT_H_
 
 #include <iostream>
 #include <map>
diff --git a/decoder/fst_translator.cc b/decoder/fst_translator.cc
index 4253b652..50e6adcc 100644
--- a/decoder/fst_translator.cc
+++ b/decoder/fst_translator.cc
@@ -95,6 +95,7 @@ bool FSTTranslator::TranslateImpl(const string& input,
                               const vector<double>& weights,
                               Hypergraph* minus_lm_forest) {
   smeta->SetSourceLength(0);  // don't know how to compute this
+  smeta->input_type_ = cdec::kFOREST;
   return pimpl_->Translate(input, weights, minus_lm_forest);
 }
 
diff --git a/decoder/hg.h b/decoder/hg.h
index 4ed27d87..256f650f 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -1,5 +1,5 @@
-#ifndef _HG_H_
-#define _HG_H_
+#ifndef HG_H_
+#define HG_H_
 
 // define USE_INFO_EDGE 1 if you want lots of debug info shown with --show_derivations - otherwise it adds quite a bit of overhead if ffs have their logging enabled (e.g. ff_from_fsa)
 #ifndef USE_INFO_EDGE
diff --git a/decoder/hg_intersect.cc b/decoder/hg_intersect.cc
index 02f5a401..b9381d02 100644
--- a/decoder/hg_intersect.cc
+++ b/decoder/hg_intersect.cc
@@ -88,7 +88,7 @@ namespace HG {
 bool Intersect(const Lattice& target, Hypergraph* hg) {
   // there are a number of faster algorithms available for restricted
   // classes of hypergraph and/or target.
-  if (hg->IsLinearChain() && target.IsSentence())
+  if (hg->IsLinearChain() && IsSentence(target))
     return FastLinearIntersect(target, hg);
 
   vector<bool> rem(hg->edges_.size(), false);
diff --git a/decoder/hg_intersect.h b/decoder/hg_intersect.h
index 29a5ea2a..19c1c177 100644
--- a/decoder/hg_intersect.h
+++ b/decoder/hg_intersect.h
@@ -1,5 +1,5 @@
-#ifndef _HG_INTERSECT_H_
-#define _HG_INTERSECT_H_
+#ifndef HG_INTERSECT_H_
+#define HG_INTERSECT_H_
 
 #include "lattice.h"
 
diff --git a/decoder/hg_io.h b/decoder/hg_io.h
index 58af8132..5a2bd808 100644
--- a/decoder/hg_io.h
+++ b/decoder/hg_io.h
@@ -1,5 +1,5 @@
-#ifndef _HG_IO_H_
-#define _HG_IO_H_
+#ifndef HG_IO_H_
+#define HG_IO_H_
 
 #include <iostream>
 #include <string>
diff --git a/decoder/hg_remove_eps.h b/decoder/hg_remove_eps.h
index 82f06039..f67fe6e2 100644
--- a/decoder/hg_remove_eps.h
+++ b/decoder/hg_remove_eps.h
@@ -1,5 +1,5 @@
-#ifndef _HG_REMOVE_EPS_H_
-#define _HG_REMOVE_EPS_H_
+#ifndef HG_REMOVE_EPS_H_
+#define HG_REMOVE_EPS_H_
 
 #include "wordid.h"
 class Hypergraph;
diff --git a/decoder/hg_sampler.h b/decoder/hg_sampler.h
index 6ac39a20..4267b5ec 100644
--- a/decoder/hg_sampler.h
+++ b/decoder/hg_sampler.h
@@ -1,6 +1,5 @@
-#ifndef _HG_SAMPLER_H_
-#define _HG_SAMPLER_H_
-
+#ifndef HG_SAMPLER_H_
+#define HG_SAMPLER_H_
 
 #include <vector>
 #include <string>
diff --git a/decoder/hg_union.h b/decoder/hg_union.h
index 34624246..bb7e2d09 100644
--- a/decoder/hg_union.h
+++ b/decoder/hg_union.h
@@ -1,5 +1,5 @@
-#ifndef _HG_UNION_H_
-#define _HG_UNION_H_
+#ifndef HG_UNION_H_
+#define HG_UNION_H_
 
 class Hypergraph;
 namespace HG {
diff --git a/decoder/incremental.h b/decoder/incremental.h
index f791a626..46b4817b 100644
--- a/decoder/incremental.h
+++ b/decoder/incremental.h
@@ -1,5 +1,5 @@
-#ifndef _INCREMENTAL_H_
-#define _INCREMENTAL_H_
+#ifndef INCREMENTAL_H_
+#define INCREMENTAL_H_
 
 #include "weights.h"
 #include <vector>
diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h
index c0377fe8..d5bda63c 100644
--- a/decoder/inside_outside.h
+++ b/decoder/inside_outside.h
@@ -1,5 +1,5 @@
-#ifndef _INSIDE_OUTSIDE_H_
-#define _INSIDE_OUTSIDE_H_
+#ifndef INSIDE_OUTSIDE_H_
+#define INSIDE_OUTSIDE_H_
 
 #include <vector>
 #include <algorithm>
diff --git a/decoder/json_parse.h b/decoder/json_parse.h
index c3cba954..85e2eff1 100644
--- a/decoder/json_parse.h
+++ b/decoder/json_parse.h
@@ -1,5 +1,5 @@
-#ifndef _JSON_WRAPPER_H_
-#define _JSON_WRAPPER_H_
+#ifndef JSON_WRAPPER_H_
+#define JSON_WRAPPER_H_
 
 #include <iostream>
 #include <cassert>
diff --git a/decoder/kbest.h b/decoder/kbest.h
index c7194c7e..d6b3eb94 100644
--- a/decoder/kbest.h
+++ b/decoder/kbest.h
@@ -1,5 +1,5 @@
-#ifndef _HG_KBEST_H_
-#define _HG_KBEST_H_
+#ifndef HG_KBEST_H_
+#define HG_KBEST_H_
 
 #include <vector>
 #include <utility>
diff --git a/decoder/lattice.cc b/decoder/lattice.cc
index 89da3cd0..1f97048d 100644
--- a/decoder/lattice.cc
+++ b/decoder/lattice.cc
@@ -50,7 +50,6 @@ void LatticeTools::ConvertTextToLattice(const string& text, Lattice* pl) {
   l.resize(ids.size());
   for (int i = 0; i < l.size(); ++i)
     l[i].push_back(LatticeArc(ids[i], 0.0, 1));
-  l.is_sentence_ = true;
 }
 
 void LatticeTools::ConvertTextOrPLF(const string& text_or_plf, Lattice* pl) {
diff --git a/decoder/lattice.h b/decoder/lattice.h
index ad4ca50d..1258d3f5 100644
--- a/decoder/lattice.h
+++ b/decoder/lattice.h
@@ -1,5 +1,5 @@
-#ifndef __LATTICE_H_
-#define __LATTICE_H_
+#ifndef LATTICE_H_
+#define LATTICE_H_
 
 #include <string>
 #include <vector>
@@ -25,22 +25,24 @@ class Lattice : public std::vector<std::vector<LatticeArc> > {
   friend void LatticeTools::ConvertTextOrPLF(const std::string& text_or_plf, Lattice* pl);
   friend void LatticeTools::ConvertTextToLattice(const std::string& text, Lattice* pl);
  public:
-  Lattice() : is_sentence_(false) {}
+  Lattice() {}
   explicit Lattice(size_t t, const std::vector<LatticeArc>& v = std::vector<LatticeArc>()) :
-   std::vector<std::vector<LatticeArc> >(t, v),
-   is_sentence_(false) {}
+   std::vector<std::vector<LatticeArc>>(t, v) {}
   int Distance(int from, int to) const {
     if (dist_.empty())
       return (to - from);
     return dist_(from, to);
   }
-  // TODO this should actually be computed based on the contents
-  // of the lattice
-  bool IsSentence() const { return is_sentence_; }
  private:
   void ComputeDistances();
   Array2D<int> dist_;
-  bool is_sentence_;
 };
 
+// True iff no position of the lattice holds more than one arc.
+inline bool IsSentence(const Lattice& in) {
+  for (const auto& arcs : in)
+    if (arcs.size() > 1) return false;
+  return true;
+}
+
 #endif
diff --git a/decoder/lexalign.cc b/decoder/lexalign.cc
index 11f20de7..dd529311 100644
--- a/decoder/lexalign.cc
+++ b/decoder/lexalign.cc
@@ -114,10 +114,9 @@ bool LexicalAlign::TranslateImpl(const string& input,
                       Hypergraph* forest) {
   Lattice& lattice = smeta->src_lattice_;
   LatticeTools::ConvertTextOrPLF(input, &lattice);
-  if (!lattice.IsSentence()) {
-    // lexical models make independence assumptions
-    // that don't work with lattices or conf nets
-    cerr << "LexicalTrans: cannot deal with lattice source input!\n";
+  smeta->ComputeInputLatticeType();  // classify the freshly parsed source lattice
+  if (smeta->GetInputType() != cdec::kSEQUENCE) {
+    cerr << "LexicalTrans: cannot deal with non-sequence input!\n";  // newline-terminated before abort()
     abort();
   }
   smeta->SetSourceLength(lattice.size());
diff --git a/decoder/lexalign.h b/decoder/lexalign.h
index 7ba4fe64..6415f4f9 100644
--- a/decoder/lexalign.h
+++ b/decoder/lexalign.h
@@ -1,5 +1,5 @@
-#ifndef _LEXALIGN_H_
-#define _LEXALIGN_H_
+#ifndef LEXALIGN_H_
+#define LEXALIGN_H_
 
 #include "translator.h"
 #include "lattice.h"
diff --git a/decoder/lextrans.cc b/decoder/lextrans.cc
index 74a18c3f..d13a891a 100644
--- a/decoder/lextrans.cc
+++ b/decoder/lextrans.cc
@@ -271,10 +271,9 @@ bool LexicalTrans::TranslateImpl(const string& input,
                       Hypergraph* forest) {
   Lattice& lattice = smeta->src_lattice_;
   LatticeTools::ConvertTextOrPLF(input, &lattice);
-  if (!lattice.IsSentence()) {
-    // lexical models make independence assumptions
-    // that don't work with lattices or conf nets
-    cerr << "LexicalTrans: cannot deal with lattice source input!\n";
+  smeta->ComputeInputLatticeType();
+  if (smeta->GetInputType() != cdec::kSEQUENCE) {
+    cerr << "LexicalTrans: cannot deal with non-sequence inputs\n";
     abort();
   }
   smeta->SetSourceLength(lattice.size());
diff --git a/decoder/lextrans.h b/decoder/lextrans.h
index 2d51e7c0..a23a4e0d 100644
--- a/decoder/lextrans.h
+++ b/decoder/lextrans.h
@@ -1,5 +1,5 @@
-#ifndef _LEXTrans_H_
-#define _LEXTrans_H_
+#ifndef LEXTrans_H_
+#define LEXTrans_H_
 
 #include "translator.h"
 #include "lattice.h"
diff --git a/decoder/node_state_hash.h b/decoder/node_state_hash.h
index 9fc01a09..f380fcb1 100644
--- a/decoder/node_state_hash.h
+++ b/decoder/node_state_hash.h
@@ -1,5 +1,5 @@
-#ifndef _NODE_STATE_HASH_
-#define _NODE_STATE_HASH_
+#ifndef NODE_STATE_HASH_
+#define NODE_STATE_HASH_
 
 #include <cassert>
 #include <cstring>
diff --git a/decoder/phrasebased_translator.cc b/decoder/phrasebased_translator.cc
index 8048248e..8415353a 100644
--- a/decoder/phrasebased_translator.cc
+++ b/decoder/phrasebased_translator.cc
@@ -114,6 +114,7 @@ struct PhraseBasedTranslatorImpl {
     Lattice lattice;
     LatticeTools::ConvertTextOrPLF(input, &lattice);
     smeta->SetSourceLength(lattice.size());
+    smeta->ComputeInputLatticeType();
     size_t est_nodes = lattice.size() * lattice.size() * (1 << max_distortion);
     minus_lm_forest->ReserveNodes(est_nodes, est_nodes * 100);
     if (add_pass_through_rules) {
diff --git a/decoder/phrasebased_translator.h b/decoder/phrasebased_translator.h
index e5e3f8a2..10790d0d 100644
--- a/decoder/phrasebased_translator.h
+++ b/decoder/phrasebased_translator.h
@@ -1,5 +1,5 @@
-#ifndef _PHRASEBASED_TRANSLATOR_H_
-#define _PHRASEBASED_TRANSLATOR_H_
+#ifndef PHRASEBASED_TRANSLATOR_H_
+#define PHRASEBASED_TRANSLATOR_H_
 
 #include "translator.h"
 
diff --git a/decoder/phrasetable_fst.h b/decoder/phrasetable_fst.h
index 477de1f7..966bb14d 100644
--- a/decoder/phrasetable_fst.h
+++ b/decoder/phrasetable_fst.h
@@ -1,5 +1,5 @@
-#ifndef _PHRASETABLE_FST_H_
-#define _PHRASETABLE_FST_H_
+#ifndef PHRASETABLE_FST_H_
+#define PHRASETABLE_FST_H_
 
 #include <vector>
 #include <string>
diff --git a/decoder/rescore_translator.cc b/decoder/rescore_translator.cc
index 10192f7a..18c83c56 100644
--- a/decoder/rescore_translator.cc
+++ b/decoder/rescore_translator.cc
@@ -53,6 +53,7 @@ bool RescoreTranslator::TranslateImpl(const string& input,
                               const vector<double>& weights,
                               Hypergraph* minus_lm_forest) {
   smeta->SetSourceLength(0);  // don't know how to compute this
+  smeta->input_type_ = cdec::kFOREST;
   return pimpl_->Translate(input, weights, minus_lm_forest);
 }
 
diff --git a/decoder/rule_lexer.h b/decoder/rule_lexer.h
index e15c056d..5267f9ca 100644
--- a/decoder/rule_lexer.h
+++ b/decoder/rule_lexer.h
@@ -1,5 +1,5 @@
-#ifndef _RULE_LEXER_H_
-#define _RULE_LEXER_H_
+#ifndef RULE_LEXER_H_
+#define RULE_LEXER_H_
 
 #include <iostream>
 #include <string>
diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc
index c3cfcaad..538f82ec 100644
--- a/decoder/scfg_translator.cc
+++ b/decoder/scfg_translator.cc
@@ -1,5 +1,6 @@
 #include <algorithm>
 #include <vector>
+#include <unordered_set>
 #include <boost/foreach.hpp>
 #include <boost/functional/hash.hpp>
 #include "fast_lexical_cast.hpp"
@@ -194,6 +195,7 @@ struct SCFGTranslatorImpl {
     Lattice& lattice = smeta->src_lattice_;
     LatticeTools::ConvertTextOrPLF(input, &lattice);
     smeta->SetSourceLength(lattice.size());
+    smeta->ComputeInputLatticeType();
     if (add_pass_through_rules){
       if (!SILENT) cerr << "Adding pass through grammar" << endl;
       PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_, num_pt_features);
diff --git a/decoder/sentence_metadata.h b/decoder/sentence_metadata.h
index f2a779f4..e13c2ca5 100644
--- a/decoder/sentence_metadata.h
+++ b/decoder/sentence_metadata.h
@@ -1,14 +1,20 @@
-#ifndef _SENTENCE_METADATA_H_
-#define _SENTENCE_METADATA_H_
+#ifndef SENTENCE_METADATA_H_
+#define SENTENCE_METADATA_H_
 
 #include <string>
 #include <map>
 #include <cassert>
 #include "lattice.h"
+#include "tree_fragment.h"
 
 struct DocScorer;  // deprecated, will be removed
 struct Score;     // deprecated, will be removed
 
+namespace cdec {
+enum InputType { kSEQUENCE, kTREE, kLATTICE, kFOREST, kUNKNOWN };  // SentenceMetadata starts out as kUNKNOWN
+class TreeFragment;
+}
+
 class SentenceMetadata {
  public:
   friend class DecoderImpl;
@@ -17,7 +23,17 @@ class SentenceMetadata {
     src_len_(-1),
     has_reference_(ref.size() > 0),
     trg_len_(ref.size()),
-    ref_(has_reference_ ? &ref : NULL) {}
+    ref_(has_reference_ ? &ref : NULL),
+    input_type_(cdec::kUNKNOWN) {}
+
+  // Classifies src_lattice_: kSEQUENCE when no position has alternative
+  // arcs (the IsSentence() test from lattice.h), otherwise kLATTICE.
+  void ComputeInputLatticeType() {
+    input_type_ = IsSentence(src_lattice_)
+                      ? cdec::kSEQUENCE
+                      : cdec::kLATTICE;
+  }
+  cdec::InputType GetInputType() const { return input_type_; }
 
   int GetSentenceId() const { return sent_id_; }
 
@@ -25,6 +41,8 @@ class SentenceMetadata {
   // it has parsed the source
   void SetSourceLength(int sl) { src_len_ = sl; }
 
+  const cdec::TreeFragment& GetSourceTree() const { return src_tree_; }
+
   // this should be called if a separate model needs to
   // specify how long the target sentence should be
   void SetTargetLength(int tl) {
@@ -64,12 +82,15 @@ class SentenceMetadata {
   const Score* app_score;
  public:
   Lattice src_lattice_;  // this will only be set if inputs are finite state!
+  cdec::TreeFragment src_tree_; // this will be set only if inputs are trees
  private:
   // you need to be very careful when depending on these values
   // they will only be set during training / alignment contexts
   const bool has_reference_;
   int trg_len_;
   const Lattice* const ref_;
+ public:
+  cdec::InputType input_type_;
 };
 
 #endif
diff --git a/decoder/tagger.cc b/decoder/tagger.cc
index 30fb055f..500d2061 100644
--- a/decoder/tagger.cc
+++ b/decoder/tagger.cc
@@ -100,6 +100,8 @@ bool Tagger::TranslateImpl(const string& input,
   Lattice& lattice = smeta->src_lattice_;
   LatticeTools::ConvertTextToLattice(input, &lattice);
   smeta->SetSourceLength(lattice.size());
+  smeta->ComputeInputLatticeType();
+  assert(smeta->GetInputType() == cdec::kSEQUENCE);
   vector<WordID> sequence(lattice.size());
   for (int i = 0; i < lattice.size(); ++i) {
     assert(lattice[i].size() == 1);
diff --git a/decoder/tagger.h b/decoder/tagger.h
index 9ac820d9..51659d5b 100644
--- a/decoder/tagger.h
+++ b/decoder/tagger.h
@@ -1,5 +1,5 @@
-#ifndef _TAGGER_H_
-#define _TAGGER_H_
+#ifndef TAGGER_H_
+#define TAGGER_H_
 
 #include "translator.h"
 
diff --git a/decoder/translator.h b/decoder/translator.h
index ba218a0b..096cf191 100644
--- a/decoder/translator.h
+++ b/decoder/translator.h
@@ -1,5 +1,5 @@
-#ifndef _TRANSLATOR_H_
-#define _TRANSLATOR_H_
+#ifndef TRANSLATOR_H_
+#define TRANSLATOR_H_
 
 #include <string>
 #include <vector>
diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc
index adc8dc89..08dae64c 100644
--- a/decoder/tree2string_translator.cc
+++ b/decoder/tree2string_translator.cc
@@ -2,6 +2,7 @@
 #include <vector>
 #include <queue>
 #include <map>
+#include <unordered_map>
 #include <unordered_set>
 #include <boost/shared_ptr.hpp>
 #include <boost/functional/hash.hpp>
@@ -31,12 +32,12 @@ static void ReadTree2StringGrammar(istream* in, Tree2StringGrammarNode* root, bo
     ++lc;
     if (line.size() == 0 || line[0] == '#') continue;
     std::vector<StringPiece> fields = TokenizeMultisep(line, " ||| ");
-    if (has_multiple_states && fields.size() != 4) {
-      cerr << "Expected 4 fields in rule file but line " << lc << " is:\n" << line << endl;
+    if (has_multiple_states && fields.size() < 4) {
+      cerr << "Expected at least 4 fields in rule file but line " << lc << " is:\n" << line << endl;
       abort();
     }
-    if (!has_multiple_states && fields.size() != 3) {
-      cerr << "Expected 3 fields in rule file but line " << lc << " is:\n" << line << endl;
+    if (!has_multiple_states && fields.size() < 3) {
+      cerr << "Expected at least 3 fields in rule file but line " << lc << " is:\n" << line << endl;
       abort();
     }
     
@@ -72,6 +73,7 @@ static void ReadTree2StringGrammar(istream* in, Tree2StringGrammarNode* root, bo
       cerr << "Not implemented...\n"; abort(); // TODO read in states
     } else {
       os << " ||| " << fields[1] << " ||| " << fields[2];
+      if (fields.size() > 3) os << " ||| " << fields[3];
       rule.reset(new TRule(os.str()));
     }
     cur->rules.push_back(rule);
@@ -286,6 +288,8 @@ struct Tree2StringTranslatorImpl {
                  const vector<double>& weights,
                  Hypergraph* minus_lm_forest) {
     cdec::TreeFragment input_tree(input, false);
+    smeta->src_tree_ = input_tree;
+    smeta->input_type_ = cdec::kTREE;
     if (add_pass_through_rules) CreatePassThroughRules(input_tree);
     Hypergraph hg;
     hg.ReserveNodes(input_tree.nodes.size());
diff --git a/decoder/tree_fragment.cc b/decoder/tree_fragment.cc
index 42f7793a..5f717c5b 100644
--- a/decoder/tree_fragment.cc
+++ b/decoder/tree_fragment.cc
@@ -64,6 +64,13 @@ int TreeFragment::SetupSpansRec(unsigned cur, int left) {
   return right;
 }
 
+// Collects, in iteration order, each symbol for which IsTerminal() holds.
+vector<int> TreeFragment::Terminals() const {
+  vector<int> result;
+  for (const auto& sym : *this) if (IsTerminal(sym)) result.push_back(sym);
+  return result;
+}
+
 // cp is the character index in the tree
 // np keeps track of the nodes (nonterminals) that have been built
 // symp keeps track of the terminal symbols that have been built
diff --git a/decoder/tree_fragment.h b/decoder/tree_fragment.h
index 6b4842ee..e19b79fb 100644
--- a/decoder/tree_fragment.h
+++ b/decoder/tree_fragment.h
@@ -72,6 +72,8 @@ class TreeFragment {
   BreadthFirstIterator bfs_begin(unsigned node_idx) const;
   BreadthFirstIterator bfs_end() const;
 
+  std::vector<int> Terminals() const;
+
  private:
   // cp is the character index in the tree
   // np keeps track of the nodes (nonterminals) that have been built
diff --git a/decoder/trule.h b/decoder/trule.h
index 243b0da9..adef7cc7 100644
--- a/decoder/trule.h
+++ b/decoder/trule.h
@@ -1,5 +1,5 @@
-#ifndef _RULE_H_
-#define _RULE_H_
+#ifndef TRULE_H_
+#define TRULE_H_
 
 #include <algorithm>
 #include <vector>
diff --git a/decoder/viterbi.h b/decoder/viterbi.h
index a8a0ea7f..20ee73cc 100644
--- a/decoder/viterbi.h
+++ b/decoder/viterbi.h
@@ -1,5 +1,5 @@
-#ifndef _VITERBI_H_
-#define _VITERBI_H_
+#ifndef VITERBI_H_
+#define VITERBI_H_
 
 #include <vector>
 #include "prob.h"
author	Patrick Simianer <p@simianer.de>	2014-10-13 19:03:48 +0100
committer	Patrick Simianer <p@simianer.de>	2014-10-13 19:03:48 +0100
commit	cb9fb7088dde35881516c088db402abe747d49fa (patch)
tree	a91e4935a7941f1b261f76d88ab41fa3078a1891 /decoder
parent	0a00e57e921c8eca8e02364db7d2e6607bfdcebc (diff)
parent	b1ed81ef3216b212295afa76c5d20a56fb647204 (diff)