summary | refs | log | tree | commit | diff
path: root/decoder
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-10-13 19:03:48 +0100
committer Patrick Simianer <p@simianer.de> 2014-10-13 19:03:48 +0100
commit cb9fb7088dde35881516c088db402abe747d49fa (patch)
tree a91e4935a7941f1b261f76d88ab41fa3078a1891 /decoder
parent 0a00e57e921c8eca8e02364db7d2e6607bfdcebc (diff)
parent b1ed81ef3216b212295afa76c5d20a56fb647204 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'decoder')
-rw-r--r-- decoder/aligner.cc 6
-rw-r--r-- decoder/apply_models.h 4
-rw-r--r-- decoder/bottom_up_parser.h 4
-rw-r--r-- decoder/csplit.cc 1
-rw-r--r-- decoder/csplit.h 4
-rw-r--r-- decoder/decoder.cc 2
-rw-r--r-- decoder/decoder.h 4
-rw-r--r-- decoder/earley_composer.h 4
-rw-r--r-- decoder/factored_lexicon_helper.h 4
-rw-r--r-- decoder/ff.h 4
-rw-r--r-- decoder/ff_basic.cc 9
-rw-r--r-- decoder/ff_basic.h 7
-rw-r--r-- decoder/ff_bleu.h 4
-rw-r--r-- decoder/ff_charset.h 4
-rw-r--r-- decoder/ff_context.h 5
-rw-r--r-- decoder/ff_csplit.cc 8
-rw-r--r-- decoder/ff_csplit.h 4
-rw-r--r-- decoder/ff_external.h 4
-rw-r--r-- decoder/ff_factory.h 4
-rw-r--r-- decoder/ff_klm.h 4
-rw-r--r-- decoder/ff_lm.h 4
-rw-r--r-- decoder/ff_ngrams.h 4
-rw-r--r-- decoder/ff_parse_match.h 4
-rw-r--r-- decoder/ff_rules.h 4
-rw-r--r-- decoder/ff_ruleshape.h 4
-rw-r--r-- decoder/ff_soft_syntax.h 4
-rw-r--r-- decoder/ff_soft_syntax_mindist.h 4
-rw-r--r-- decoder/ff_source_path.h 4
-rw-r--r-- decoder/ff_source_syntax.cc 7
-rw-r--r-- decoder/ff_source_syntax.h 4
-rw-r--r-- decoder/ff_source_syntax2.cc 1
-rw-r--r-- decoder/ff_source_syntax2.h 4
-rw-r--r-- decoder/ff_spans.h 4
-rw-r--r-- decoder/ff_tagger.h 4
-rw-r--r-- decoder/ff_wordalign.h 4
-rw-r--r-- decoder/ff_wordset.h 4
-rw-r--r-- decoder/ffset.h 4
-rw-r--r-- decoder/forest_writer.h 4
-rw-r--r-- decoder/freqdict.h 4
-rw-r--r-- decoder/fst_translator.cc 1
-rw-r--r-- decoder/hg.h 4
-rw-r--r-- decoder/hg_intersect.cc 2
-rw-r--r-- decoder/hg_intersect.h 4
-rw-r--r-- decoder/hg_io.h 4
-rw-r--r-- decoder/hg_remove_eps.h 4
-rw-r--r-- decoder/hg_sampler.h 5
-rw-r--r-- decoder/hg_union.h 4
-rw-r--r-- decoder/incremental.h 4
-rw-r--r-- decoder/inside_outside.h 4
-rw-r--r-- decoder/json_parse.h 4
-rw-r--r-- decoder/kbest.h 4
-rw-r--r-- decoder/lattice.cc 1
-rw-r--r-- decoder/lattice.h 20
-rw-r--r-- decoder/lexalign.cc 7
-rw-r--r-- decoder/lexalign.h 4
-rw-r--r-- decoder/lextrans.cc 7
-rw-r--r-- decoder/lextrans.h 4
-rw-r--r-- decoder/node_state_hash.h 4
-rw-r--r-- decoder/phrasebased_translator.cc 1
-rw-r--r-- decoder/phrasebased_translator.h 4
-rw-r--r-- decoder/phrasetable_fst.h 4
-rw-r--r-- decoder/rescore_translator.cc 1
-rw-r--r-- decoder/rule_lexer.h 4
-rw-r--r-- decoder/scfg_translator.cc 2
-rw-r--r-- decoder/sentence_metadata.h 27
-rw-r--r-- decoder/tagger.cc 2
-rw-r--r-- decoder/tagger.h 4
-rw-r--r-- decoder/translator.h 4
-rw-r--r-- decoder/tree2string_translator.cc 12
-rw-r--r-- decoder/tree_fragment.cc 7
-rw-r--r-- decoder/tree_fragment.h 2
-rw-r--r-- decoder/trule.h 4
-rw-r--r-- decoder/viterbi.h 4
73 files changed, 184 insertions, 155 deletions
diff --git a/decoder/aligner.cc b/decoder/aligner.cc
index 232e022a..fd648370 100644
--- a/decoder/aligner.cc
+++ b/decoder/aligner.cc
@@ -198,13 +198,13 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice,
}
const Hypergraph* g = &in_g;
HypergraphP new_hg;
- if (!src_lattice.IsSentence() ||
- !trg_lattice.IsSentence()) {
+ if (!IsSentence(src_lattice) ||
+ !IsSentence(trg_lattice)) {
if (map_instead_of_viterbi) {
cerr << " Lattice alignment: using Viterbi instead of MAP alignment\n";
}
map_instead_of_viterbi = false;
- fix_up_src_spans = !src_lattice.IsSentence();
+ fix_up_src_spans = !IsSentence(src_lattice);
}
KBest::KBestDerivations<vector<Hypergraph::Edge const*>, ViterbiPathTraversal> kbest(in_g, k_best);
diff --git a/decoder/apply_models.h b/decoder/apply_models.h
index 19a4c7be..f03c973a 100644
--- a/decoder/apply_models.h
+++ b/decoder/apply_models.h
@@ -1,5 +1,5 @@
-#ifndef _APPLY_MODELS_H_
-#define _APPLY_MODELS_H_
+#ifndef APPLY_MODELS_H_
+#define APPLY_MODELS_H_
#include <iostream>
diff --git a/decoder/bottom_up_parser.h b/decoder/bottom_up_parser.h
index 546bfb54..628bb96d 100644
--- a/decoder/bottom_up_parser.h
+++ b/decoder/bottom_up_parser.h
@@ -1,5 +1,5 @@
-#ifndef _BOTTOM_UP_PARSER_H_
-#define _BOTTOM_UP_PARSER_H_
+#ifndef BOTTOM_UP_PARSER_H_
+#define BOTTOM_UP_PARSER_H_
#include <vector>
#include <string>
diff --git a/decoder/csplit.cc b/decoder/csplit.cc
index 4a723822..7ee4092e 100644
--- a/decoder/csplit.cc
+++ b/decoder/csplit.cc
@@ -151,6 +151,7 @@ bool CompoundSplit::TranslateImpl(const string& input,
smeta->SetSourceLength(in.size()); // TODO do utf8 or somethign
for (int i = 0; i < in.size(); ++i)
smeta->src_lattice_.push_back(vector<LatticeArc>(1, LatticeArc(TD::Convert(in[i]), 0.0, 1)));
+ smeta->ComputeInputLatticeType();
pimpl_->BuildTrellis(in, forest);
forest->Reweight(weights);
return true;
diff --git a/decoder/csplit.h b/decoder/csplit.h
index 82ed23fc..83d457b8 100644
--- a/decoder/csplit.h
+++ b/decoder/csplit.h
@@ -1,5 +1,5 @@
-#ifndef _CSPLIT_H_
-#define _CSPLIT_H_
+#ifndef CSPLIT_H_
+#define CSPLIT_H_
#include "translator.h"
#include "lattice.h"
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index c384c33f..9e8d692a 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -86,7 +86,7 @@ struct ELengthWeightFunction {
}
};
inline void ShowBanner() {
- cerr << "cdec (c) 2009--2014 by Chris Dyer\n";
+ cerr << "cdec (c) 2009--2014 by Chris Dyer" << endl;
}
inline string str(char const* name,po::variables_map const& conf) {
diff --git a/decoder/decoder.h b/decoder/decoder.h
index 8039a42b..a545206b 100644
--- a/decoder/decoder.h
+++ b/decoder/decoder.h
@@ -1,5 +1,5 @@
-#ifndef _DECODER_H_
-#define _DECODER_H_
+#ifndef DECODER_H_
+#define DECODER_H_
#include <iostream>
#include <string>
diff --git a/decoder/earley_composer.h b/decoder/earley_composer.h
index 9f786bf6..31602f67 100644
--- a/decoder/earley_composer.h
+++ b/decoder/earley_composer.h
@@ -1,5 +1,5 @@
-#ifndef _EARLEY_COMPOSER_H_
-#define _EARLEY_COMPOSER_H_
+#ifndef EARLEY_COMPOSER_H_
+#define EARLEY_COMPOSER_H_
#include <iostream>
diff --git a/decoder/factored_lexicon_helper.h b/decoder/factored_lexicon_helper.h
index 7fedc517..460bdebb 100644
--- a/decoder/factored_lexicon_helper.h
+++ b/decoder/factored_lexicon_helper.h
@@ -1,5 +1,5 @@
-#ifndef _FACTORED_LEXICON_HELPER_
-#define _FACTORED_LEXICON_HELPER_
+#ifndef FACTORED_LEXICON_HELPER_
+#define FACTORED_LEXICON_HELPER_
#include <cassert>
#include <vector>
diff --git a/decoder/ff.h b/decoder/ff.h
index 3280592e..eed1e3fb 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -1,5 +1,5 @@
-#ifndef _FF_H_
-#define _FF_H_
+#ifndef FF_H_
+#define FF_H_
#include <string>
#include <vector>
diff --git a/decoder/ff_basic.cc b/decoder/ff_basic.cc
index f9404d24..f960418a 100644
--- a/decoder/ff_basic.cc
+++ b/decoder/ff_basic.cc
@@ -49,9 +49,7 @@ void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
features->set_value(fid_, edge.rule_->FWords() * value_);
}
-
-ArityPenalty::ArityPenalty(const std::string& param) :
- value_(-1.0 / log(10)) {
+ArityPenalty::ArityPenalty(const std::string& param) {
string fname = "Arity_";
unsigned MAX=DEFAULT_MAX_ARITY;
using namespace boost;
@@ -61,7 +59,8 @@ ArityPenalty::ArityPenalty(const std::string& param) :
WordID fid=FD::Convert(fname+lexical_cast<string>(i));
fids_.push_back(fid);
}
- while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen. doesn't change anything
+ // pretty up features vector in case FD was frozen. doesn't change anything
+ while (!fids_.empty() && fids_.back()==0) fids_.pop_back();
}
void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
@@ -75,6 +74,6 @@ void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
(void) state;
(void) estimated_features;
unsigned a=edge.Arity();
- features->set_value(a<fids_.size()?fids_[a]:0, value_);
+ if (a < fids_.size()) features->set_value(fids_[a], 1.0);
}
diff --git a/decoder/ff_basic.h b/decoder/ff_basic.h
index 901c0110..c63daf0f 100644
--- a/decoder/ff_basic.h
+++ b/decoder/ff_basic.h
@@ -1,5 +1,5 @@
-#ifndef _FF_BASIC_H_
-#define _FF_BASIC_H_
+#ifndef FF_BASIC_H_
+#define FF_BASIC_H_
#include "ff.h"
@@ -41,7 +41,7 @@ class SourceWordPenalty : public FeatureFunction {
const double value_;
};
-#define DEFAULT_MAX_ARITY 9
+#define DEFAULT_MAX_ARITY 50
#define DEFAULT_MAX_ARITY_STRINGIZE(x) #x
#define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x)
#define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY)
@@ -62,7 +62,6 @@ class ArityPenalty : public FeatureFunction {
void* context) const;
private:
std::vector<WordID> fids_;
- const double value_;
};
#endif
diff --git a/decoder/ff_bleu.h b/decoder/ff_bleu.h
index 344dc788..8ca2c095 100644
--- a/decoder/ff_bleu.h
+++ b/decoder/ff_bleu.h
@@ -1,5 +1,5 @@
-#ifndef _BLEU_FF_H_
-#define _BLEU_FF_H_
+#ifndef BLEU_FF_H_
+#define BLEU_FF_H_
#include <vector>
#include <string>
diff --git a/decoder/ff_charset.h b/decoder/ff_charset.h
index 267ef65d..e22ece2b 100644
--- a/decoder/ff_charset.h
+++ b/decoder/ff_charset.h
@@ -1,5 +1,5 @@
-#ifndef _FFCHARSET_H_
-#define _FFCHARSET_H_
+#ifndef FFCHARSET_H_
+#define FFCHARSET_H_
#include <string>
#include <map>
diff --git a/decoder/ff_context.h b/decoder/ff_context.h
index 19198ec3..ed1aea2b 100644
--- a/decoder/ff_context.h
+++ b/decoder/ff_context.h
@@ -1,6 +1,5 @@
-
-#ifndef _FF_CONTEXT_H_
-#define _FF_CONTEXT_H_
+#ifndef FF_CONTEXT_H_
+#define FF_CONTEXT_H_
#include <vector>
#include <boost/xpressive/xpressive.hpp>
diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc
index a0e538d3..550ff69a 100644
--- a/decoder/ff_csplit.cc
+++ b/decoder/ff_csplit.cc
@@ -2,6 +2,8 @@
#include <set>
#include <cstring>
+#include <unordered_set>
+#include <unordered_map>
#include "klm/lm/model.hh"
@@ -14,12 +16,6 @@
#include "stringlib.h"
#include "tdict.h"
-#ifndef HAVE_OLD_CPP
-# include <unordered_set>
-#else
-# include <tr1/unordered_set>
-namespace std { using std::tr1::unordered_set; }
-#endif
using namespace std;
struct BasicCSplitFeaturesImpl {
diff --git a/decoder/ff_csplit.h b/decoder/ff_csplit.h
index 79bf2886..227f2a14 100644
--- a/decoder/ff_csplit.h
+++ b/decoder/ff_csplit.h
@@ -1,5 +1,5 @@
-#ifndef _FF_CSPLIT_H_
-#define _FF_CSPLIT_H_
+#ifndef FF_CSPLIT_H_
+#define FF_CSPLIT_H_
#include <boost/shared_ptr.hpp>
diff --git a/decoder/ff_external.h b/decoder/ff_external.h
index 3e2bee51..fd12a37c 100644
--- a/decoder/ff_external.h
+++ b/decoder/ff_external.h
@@ -1,5 +1,5 @@
-#ifndef _FFEXTERNAL_H_
-#define _FFEXTERNAL_H_
+#ifndef FFEXTERNAL_H_
+#define FFEXTERNAL_H_
#include "ff.h"
diff --git a/decoder/ff_factory.h b/decoder/ff_factory.h
index 1aa8e55f..ba9be9ac 100644
--- a/decoder/ff_factory.h
+++ b/decoder/ff_factory.h
@@ -1,5 +1,5 @@
-#ifndef _FF_FACTORY_H_
-#define _FF_FACTORY_H_
+#ifndef FF_FACTORY_H_
+#define FF_FACTORY_H_
//TODO: use http://www.boost.org/doc/libs/1_43_0/libs/functional/factory/doc/html/index.html ?
diff --git a/decoder/ff_klm.h b/decoder/ff_klm.h
index db4032f7..c8350623 100644
--- a/decoder/ff_klm.h
+++ b/decoder/ff_klm.h
@@ -1,5 +1,5 @@
-#ifndef _KLM_FF_H_
-#define _KLM_FF_H_
+#ifndef KLM_FF_H_
+#define KLM_FF_H_
#include <vector>
#include <string>
diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h
index 85e79704..83a2e186 100644
--- a/decoder/ff_lm.h
+++ b/decoder/ff_lm.h
@@ -1,5 +1,5 @@
-#ifndef _LM_FF_H_
-#define _LM_FF_H_
+#ifndef LM_FF_H_
+#define LM_FF_H_
#include <vector>
#include <string>
diff --git a/decoder/ff_ngrams.h b/decoder/ff_ngrams.h
index 4965d235..5dea9a7d 100644
--- a/decoder/ff_ngrams.h
+++ b/decoder/ff_ngrams.h
@@ -1,5 +1,5 @@
-#ifndef _NGRAMS_FF_H_
-#define _NGRAMS_FF_H_
+#ifndef NGRAMS_FF_H_
+#define NGRAMS_FF_H_
#include <vector>
#include <map>
diff --git a/decoder/ff_parse_match.h b/decoder/ff_parse_match.h
index 7820b418..188c406a 100644
--- a/decoder/ff_parse_match.h
+++ b/decoder/ff_parse_match.h
@@ -1,5 +1,5 @@
-#ifndef _FF_PARSE_MATCH_H_
-#define _FF_PARSE_MATCH_H_
+#ifndef FF_PARSE_MATCH_H_
+#define FF_PARSE_MATCH_H_
#include "ff.h"
#include "hg.h"
diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h
index f210dc65..5c4cf45e 100644
--- a/decoder/ff_rules.h
+++ b/decoder/ff_rules.h
@@ -1,5 +1,5 @@
-#ifndef _FF_RULES_H_
-#define _FF_RULES_H_
+#ifndef FF_RULES_H_
+#define FF_RULES_H_
#include <vector>
#include <map>
diff --git a/decoder/ff_ruleshape.h b/decoder/ff_ruleshape.h
index 488cfd84..66914f5d 100644
--- a/decoder/ff_ruleshape.h
+++ b/decoder/ff_ruleshape.h
@@ -1,5 +1,5 @@
-#ifndef _FF_RULESHAPE_H_
-#define _FF_RULESHAPE_H_
+#ifndef FF_RULESHAPE_H_
+#define FF_RULESHAPE_H_
#include <vector>
#include <map>
diff --git a/decoder/ff_soft_syntax.h b/decoder/ff_soft_syntax.h
index e71825d5..da51df7f 100644
--- a/decoder/ff_soft_syntax.h
+++ b/decoder/ff_soft_syntax.h
@@ -1,5 +1,5 @@
-#ifndef _FF_SOFT_SYNTAX_H_
-#define _FF_SOFT_SYNTAX_H_
+#ifndef FF_SOFT_SYNTAX_H_
+#define FF_SOFT_SYNTAX_H_
#include "ff.h"
#include "hg.h"
diff --git a/decoder/ff_soft_syntax_mindist.h b/decoder/ff_soft_syntax_mindist.h
index bf938b38..205eff4b 100644
--- a/decoder/ff_soft_syntax_mindist.h
+++ b/decoder/ff_soft_syntax_mindist.h
@@ -1,5 +1,5 @@
-#ifndef _FF_SOFT_SYNTAX_MINDIST_H_
-#define _FF_SOFT_SYNTAX_MINDIST_H_
+#ifndef FF_SOFT_SYNTAX_MINDIST_H_
+#define FF_SOFT_SYNTAX_MINDIST_H_
#include "ff.h"
#include "hg.h"
diff --git a/decoder/ff_source_path.h b/decoder/ff_source_path.h
index 03126412..fc309264 100644
--- a/decoder/ff_source_path.h
+++ b/decoder/ff_source_path.h
@@ -1,5 +1,5 @@
-#ifndef _FF_SOURCE_PATH_H_
-#define _FF_SOURCE_PATH_H_
+#ifndef FF_SOURCE_PATH_H_
+#define FF_SOURCE_PATH_H_
#include <vector>
#include <map>
diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc
index 6b183863..f6f673d2 100644
--- a/decoder/ff_source_syntax.cc
+++ b/decoder/ff_source_syntax.cc
@@ -2,12 +2,7 @@
#include <sstream>
#include <stack>
-#ifndef HAVE_OLD_CPP
-# include <unordered_set>
-#else
-# include <tr1/unordered_set>
-namespace std { using std::tr1::unordered_set; }
-#endif
+#include <unordered_set>
#include "sentence_metadata.h"
#include "array2d.h"
diff --git a/decoder/ff_source_syntax.h b/decoder/ff_source_syntax.h
index bdd638c1..6316e881 100644
--- a/decoder/ff_source_syntax.h
+++ b/decoder/ff_source_syntax.h
@@ -1,5 +1,5 @@
-#ifndef _FF_SOURCE_SYNTAX_H_
-#define _FF_SOURCE_SYNTAX_H_
+#ifndef FF_SOURCE_SYNTAX_H_
+#define FF_SOURCE_SYNTAX_H_
#include "ff.h"
#include "hg.h"
diff --git a/decoder/ff_source_syntax2.cc b/decoder/ff_source_syntax2.cc
index a97e31d8..48991920 100644
--- a/decoder/ff_source_syntax2.cc
+++ b/decoder/ff_source_syntax2.cc
@@ -3,6 +3,7 @@
#include <sstream>
#include <stack>
#include <string>
+#include <unordered_set>
#include "sentence_metadata.h"
#include "array2d.h"
diff --git a/decoder/ff_source_syntax2.h b/decoder/ff_source_syntax2.h
index f606c2bf..bbfa9eb6 100644
--- a/decoder/ff_source_syntax2.h
+++ b/decoder/ff_source_syntax2.h
@@ -1,5 +1,5 @@
-#ifndef _FF_SOURCE_SYNTAX2_H_
-#define _FF_SOURCE_SYNTAX2_H_
+#ifndef FF_SOURCE_SYNTAX2_H_
+#define FF_SOURCE_SYNTAX2_H_
#include "ff.h"
#include "hg.h"
diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h
index d2f5e84c..e2475491 100644
--- a/decoder/ff_spans.h
+++ b/decoder/ff_spans.h
@@ -1,5 +1,5 @@
-#ifndef _FF_SPANS_H_
-#define _FF_SPANS_H_
+#ifndef FF_SPANS_H_
+#define FF_SPANS_H_
#include <vector>
#include <map>
diff --git a/decoder/ff_tagger.h b/decoder/ff_tagger.h
index 46418b0c..0cb8c648 100644
--- a/decoder/ff_tagger.h
+++ b/decoder/ff_tagger.h
@@ -1,5 +1,5 @@
-#ifndef _FF_TAGGER_H_
-#define _FF_TAGGER_H_
+#ifndef FF_TAGGER_H_
+#define FF_TAGGER_H_
#include <map>
#include <boost/scoped_ptr.hpp>
diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h
index 0161f603..ec454621 100644
--- a/decoder/ff_wordalign.h
+++ b/decoder/ff_wordalign.h
@@ -1,5 +1,5 @@
-#ifndef _FF_WORD_ALIGN_H_
-#define _FF_WORD_ALIGN_H_
+#ifndef FF_WORD_ALIGN_H_
+#define FF_WORD_ALIGN_H_
#include "ff.h"
#include "array2d.h"
diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h
index e78cd2fb..94f5ff8a 100644
--- a/decoder/ff_wordset.h
+++ b/decoder/ff_wordset.h
@@ -1,5 +1,5 @@
-#ifndef _FF_WORDSET_H_
-#define _FF_WORDSET_H_
+#ifndef FF_WORDSET_H_
+#define FF_WORDSET_H_
#include "ff.h"
#include "tdict.h"
diff --git a/decoder/ffset.h b/decoder/ffset.h
index 28aef667..b7322ee2 100644
--- a/decoder/ffset.h
+++ b/decoder/ffset.h
@@ -1,5 +1,5 @@
-#ifndef _FFSET_H_
-#define _FFSET_H_
+#ifndef FFSET_H_
+#define FFSET_H_
#include <vector>
#include "value_array.h"
diff --git a/decoder/forest_writer.h b/decoder/forest_writer.h
index 819a8940..4d28de77 100644
--- a/decoder/forest_writer.h
+++ b/decoder/forest_writer.h
@@ -1,5 +1,5 @@
-#ifndef _FOREST_WRITER_H_
-#define _FOREST_WRITER_H_
+#ifndef FOREST_WRITER_H_
+#define FOREST_WRITER_H_
#include <string>
diff --git a/decoder/freqdict.h b/decoder/freqdict.h
index 4e03fadd..07d797e2 100644
--- a/decoder/freqdict.h
+++ b/decoder/freqdict.h
@@ -1,5 +1,5 @@
-#ifndef _FREQDICT_H_
-#define _FREQDICT_H_
+#ifndef FREQDICT_H_
+#define FREQDICT_H_
#include <iostream>
#include <map>
diff --git a/decoder/fst_translator.cc b/decoder/fst_translator.cc
index 4253b652..50e6adcc 100644
--- a/decoder/fst_translator.cc
+++ b/decoder/fst_translator.cc
@@ -95,6 +95,7 @@ bool FSTTranslator::TranslateImpl(const string& input,
const vector<double>& weights,
Hypergraph* minus_lm_forest) {
smeta->SetSourceLength(0); // don't know how to compute this
+ smeta->input_type_ = cdec::kFOREST;
return pimpl_->Translate(input, weights, minus_lm_forest);
}
diff --git a/decoder/hg.h b/decoder/hg.h
index 4ed27d87..256f650f 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -1,5 +1,5 @@
-#ifndef _HG_H_
-#define _HG_H_
+#ifndef HG_H_
+#define HG_H_
// define USE_INFO_EDGE 1 if you want lots of debug info shown with --show_derivations - otherwise it adds quite a bit of overhead if ffs have their logging enabled (e.g. ff_from_fsa)
#ifndef USE_INFO_EDGE
diff --git a/decoder/hg_intersect.cc b/decoder/hg_intersect.cc
index 02f5a401..b9381d02 100644
--- a/decoder/hg_intersect.cc
+++ b/decoder/hg_intersect.cc
@@ -88,7 +88,7 @@ namespace HG {
bool Intersect(const Lattice& target, Hypergraph* hg) {
// there are a number of faster algorithms available for restricted
// classes of hypergraph and/or target.
- if (hg->IsLinearChain() && target.IsSentence())
+ if (hg->IsLinearChain() && IsSentence(target))
return FastLinearIntersect(target, hg);
vector<bool> rem(hg->edges_.size(), false);
diff --git a/decoder/hg_intersect.h b/decoder/hg_intersect.h
index 29a5ea2a..19c1c177 100644
--- a/decoder/hg_intersect.h
+++ b/decoder/hg_intersect.h
@@ -1,5 +1,5 @@
-#ifndef _HG_INTERSECT_H_
-#define _HG_INTERSECT_H_
+#ifndef HG_INTERSECT_H_
+#define HG_INTERSECT_H_
#include "lattice.h"
diff --git a/decoder/hg_io.h b/decoder/hg_io.h
index 58af8132..5a2bd808 100644
--- a/decoder/hg_io.h
+++ b/decoder/hg_io.h
@@ -1,5 +1,5 @@
-#ifndef _HG_IO_H_
-#define _HG_IO_H_
+#ifndef HG_IO_H_
+#define HG_IO_H_
#include <iostream>
#include <string>
diff --git a/decoder/hg_remove_eps.h b/decoder/hg_remove_eps.h
index 82f06039..f67fe6e2 100644
--- a/decoder/hg_remove_eps.h
+++ b/decoder/hg_remove_eps.h
@@ -1,5 +1,5 @@
-#ifndef _HG_REMOVE_EPS_H_
-#define _HG_REMOVE_EPS_H_
+#ifndef HG_REMOVE_EPS_H_
+#define HG_REMOVE_EPS_H_
#include "wordid.h"
class Hypergraph;
diff --git a/decoder/hg_sampler.h b/decoder/hg_sampler.h
index 6ac39a20..4267b5ec 100644
--- a/decoder/hg_sampler.h
+++ b/decoder/hg_sampler.h
@@ -1,6 +1,5 @@
-#ifndef _HG_SAMPLER_H_
-#define _HG_SAMPLER_H_
-
+#ifndef HG_SAMPLER_H_
+#define HG_SAMPLER_H_
#include <vector>
#include <string>
diff --git a/decoder/hg_union.h b/decoder/hg_union.h
index 34624246..bb7e2d09 100644
--- a/decoder/hg_union.h
+++ b/decoder/hg_union.h
@@ -1,5 +1,5 @@
-#ifndef _HG_UNION_H_
-#define _HG_UNION_H_
+#ifndef HG_UNION_H_
+#define HG_UNION_H_
class Hypergraph;
namespace HG {
diff --git a/decoder/incremental.h b/decoder/incremental.h
index f791a626..46b4817b 100644
--- a/decoder/incremental.h
+++ b/decoder/incremental.h
@@ -1,5 +1,5 @@
-#ifndef _INCREMENTAL_H_
-#define _INCREMENTAL_H_
+#ifndef INCREMENTAL_H_
+#define INCREMENTAL_H_
#include "weights.h"
#include <vector>
diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h
index c0377fe8..d5bda63c 100644
--- a/decoder/inside_outside.h
+++ b/decoder/inside_outside.h
@@ -1,5 +1,5 @@
-#ifndef _INSIDE_OUTSIDE_H_
-#define _INSIDE_OUTSIDE_H_
+#ifndef INSIDE_OUTSIDE_H_
+#define INSIDE_OUTSIDE_H_
#include <vector>
#include <algorithm>
diff --git a/decoder/json_parse.h b/decoder/json_parse.h
index c3cba954..85e2eff1 100644
--- a/decoder/json_parse.h
+++ b/decoder/json_parse.h
@@ -1,5 +1,5 @@
-#ifndef _JSON_WRAPPER_H_
-#define _JSON_WRAPPER_H_
+#ifndef JSON_WRAPPER_H_
+#define JSON_WRAPPER_H_
#include <iostream>
#include <cassert>
diff --git a/decoder/kbest.h b/decoder/kbest.h
index c7194c7e..d6b3eb94 100644
--- a/decoder/kbest.h
+++ b/decoder/kbest.h
@@ -1,5 +1,5 @@
-#ifndef _HG_KBEST_H_
-#define _HG_KBEST_H_
+#ifndef HG_KBEST_H_
+#define HG_KBEST_H_
#include <vector>
#include <utility>
diff --git a/decoder/lattice.cc b/decoder/lattice.cc
index 89da3cd0..1f97048d 100644
--- a/decoder/lattice.cc
+++ b/decoder/lattice.cc
@@ -50,7 +50,6 @@ void LatticeTools::ConvertTextToLattice(const string& text, Lattice* pl) {
l.resize(ids.size());
for (int i = 0; i < l.size(); ++i)
l[i].push_back(LatticeArc(ids[i], 0.0, 1));
- l.is_sentence_ = true;
}
void LatticeTools::ConvertTextOrPLF(const string& text_or_plf, Lattice* pl) {
diff --git a/decoder/lattice.h b/decoder/lattice.h
index ad4ca50d..1258d3f5 100644
--- a/decoder/lattice.h
+++ b/decoder/lattice.h
@@ -1,5 +1,5 @@
-#ifndef __LATTICE_H_
-#define __LATTICE_H_
+#ifndef LATTICE_H_
+#define LATTICE_H_
#include <string>
#include <vector>
@@ -25,22 +25,24 @@ class Lattice : public std::vector<std::vector<LatticeArc> > {
friend void LatticeTools::ConvertTextOrPLF(const std::string& text_or_plf, Lattice* pl);
friend void LatticeTools::ConvertTextToLattice(const std::string& text, Lattice* pl);
public:
- Lattice() : is_sentence_(false) {}
+ Lattice() {}
explicit Lattice(size_t t, const std::vector<LatticeArc>& v = std::vector<LatticeArc>()) :
- std::vector<std::vector<LatticeArc> >(t, v),
- is_sentence_(false) {}
+ std::vector<std::vector<LatticeArc>>(t, v) {}
int Distance(int from, int to) const {
if (dist_.empty())
return (to - from);
return dist_(from, to);
}
- // TODO this should actually be computed based on the contents
- // of the lattice
- bool IsSentence() const { return is_sentence_; }
private:
void ComputeDistances();
Array2D<int> dist_;
- bool is_sentence_;
};
+inline bool IsSentence(const Lattice& in) {
+ bool res = true;
+ for (auto& alt : in)
+ if (alt.size() > 1) { res = false; break; }
+ return res;
+}
+
#endif
diff --git a/decoder/lexalign.cc b/decoder/lexalign.cc
index 11f20de7..dd529311 100644
--- a/decoder/lexalign.cc
+++ b/decoder/lexalign.cc
@@ -114,10 +114,9 @@ bool LexicalAlign::TranslateImpl(const string& input,
Hypergraph* forest) {
Lattice& lattice = smeta->src_lattice_;
LatticeTools::ConvertTextOrPLF(input, &lattice);
- if (!lattice.IsSentence()) {
- // lexical models make independence assumptions
- // that don't work with lattices or conf nets
- cerr << "LexicalTrans: cannot deal with lattice source input!\n";
+ smeta->ComputeInputLatticeType();
+ if (smeta->GetInputType() != cdec::kSEQUENCE) {
+ cerr << "LexicalTrans: cannot deal with non-sequence input!";
abort();
}
smeta->SetSourceLength(lattice.size());
diff --git a/decoder/lexalign.h b/decoder/lexalign.h
index 7ba4fe64..6415f4f9 100644
--- a/decoder/lexalign.h
+++ b/decoder/lexalign.h
@@ -1,5 +1,5 @@
-#ifndef _LEXALIGN_H_
-#define _LEXALIGN_H_
+#ifndef LEXALIGN_H_
+#define LEXALIGN_H_
#include "translator.h"
#include "lattice.h"
diff --git a/decoder/lextrans.cc b/decoder/lextrans.cc
index 74a18c3f..d13a891a 100644
--- a/decoder/lextrans.cc
+++ b/decoder/lextrans.cc
@@ -271,10 +271,9 @@ bool LexicalTrans::TranslateImpl(const string& input,
Hypergraph* forest) {
Lattice& lattice = smeta->src_lattice_;
LatticeTools::ConvertTextOrPLF(input, &lattice);
- if (!lattice.IsSentence()) {
- // lexical models make independence assumptions
- // that don't work with lattices or conf nets
- cerr << "LexicalTrans: cannot deal with lattice source input!\n";
+ smeta->ComputeInputLatticeType();
+ if (smeta->GetInputType() != cdec::kSEQUENCE) {
+ cerr << "LexicalTrans: cannot deal with non-sequence inputs\n";
abort();
}
smeta->SetSourceLength(lattice.size());
diff --git a/decoder/lextrans.h b/decoder/lextrans.h
index 2d51e7c0..a23a4e0d 100644
--- a/decoder/lextrans.h
+++ b/decoder/lextrans.h
@@ -1,5 +1,5 @@
-#ifndef _LEXTrans_H_
-#define _LEXTrans_H_
+#ifndef LEXTrans_H_
+#define LEXTrans_H_
#include "translator.h"
#include "lattice.h"
diff --git a/decoder/node_state_hash.h b/decoder/node_state_hash.h
index 9fc01a09..f380fcb1 100644
--- a/decoder/node_state_hash.h
+++ b/decoder/node_state_hash.h
@@ -1,5 +1,5 @@
-#ifndef _NODE_STATE_HASH_
-#define _NODE_STATE_HASH_
+#ifndef NODE_STATE_HASH_
+#define NODE_STATE_HASH_
#include <cassert>
#include <cstring>
diff --git a/decoder/phrasebased_translator.cc b/decoder/phrasebased_translator.cc
index 8048248e..8415353a 100644
--- a/decoder/phrasebased_translator.cc
+++ b/decoder/phrasebased_translator.cc
@@ -114,6 +114,7 @@ struct PhraseBasedTranslatorImpl {
Lattice lattice;
LatticeTools::ConvertTextOrPLF(input, &lattice);
smeta->SetSourceLength(lattice.size());
+ smeta->ComputeInputLatticeType();
size_t est_nodes = lattice.size() * lattice.size() * (1 << max_distortion);
minus_lm_forest->ReserveNodes(est_nodes, est_nodes * 100);
if (add_pass_through_rules) {
diff --git a/decoder/phrasebased_translator.h b/decoder/phrasebased_translator.h
index e5e3f8a2..10790d0d 100644
--- a/decoder/phrasebased_translator.h
+++ b/decoder/phrasebased_translator.h
@@ -1,5 +1,5 @@
-#ifndef _PHRASEBASED_TRANSLATOR_H_
-#define _PHRASEBASED_TRANSLATOR_H_
+#ifndef PHRASEBASED_TRANSLATOR_H_
+#define PHRASEBASED_TRANSLATOR_H_
#include "translator.h"
diff --git a/decoder/phrasetable_fst.h b/decoder/phrasetable_fst.h
index 477de1f7..966bb14d 100644
--- a/decoder/phrasetable_fst.h
+++ b/decoder/phrasetable_fst.h
@@ -1,5 +1,5 @@
-#ifndef _PHRASETABLE_FST_H_
-#define _PHRASETABLE_FST_H_
+#ifndef PHRASETABLE_FST_H_
+#define PHRASETABLE_FST_H_
#include <vector>
#include <string>
diff --git a/decoder/rescore_translator.cc b/decoder/rescore_translator.cc
index 10192f7a..18c83c56 100644
--- a/decoder/rescore_translator.cc
+++ b/decoder/rescore_translator.cc
@@ -53,6 +53,7 @@ bool RescoreTranslator::TranslateImpl(const string& input,
const vector<double>& weights,
Hypergraph* minus_lm_forest) {
smeta->SetSourceLength(0); // don't know how to compute this
+ smeta->input_type_ = cdec::kFOREST;
return pimpl_->Translate(input, weights, minus_lm_forest);
}
diff --git a/decoder/rule_lexer.h b/decoder/rule_lexer.h
index e15c056d..5267f9ca 100644
--- a/decoder/rule_lexer.h
+++ b/decoder/rule_lexer.h
@@ -1,5 +1,5 @@
-#ifndef _RULE_LEXER_H_
-#define _RULE_LEXER_H_
+#ifndef RULE_LEXER_H_
+#define RULE_LEXER_H_
#include <iostream>
#include <string>
diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc
index c3cfcaad..538f82ec 100644
--- a/decoder/scfg_translator.cc
+++ b/decoder/scfg_translator.cc
@@ -1,5 +1,6 @@
#include <algorithm>
#include <vector>
+#include <unordered_set>
#include <boost/foreach.hpp>
#include <boost/functional/hash.hpp>
#include "fast_lexical_cast.hpp"
@@ -194,6 +195,7 @@ struct SCFGTranslatorImpl {
Lattice& lattice = smeta->src_lattice_;
LatticeTools::ConvertTextOrPLF(input, &lattice);
smeta->SetSourceLength(lattice.size());
+ smeta->ComputeInputLatticeType();
if (add_pass_through_rules){
if (!SILENT) cerr << "Adding pass through grammar" << endl;
PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_, num_pt_features);
diff --git a/decoder/sentence_metadata.h b/decoder/sentence_metadata.h
index f2a779f4..e13c2ca5 100644
--- a/decoder/sentence_metadata.h
+++ b/decoder/sentence_metadata.h
@@ -1,14 +1,20 @@
-#ifndef _SENTENCE_METADATA_H_
-#define _SENTENCE_METADATA_H_
+#ifndef SENTENCE_METADATA_H_
+#define SENTENCE_METADATA_H_
#include <string>
#include <map>
#include <cassert>
#include "lattice.h"
+#include "tree_fragment.h"
struct DocScorer; // deprecated, will be removed
struct Score; // deprecated, will be removed
+namespace cdec {
+enum InputType { kSEQUENCE, kTREE, kLATTICE, kFOREST, kUNKNOWN };
+class TreeFragment;
+}
+
class SentenceMetadata {
public:
friend class DecoderImpl;
@@ -17,7 +23,17 @@ class SentenceMetadata {
src_len_(-1),
has_reference_(ref.size() > 0),
trg_len_(ref.size()),
- ref_(has_reference_ ? &ref : NULL) {}
+ ref_(has_reference_ ? &ref : NULL),
+ input_type_(cdec::kUNKNOWN) {}
+
+ // helper function for lattice inputs
+ void ComputeInputLatticeType() {
+ input_type_ = cdec::kSEQUENCE;
+ for (auto& alt : src_lattice_) {
+ if (alt.size() > 1) { input_type_ = cdec::kLATTICE; break; }
+ }
+ }
+ cdec::InputType GetInputType() const { return input_type_; }
int GetSentenceId() const { return sent_id_; }
@@ -25,6 +41,8 @@ class SentenceMetadata {
// it has parsed the source
void SetSourceLength(int sl) { src_len_ = sl; }
+ const cdec::TreeFragment& GetSourceTree() const { return src_tree_; }
+
// this should be called if a separate model needs to
// specify how long the target sentence should be
void SetTargetLength(int tl) {
@@ -64,12 +82,15 @@ class SentenceMetadata {
const Score* app_score;
public:
Lattice src_lattice_; // this will only be set if inputs are finite state!
+ cdec::TreeFragment src_tree_; // this will be set only if inputs are trees
private:
// you need to be very careful when depending on these values
// they will only be set during training / alignment contexts
const bool has_reference_;
int trg_len_;
const Lattice* const ref_;
+ public:
+ cdec::InputType input_type_;
};
#endif
diff --git a/decoder/tagger.cc b/decoder/tagger.cc
index 30fb055f..500d2061 100644
--- a/decoder/tagger.cc
+++ b/decoder/tagger.cc
@@ -100,6 +100,8 @@ bool Tagger::TranslateImpl(const string& input,
Lattice& lattice = smeta->src_lattice_;
LatticeTools::ConvertTextToLattice(input, &lattice);
smeta->SetSourceLength(lattice.size());
+ smeta->ComputeInputLatticeType();
+ assert(smeta->GetInputType() == cdec::kSEQUENCE);
vector<WordID> sequence(lattice.size());
for (int i = 0; i < lattice.size(); ++i) {
assert(lattice[i].size() == 1);
diff --git a/decoder/tagger.h b/decoder/tagger.h
index 9ac820d9..51659d5b 100644
--- a/decoder/tagger.h
+++ b/decoder/tagger.h
@@ -1,5 +1,5 @@
-#ifndef _TAGGER_H_
-#define _TAGGER_H_
+#ifndef TAGGER_H_
+#define TAGGER_H_
#include "translator.h"
diff --git a/decoder/translator.h b/decoder/translator.h
index ba218a0b..096cf191 100644
--- a/decoder/translator.h
+++ b/decoder/translator.h
@@ -1,5 +1,5 @@
-#ifndef _TRANSLATOR_H_
-#define _TRANSLATOR_H_
+#ifndef TRANSLATOR_H_
+#define TRANSLATOR_H_
#include <string>
#include <vector>
diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc
index adc8dc89..08dae64c 100644
--- a/decoder/tree2string_translator.cc
+++ b/decoder/tree2string_translator.cc
@@ -2,6 +2,7 @@
#include <vector>
#include <queue>
#include <map>
+#include <unordered_map>
#include <unordered_set>
#include <boost/shared_ptr.hpp>
#include <boost/functional/hash.hpp>
@@ -31,12 +32,12 @@ static void ReadTree2StringGrammar(istream* in, Tree2StringGrammarNode* root, bo
++lc;
if (line.size() == 0 || line[0] == '#') continue;
std::vector<StringPiece> fields = TokenizeMultisep(line, " ||| ");
- if (has_multiple_states && fields.size() != 4) {
- cerr << "Expected 4 fields in rule file but line " << lc << " is:\n" << line << endl;
+ if (has_multiple_states && fields.size() < 4) {
+ cerr << "Expected at least 4 fields in rule file but line " << lc << " is:\n" << line << endl;
abort();
}
- if (!has_multiple_states && fields.size() != 3) {
- cerr << "Expected 3 fields in rule file but line " << lc << " is:\n" << line << endl;
+ if (!has_multiple_states && fields.size() < 3) {
+ cerr << "Expected at least 3 fields in rule file but line " << lc << " is:\n" << line << endl;
abort();
}
@@ -72,6 +73,7 @@ static void ReadTree2StringGrammar(istream* in, Tree2StringGrammarNode* root, bo
cerr << "Not implemented...\n"; abort(); // TODO read in states
} else {
os << " ||| " << fields[1] << " ||| " << fields[2];
+ if (fields.size() > 3) os << " ||| " << fields[3];
rule.reset(new TRule(os.str()));
}
cur->rules.push_back(rule);
@@ -286,6 +288,8 @@ struct Tree2StringTranslatorImpl {
const vector<double>& weights,
Hypergraph* minus_lm_forest) {
cdec::TreeFragment input_tree(input, false);
+ smeta->src_tree_ = input_tree;
+ smeta->input_type_ = cdec::kTREE;
if (add_pass_through_rules) CreatePassThroughRules(input_tree);
Hypergraph hg;
hg.ReserveNodes(input_tree.nodes.size());
diff --git a/decoder/tree_fragment.cc b/decoder/tree_fragment.cc
index 42f7793a..5f717c5b 100644
--- a/decoder/tree_fragment.cc
+++ b/decoder/tree_fragment.cc
@@ -64,6 +64,13 @@ int TreeFragment::SetupSpansRec(unsigned cur, int left) {
return right;
}
+vector<int> TreeFragment::Terminals() const {
+ vector<int> terms;
+ for (auto& x : *this)
+ if (IsTerminal(x)) terms.push_back(x);
+ return terms;
+}
+
// cp is the character index in the tree
// np keeps track of the nodes (nonterminals) that have been built
// symp keeps track of the terminal symbols that have been built
diff --git a/decoder/tree_fragment.h b/decoder/tree_fragment.h
index 6b4842ee..e19b79fb 100644
--- a/decoder/tree_fragment.h
+++ b/decoder/tree_fragment.h
@@ -72,6 +72,8 @@ class TreeFragment {
BreadthFirstIterator bfs_begin(unsigned node_idx) const;
BreadthFirstIterator bfs_end() const;
+ std::vector<int> Terminals() const;
+
private:
// cp is the character index in the tree
// np keeps track of the nodes (nonterminals) that have been built
diff --git a/decoder/trule.h b/decoder/trule.h
index 243b0da9..adef7cc7 100644
--- a/decoder/trule.h
+++ b/decoder/trule.h
@@ -1,5 +1,5 @@
-#ifndef _RULE_H_
-#define _RULE_H_
+#ifndef TRULE_H_
+#define TRULE_H_
#include <algorithm>
#include <vector>
diff --git a/decoder/viterbi.h b/decoder/viterbi.h
index a8a0ea7f..20ee73cc 100644
--- a/decoder/viterbi.h
+++ b/decoder/viterbi.h
@@ -1,5 +1,5 @@
-#ifndef _VITERBI_H_
-#define _VITERBI_H_
+#ifndef VITERBI_H_
+#define VITERBI_H_
#include <vector>
#include "prob.h"