diff options
author | Patrick Simianer <p@simianer.de> | 2014-10-13 19:03:48 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-10-13 19:03:48 +0100 |
commit | cb9fb7088dde35881516c088db402abe747d49fa (patch) | |
tree | a91e4935a7941f1b261f76d88ab41fa3078a1891 /decoder | |
parent | 0a00e57e921c8eca8e02364db7d2e6607bfdcebc (diff) | |
parent | b1ed81ef3216b212295afa76c5d20a56fb647204 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'decoder')
73 files changed, 184 insertions, 155 deletions
diff --git a/decoder/aligner.cc b/decoder/aligner.cc index 232e022a..fd648370 100644 --- a/decoder/aligner.cc +++ b/decoder/aligner.cc @@ -198,13 +198,13 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice, } const Hypergraph* g = &in_g; HypergraphP new_hg; - if (!src_lattice.IsSentence() || - !trg_lattice.IsSentence()) { + if (!IsSentence(src_lattice) || + !IsSentence(trg_lattice)) { if (map_instead_of_viterbi) { cerr << " Lattice alignment: using Viterbi instead of MAP alignment\n"; } map_instead_of_viterbi = false; - fix_up_src_spans = !src_lattice.IsSentence(); + fix_up_src_spans = !IsSentence(src_lattice); } KBest::KBestDerivations<vector<Hypergraph::Edge const*>, ViterbiPathTraversal> kbest(in_g, k_best); diff --git a/decoder/apply_models.h b/decoder/apply_models.h index 19a4c7be..f03c973a 100644 --- a/decoder/apply_models.h +++ b/decoder/apply_models.h @@ -1,5 +1,5 @@ -#ifndef _APPLY_MODELS_H_ -#define _APPLY_MODELS_H_ +#ifndef APPLY_MODELS_H_ +#define APPLY_MODELS_H_ #include <iostream> diff --git a/decoder/bottom_up_parser.h b/decoder/bottom_up_parser.h index 546bfb54..628bb96d 100644 --- a/decoder/bottom_up_parser.h +++ b/decoder/bottom_up_parser.h @@ -1,5 +1,5 @@ -#ifndef _BOTTOM_UP_PARSER_H_ -#define _BOTTOM_UP_PARSER_H_ +#ifndef BOTTOM_UP_PARSER_H_ +#define BOTTOM_UP_PARSER_H_ #include <vector> #include <string> diff --git a/decoder/csplit.cc b/decoder/csplit.cc index 4a723822..7ee4092e 100644 --- a/decoder/csplit.cc +++ b/decoder/csplit.cc @@ -151,6 +151,7 @@ bool CompoundSplit::TranslateImpl(const string& input, smeta->SetSourceLength(in.size()); // TODO do utf8 or somethign for (int i = 0; i < in.size(); ++i) smeta->src_lattice_.push_back(vector<LatticeArc>(1, LatticeArc(TD::Convert(in[i]), 0.0, 1))); + smeta->ComputeInputLatticeType(); pimpl_->BuildTrellis(in, forest); forest->Reweight(weights); return true; diff --git a/decoder/csplit.h b/decoder/csplit.h index 82ed23fc..83d457b8 100644 --- a/decoder/csplit.h +++ b/decoder/csplit.h @@ -1,5 +1,5 @@ -#ifndef _CSPLIT_H_ -#define _CSPLIT_H_ +#ifndef CSPLIT_H_ +#define CSPLIT_H_ #include "translator.h" #include "lattice.h" diff --git a/decoder/decoder.cc b/decoder/decoder.cc index c384c33f..9e8d692a 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -86,7 +86,7 @@ struct ELengthWeightFunction { } }; inline void ShowBanner() { - cerr << "cdec (c) 2009--2014 by Chris Dyer\n"; + cerr << "cdec (c) 2009--2014 by Chris Dyer" << endl; } inline string str(char const* name,po::variables_map const& conf) { diff --git a/decoder/decoder.h b/decoder/decoder.h index 8039a42b..a545206b 100644 --- a/decoder/decoder.h +++ b/decoder/decoder.h @@ -1,5 +1,5 @@ -#ifndef _DECODER_H_ -#define _DECODER_H_ +#ifndef DECODER_H_ +#define DECODER_H_ #include <iostream> #include <string> diff --git a/decoder/earley_composer.h b/decoder/earley_composer.h index 9f786bf6..31602f67 100644 --- a/decoder/earley_composer.h +++ b/decoder/earley_composer.h @@ -1,5 +1,5 @@ -#ifndef _EARLEY_COMPOSER_H_ -#define _EARLEY_COMPOSER_H_ +#ifndef EARLEY_COMPOSER_H_ +#define EARLEY_COMPOSER_H_ #include <iostream> diff --git a/decoder/factored_lexicon_helper.h b/decoder/factored_lexicon_helper.h index 7fedc517..460bdebb 100644 --- a/decoder/factored_lexicon_helper.h +++ b/decoder/factored_lexicon_helper.h @@ -1,5 +1,5 @@ -#ifndef _FACTORED_LEXICON_HELPER_ -#define _FACTORED_LEXICON_HELPER_ +#ifndef FACTORED_LEXICON_HELPER_ +#define FACTORED_LEXICON_HELPER_ #include <cassert> #include <vector> diff --git a/decoder/ff.h b/decoder/ff.h index 3280592e..eed1e3fb 100644 --- a/decoder/ff.h +++ b/decoder/ff.h @@ -1,5 +1,5 @@ -#ifndef _FF_H_ -#define _FF_H_ +#ifndef FF_H_ +#define FF_H_ #include <string> #include <vector> diff --git a/decoder/ff_basic.cc b/decoder/ff_basic.cc index f9404d24..f960418a 100644 --- a/decoder/ff_basic.cc +++ b/decoder/ff_basic.cc @@ -49,9 +49,7 @@ void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, features->set_value(fid_, edge.rule_->FWords() * value_); } - -ArityPenalty::ArityPenalty(const std::string& param) : - value_(-1.0 / log(10)) { +ArityPenalty::ArityPenalty(const std::string& param) { string fname = "Arity_"; unsigned MAX=DEFAULT_MAX_ARITY; using namespace boost; @@ -61,7 +59,8 @@ ArityPenalty::ArityPenalty(const std::string& param) : WordID fid=FD::Convert(fname+lexical_cast<string>(i)); fids_.push_back(fid); } - while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen. doesn't change anything + // pretty up features vector in case FD was frozen. doesn't change anything + while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); } void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, @@ -75,6 +74,6 @@ void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, (void) state; (void) estimated_features; unsigned a=edge.Arity(); - features->set_value(a<fids_.size()?fids_[a]:0, value_); + if (a < fids_.size()) features->set_value(fids_[a], 1.0); } diff --git a/decoder/ff_basic.h b/decoder/ff_basic.h index 901c0110..c63daf0f 100644 --- a/decoder/ff_basic.h +++ b/decoder/ff_basic.h @@ -1,5 +1,5 @@ -#ifndef _FF_BASIC_H_ -#define _FF_BASIC_H_ +#ifndef FF_BASIC_H_ +#define FF_BASIC_H_ #include "ff.h" @@ -41,7 +41,7 @@ class SourceWordPenalty : public FeatureFunction { const double value_; }; -#define DEFAULT_MAX_ARITY 9 +#define DEFAULT_MAX_ARITY 50 #define DEFAULT_MAX_ARITY_STRINGIZE(x) #x #define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x) #define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY) @@ -62,7 +62,6 @@ class ArityPenalty : public FeatureFunction { void* context) const; private: std::vector<WordID> fids_; - const double value_; }; #endif diff --git a/decoder/ff_bleu.h b/decoder/ff_bleu.h index 344dc788..8ca2c095 100644 --- a/decoder/ff_bleu.h +++ b/decoder/ff_bleu.h @@ -1,5 +1,5 @@ -#ifndef _BLEU_FF_H_ -#define _BLEU_FF_H_ +#ifndef BLEU_FF_H_ +#define BLEU_FF_H_ #include <vector> #include <string> diff --git a/decoder/ff_charset.h b/decoder/ff_charset.h index 267ef65d..e22ece2b 100644 --- a/decoder/ff_charset.h +++ b/decoder/ff_charset.h @@ -1,5 +1,5 @@ -#ifndef _FFCHARSET_H_ -#define _FFCHARSET_H_ +#ifndef FFCHARSET_H_ +#define FFCHARSET_H_ #include <string> #include <map> diff --git a/decoder/ff_context.h b/decoder/ff_context.h index 19198ec3..ed1aea2b 100644 --- a/decoder/ff_context.h +++ b/decoder/ff_context.h @@ -1,6 +1,5 @@ - -#ifndef _FF_CONTEXT_H_ -#define _FF_CONTEXT_H_ +#ifndef FF_CONTEXT_H_ +#define FF_CONTEXT_H_ #include <vector> #include <boost/xpressive/xpressive.hpp> diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc index a0e538d3..550ff69a 100644 --- a/decoder/ff_csplit.cc +++ b/decoder/ff_csplit.cc @@ -2,6 +2,8 @@ #include <set> #include <cstring> +#include <unordered_set> +#include <unordered_map> #include "klm/lm/model.hh" @@ -14,12 +16,6 @@ #include "stringlib.h" #include "tdict.h" -#ifndef HAVE_OLD_CPP -# include <unordered_set> -#else -# include <tr1/unordered_set> -namespace std { using std::tr1::unordered_set; } -#endif using namespace std; struct BasicCSplitFeaturesImpl { diff --git a/decoder/ff_csplit.h b/decoder/ff_csplit.h index 79bf2886..227f2a14 100644 --- a/decoder/ff_csplit.h +++ b/decoder/ff_csplit.h @@ -1,5 +1,5 @@ -#ifndef _FF_CSPLIT_H_ -#define _FF_CSPLIT_H_ +#ifndef FF_CSPLIT_H_ +#define FF_CSPLIT_H_ #include <boost/shared_ptr.hpp> diff --git a/decoder/ff_external.h b/decoder/ff_external.h index 3e2bee51..fd12a37c 100644 --- a/decoder/ff_external.h +++ b/decoder/ff_external.h @@ -1,5 +1,5 @@ -#ifndef _FFEXTERNAL_H_ -#define _FFEXTERNAL_H_ +#ifndef FFEXTERNAL_H_ +#define FFEXTERNAL_H_ #include "ff.h" diff --git a/decoder/ff_factory.h b/decoder/ff_factory.h index 1aa8e55f..ba9be9ac 100644 --- a/decoder/ff_factory.h +++ b/decoder/ff_factory.h @@ -1,5 +1,5 @@ -#ifndef _FF_FACTORY_H_ -#define _FF_FACTORY_H_ +#ifndef FF_FACTORY_H_ +#define FF_FACTORY_H_ //TODO: use http://www.boost.org/doc/libs/1_43_0/libs/functional/factory/doc/html/index.html ? diff --git a/decoder/ff_klm.h b/decoder/ff_klm.h index db4032f7..c8350623 100644 --- a/decoder/ff_klm.h +++ b/decoder/ff_klm.h @@ -1,5 +1,5 @@ -#ifndef _KLM_FF_H_ -#define _KLM_FF_H_ +#ifndef KLM_FF_H_ +#define KLM_FF_H_ #include <vector> #include <string> diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h index 85e79704..83a2e186 100644 --- a/decoder/ff_lm.h +++ b/decoder/ff_lm.h @@ -1,5 +1,5 @@ -#ifndef _LM_FF_H_ -#define _LM_FF_H_ +#ifndef LM_FF_H_ +#define LM_FF_H_ #include <vector> #include <string> diff --git a/decoder/ff_ngrams.h b/decoder/ff_ngrams.h index 4965d235..5dea9a7d 100644 --- a/decoder/ff_ngrams.h +++ b/decoder/ff_ngrams.h @@ -1,5 +1,5 @@ -#ifndef _NGRAMS_FF_H_ -#define _NGRAMS_FF_H_ +#ifndef NGRAMS_FF_H_ +#define NGRAMS_FF_H_ #include <vector> #include <map> diff --git a/decoder/ff_parse_match.h b/decoder/ff_parse_match.h index 7820b418..188c406a 100644 --- a/decoder/ff_parse_match.h +++ b/decoder/ff_parse_match.h @@ -1,5 +1,5 @@ -#ifndef _FF_PARSE_MATCH_H_ -#define _FF_PARSE_MATCH_H_ +#ifndef FF_PARSE_MATCH_H_ +#define FF_PARSE_MATCH_H_ #include "ff.h" #include "hg.h" diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index f210dc65..5c4cf45e 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -1,5 +1,5 @@ -#ifndef _FF_RULES_H_ -#define _FF_RULES_H_ +#ifndef FF_RULES_H_ +#define FF_RULES_H_ #include <vector> #include <map> diff --git a/decoder/ff_ruleshape.h b/decoder/ff_ruleshape.h index 488cfd84..66914f5d 100644 --- a/decoder/ff_ruleshape.h +++ b/decoder/ff_ruleshape.h @@ -1,5 +1,5 @@ -#ifndef _FF_RULESHAPE_H_ -#define _FF_RULESHAPE_H_ +#ifndef FF_RULESHAPE_H_ +#define FF_RULESHAPE_H_ #include <vector> #include <map> diff --git a/decoder/ff_soft_syntax.h b/decoder/ff_soft_syntax.h index e71825d5..da51df7f 100644 --- a/decoder/ff_soft_syntax.h +++ b/decoder/ff_soft_syntax.h @@ -1,5 +1,5 @@ -#ifndef _FF_SOFT_SYNTAX_H_ -#define _FF_SOFT_SYNTAX_H_ +#ifndef FF_SOFT_SYNTAX_H_ +#define FF_SOFT_SYNTAX_H_ #include "ff.h" #include "hg.h" diff --git a/decoder/ff_soft_syntax_mindist.h b/decoder/ff_soft_syntax_mindist.h index bf938b38..205eff4b 100644 --- a/decoder/ff_soft_syntax_mindist.h +++ b/decoder/ff_soft_syntax_mindist.h @@ -1,5 +1,5 @@ -#ifndef _FF_SOFT_SYNTAX_MINDIST_H_ -#define _FF_SOFT_SYNTAX_MINDIST_H_ +#ifndef FF_SOFT_SYNTAX_MINDIST_H_ +#define FF_SOFT_SYNTAX_MINDIST_H_ #include "ff.h" #include "hg.h" diff --git a/decoder/ff_source_path.h b/decoder/ff_source_path.h index 03126412..fc309264 100644 --- a/decoder/ff_source_path.h +++ b/decoder/ff_source_path.h @@ -1,5 +1,5 @@ -#ifndef _FF_SOURCE_PATH_H_ -#define _FF_SOURCE_PATH_H_ +#ifndef FF_SOURCE_PATH_H_ +#define FF_SOURCE_PATH_H_ #include <vector> #include <map> diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc index 6b183863..f6f673d2 100644 --- a/decoder/ff_source_syntax.cc +++ b/decoder/ff_source_syntax.cc @@ -2,12 +2,7 @@ #include <sstream> #include <stack> -#ifndef HAVE_OLD_CPP -# include <unordered_set> -#else -# include <tr1/unordered_set> -namespace std { using std::tr1::unordered_set; } -#endif +#include <unordered_set> #include "sentence_metadata.h" #include "array2d.h" diff --git a/decoder/ff_source_syntax.h b/decoder/ff_source_syntax.h index bdd638c1..6316e881 100644 --- a/decoder/ff_source_syntax.h +++ b/decoder/ff_source_syntax.h @@ -1,5 +1,5 @@ -#ifndef _FF_SOURCE_SYNTAX_H_ -#define _FF_SOURCE_SYNTAX_H_ +#ifndef FF_SOURCE_SYNTAX_H_ +#define FF_SOURCE_SYNTAX_H_ #include "ff.h" #include "hg.h" diff --git a/decoder/ff_source_syntax2.cc b/decoder/ff_source_syntax2.cc index a97e31d8..48991920 100644 --- a/decoder/ff_source_syntax2.cc +++ b/decoder/ff_source_syntax2.cc @@ -3,6 +3,7 @@ #include <sstream> #include <stack> #include <string> +#include <unordered_set> #include "sentence_metadata.h" #include "array2d.h" diff --git a/decoder/ff_source_syntax2.h b/decoder/ff_source_syntax2.h index f606c2bf..bbfa9eb6 100644 --- a/decoder/ff_source_syntax2.h +++ b/decoder/ff_source_syntax2.h @@ -1,5 +1,5 @@ -#ifndef _FF_SOURCE_SYNTAX2_H_ -#define _FF_SOURCE_SYNTAX2_H_ +#ifndef FF_SOURCE_SYNTAX2_H_ +#define FF_SOURCE_SYNTAX2_H_ #include "ff.h" #include "hg.h" diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h index d2f5e84c..e2475491 100644 --- a/decoder/ff_spans.h +++ b/decoder/ff_spans.h @@ -1,5 +1,5 @@ -#ifndef _FF_SPANS_H_ -#define _FF_SPANS_H_ +#ifndef FF_SPANS_H_ +#define FF_SPANS_H_ #include <vector> #include <map> diff --git a/decoder/ff_tagger.h b/decoder/ff_tagger.h index 46418b0c..0cb8c648 100644 --- a/decoder/ff_tagger.h +++ b/decoder/ff_tagger.h @@ -1,5 +1,5 @@ -#ifndef _FF_TAGGER_H_ -#define _FF_TAGGER_H_ +#ifndef FF_TAGGER_H_ +#define FF_TAGGER_H_ #include <map> #include <boost/scoped_ptr.hpp> diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h index 0161f603..ec454621 100644 --- a/decoder/ff_wordalign.h +++ b/decoder/ff_wordalign.h @@ -1,5 +1,5 @@ -#ifndef _FF_WORD_ALIGN_H_ -#define _FF_WORD_ALIGN_H_ +#ifndef FF_WORD_ALIGN_H_ +#define FF_WORD_ALIGN_H_ #include "ff.h" #include "array2d.h" diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h index e78cd2fb..94f5ff8a 100644 --- a/decoder/ff_wordset.h +++ b/decoder/ff_wordset.h @@ -1,5 +1,5 @@ -#ifndef _FF_WORDSET_H_ -#define _FF_WORDSET_H_ +#ifndef FF_WORDSET_H_ +#define FF_WORDSET_H_ #include "ff.h" #include "tdict.h" diff --git a/decoder/ffset.h b/decoder/ffset.h index 28aef667..b7322ee2 100644 --- a/decoder/ffset.h +++ b/decoder/ffset.h @@ -1,5 +1,5 @@ -#ifndef _FFSET_H_ -#define _FFSET_H_ +#ifndef FFSET_H_ +#define FFSET_H_ #include <vector> #include "value_array.h" diff --git a/decoder/forest_writer.h b/decoder/forest_writer.h index 819a8940..4d28de77 100644 --- a/decoder/forest_writer.h +++ b/decoder/forest_writer.h @@ -1,5 +1,5 @@ -#ifndef _FOREST_WRITER_H_ -#define _FOREST_WRITER_H_ +#ifndef FOREST_WRITER_H_ +#define FOREST_WRITER_H_ #include <string> diff --git a/decoder/freqdict.h b/decoder/freqdict.h index 4e03fadd..07d797e2 100644 --- a/decoder/freqdict.h +++ b/decoder/freqdict.h @@ -1,5 +1,5 @@ -#ifndef _FREQDICT_H_ -#define _FREQDICT_H_ +#ifndef FREQDICT_H_ +#define FREQDICT_H_ #include <iostream> #include <map> diff --git a/decoder/fst_translator.cc b/decoder/fst_translator.cc index 4253b652..50e6adcc 100644 --- a/decoder/fst_translator.cc +++ b/decoder/fst_translator.cc @@ -95,6 +95,7 @@ bool FSTTranslator::TranslateImpl(const string& input, const vector<double>& weights, Hypergraph* minus_lm_forest) { smeta->SetSourceLength(0); // don't know how to compute this + smeta->input_type_ = cdec::kFOREST; return pimpl_->Translate(input, weights, minus_lm_forest); } diff --git a/decoder/hg.h b/decoder/hg.h index 4ed27d87..256f650f 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -1,5 +1,5 @@ -#ifndef _HG_H_ -#define _HG_H_ +#ifndef HG_H_ +#define HG_H_ // define USE_INFO_EDGE 1 if you want lots of debug info shown with --show_derivations - otherwise it adds quite a bit of overhead if ffs have their logging enabled (e.g. ff_from_fsa) #ifndef USE_INFO_EDGE diff --git a/decoder/hg_intersect.cc b/decoder/hg_intersect.cc index 02f5a401..b9381d02 100644 --- a/decoder/hg_intersect.cc +++ b/decoder/hg_intersect.cc @@ -88,7 +88,7 @@ namespace HG { bool Intersect(const Lattice& target, Hypergraph* hg) { // there are a number of faster algorithms available for restricted // classes of hypergraph and/or target. - if (hg->IsLinearChain() && target.IsSentence()) + if (hg->IsLinearChain() && IsSentence(target)) return FastLinearIntersect(target, hg); vector<bool> rem(hg->edges_.size(), false); diff --git a/decoder/hg_intersect.h b/decoder/hg_intersect.h index 29a5ea2a..19c1c177 100644 --- a/decoder/hg_intersect.h +++ b/decoder/hg_intersect.h @@ -1,5 +1,5 @@ -#ifndef _HG_INTERSECT_H_ -#define _HG_INTERSECT_H_ +#ifndef HG_INTERSECT_H_ +#define HG_INTERSECT_H_ #include "lattice.h" diff --git a/decoder/hg_io.h b/decoder/hg_io.h index 58af8132..5a2bd808 100644 --- a/decoder/hg_io.h +++ b/decoder/hg_io.h @@ -1,5 +1,5 @@ -#ifndef _HG_IO_H_ -#define _HG_IO_H_ +#ifndef HG_IO_H_ +#define HG_IO_H_ #include <iostream> #include <string> diff --git a/decoder/hg_remove_eps.h b/decoder/hg_remove_eps.h index 82f06039..f67fe6e2 100644 --- a/decoder/hg_remove_eps.h +++ b/decoder/hg_remove_eps.h @@ -1,5 +1,5 @@ -#ifndef _HG_REMOVE_EPS_H_ -#define _HG_REMOVE_EPS_H_ +#ifndef HG_REMOVE_EPS_H_ +#define HG_REMOVE_EPS_H_ #include "wordid.h" class Hypergraph; diff --git a/decoder/hg_sampler.h b/decoder/hg_sampler.h index 6ac39a20..4267b5ec 100644 --- a/decoder/hg_sampler.h +++ b/decoder/hg_sampler.h @@ -1,6 +1,5 @@ -#ifndef _HG_SAMPLER_H_ -#define _HG_SAMPLER_H_ - +#ifndef HG_SAMPLER_H_ +#define HG_SAMPLER_H_ #include <vector> #include <string> diff --git a/decoder/hg_union.h b/decoder/hg_union.h index 34624246..bb7e2d09 100644 --- a/decoder/hg_union.h +++ b/decoder/hg_union.h @@ -1,5 +1,5 @@ -#ifndef _HG_UNION_H_ -#define _HG_UNION_H_ +#ifndef HG_UNION_H_ +#define HG_UNION_H_ class Hypergraph; namespace HG { diff --git a/decoder/incremental.h b/decoder/incremental.h index f791a626..46b4817b 100644 --- a/decoder/incremental.h +++ b/decoder/incremental.h @@ -1,5 +1,5 @@ -#ifndef _INCREMENTAL_H_ -#define _INCREMENTAL_H_ +#ifndef INCREMENTAL_H_ +#define INCREMENTAL_H_ #include "weights.h" #include <vector> diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h index c0377fe8..d5bda63c 100644 --- a/decoder/inside_outside.h +++ b/decoder/inside_outside.h @@ -1,5 +1,5 @@ -#ifndef _INSIDE_OUTSIDE_H_ -#define _INSIDE_OUTSIDE_H_ +#ifndef INSIDE_OUTSIDE_H_ +#define INSIDE_OUTSIDE_H_ #include <vector> #include <algorithm> diff --git a/decoder/json_parse.h b/decoder/json_parse.h index c3cba954..85e2eff1 100644 --- a/decoder/json_parse.h +++ b/decoder/json_parse.h @@ -1,5 +1,5 @@ -#ifndef _JSON_WRAPPER_H_ -#define _JSON_WRAPPER_H_ +#ifndef JSON_WRAPPER_H_ +#define JSON_WRAPPER_H_ #include <iostream> #include <cassert> diff --git a/decoder/kbest.h b/decoder/kbest.h index c7194c7e..d6b3eb94 100644 --- a/decoder/kbest.h +++ b/decoder/kbest.h @@ -1,5 +1,5 @@ -#ifndef _HG_KBEST_H_ -#define _HG_KBEST_H_ +#ifndef HG_KBEST_H_ +#define HG_KBEST_H_ #include <vector> #include <utility> diff --git a/decoder/lattice.cc b/decoder/lattice.cc index 89da3cd0..1f97048d 100644 --- a/decoder/lattice.cc +++ b/decoder/lattice.cc @@ -50,7 +50,6 @@ void LatticeTools::ConvertTextToLattice(const string& text, Lattice* pl) { l.resize(ids.size()); for (int i = 0; i < l.size(); ++i) l[i].push_back(LatticeArc(ids[i], 0.0, 1)); - l.is_sentence_ = true; } void LatticeTools::ConvertTextOrPLF(const string& text_or_plf, Lattice* pl) { diff --git a/decoder/lattice.h b/decoder/lattice.h index ad4ca50d..1258d3f5 100644 --- a/decoder/lattice.h +++ b/decoder/lattice.h @@ -1,5 +1,5 @@ -#ifndef __LATTICE_H_ -#define __LATTICE_H_ +#ifndef LATTICE_H_ +#define LATTICE_H_ #include <string> #include <vector> @@ -25,22 +25,24 @@ class Lattice : public std::vector<std::vector<LatticeArc> > { friend void LatticeTools::ConvertTextOrPLF(const std::string& text_or_plf, Lattice* pl); friend void LatticeTools::ConvertTextToLattice(const std::string& text, Lattice* pl); public: - Lattice() : is_sentence_(false) {} + Lattice() {} explicit Lattice(size_t t, const std::vector<LatticeArc>& v = std::vector<LatticeArc>()) : - std::vector<std::vector<LatticeArc> >(t, v), - is_sentence_(false) {} + std::vector<std::vector<LatticeArc>>(t, v) {} int Distance(int from, int to) const { if (dist_.empty()) return (to - from); return dist_(from, to); } - // TODO this should actually be computed based on the contents - // of the lattice - bool IsSentence() const { return is_sentence_; } private: void ComputeDistances(); Array2D<int> dist_; - bool is_sentence_; }; +inline bool IsSentence(const Lattice& in) { + bool res = true; + for (auto& alt : in) + if (alt.size() > 1) { res = false; break; } + return res; +} + #endif diff --git a/decoder/lexalign.cc b/decoder/lexalign.cc index 11f20de7..dd529311 100644 --- a/decoder/lexalign.cc +++ b/decoder/lexalign.cc @@ -114,10 +114,9 @@ bool LexicalAlign::TranslateImpl(const string& input, Hypergraph* forest) { Lattice& lattice = smeta->src_lattice_; LatticeTools::ConvertTextOrPLF(input, &lattice); - if (!lattice.IsSentence()) { - // lexical models make independence assumptions - // that don't work with lattices or conf nets - cerr << "LexicalTrans: cannot deal with lattice source input!\n"; + smeta->ComputeInputLatticeType(); + if (smeta->GetInputType() != cdec::kSEQUENCE) { + cerr << "LexicalTrans: cannot deal with non-sequence input!"; abort(); } smeta->SetSourceLength(lattice.size()); diff --git a/decoder/lexalign.h b/decoder/lexalign.h index 7ba4fe64..6415f4f9 100644 --- a/decoder/lexalign.h +++ b/decoder/lexalign.h @@ -1,5 +1,5 @@ -#ifndef _LEXALIGN_H_ -#define _LEXALIGN_H_ +#ifndef LEXALIGN_H_ +#define LEXALIGN_H_ #include "translator.h" #include "lattice.h" diff --git a/decoder/lextrans.cc b/decoder/lextrans.cc index 74a18c3f..d13a891a 100644 --- a/decoder/lextrans.cc +++ b/decoder/lextrans.cc @@ -271,10 +271,9 @@ bool LexicalTrans::TranslateImpl(const string& input, Hypergraph* forest) { Lattice& lattice = smeta->src_lattice_; LatticeTools::ConvertTextOrPLF(input, &lattice); - if (!lattice.IsSentence()) { - // lexical models make independence assumptions - // that don't work with lattices or conf nets - cerr << "LexicalTrans: cannot deal with lattice source input!\n"; + smeta->ComputeInputLatticeType(); + if (smeta->GetInputType() != cdec::kSEQUENCE) { + cerr << "LexicalTrans: cannot deal with non-sequence inputs\n"; abort(); } smeta->SetSourceLength(lattice.size()); diff --git a/decoder/lextrans.h b/decoder/lextrans.h index 2d51e7c0..a23a4e0d 100644 --- a/decoder/lextrans.h +++ b/decoder/lextrans.h @@ -1,5 +1,5 @@ -#ifndef _LEXTrans_H_ -#define _LEXTrans_H_ +#ifndef LEXTrans_H_ +#define LEXTrans_H_ #include "translator.h" #include "lattice.h" diff --git a/decoder/node_state_hash.h b/decoder/node_state_hash.h index 9fc01a09..f380fcb1 100644 --- a/decoder/node_state_hash.h +++ b/decoder/node_state_hash.h @@ -1,5 +1,5 @@ -#ifndef _NODE_STATE_HASH_ -#define _NODE_STATE_HASH_ +#ifndef NODE_STATE_HASH_ +#define NODE_STATE_HASH_ #include <cassert> #include <cstring> diff --git a/decoder/phrasebased_translator.cc b/decoder/phrasebased_translator.cc index 8048248e..8415353a 100644 --- a/decoder/phrasebased_translator.cc +++ b/decoder/phrasebased_translator.cc @@ -114,6 +114,7 @@ struct PhraseBasedTranslatorImpl { Lattice lattice; LatticeTools::ConvertTextOrPLF(input, &lattice); smeta->SetSourceLength(lattice.size()); + smeta->ComputeInputLatticeType(); size_t est_nodes = lattice.size() * lattice.size() * (1 << max_distortion); minus_lm_forest->ReserveNodes(est_nodes, est_nodes * 100); if (add_pass_through_rules) { diff --git a/decoder/phrasebased_translator.h b/decoder/phrasebased_translator.h index e5e3f8a2..10790d0d 100644 --- a/decoder/phrasebased_translator.h +++ b/decoder/phrasebased_translator.h @@ -1,5 +1,5 @@ -#ifndef _PHRASEBASED_TRANSLATOR_H_ -#define _PHRASEBASED_TRANSLATOR_H_ +#ifndef PHRASEBASED_TRANSLATOR_H_ +#define PHRASEBASED_TRANSLATOR_H_ #include "translator.h" diff --git a/decoder/phrasetable_fst.h b/decoder/phrasetable_fst.h index 477de1f7..966bb14d 100644 --- a/decoder/phrasetable_fst.h +++ b/decoder/phrasetable_fst.h @@ -1,5 +1,5 @@ -#ifndef _PHRASETABLE_FST_H_ -#define _PHRASETABLE_FST_H_ +#ifndef PHRASETABLE_FST_H_ +#define PHRASETABLE_FST_H_ #include <vector> #include <string> diff --git a/decoder/rescore_translator.cc b/decoder/rescore_translator.cc index 10192f7a..18c83c56 100644 --- a/decoder/rescore_translator.cc +++ b/decoder/rescore_translator.cc @@ -53,6 +53,7 @@ bool RescoreTranslator::TranslateImpl(const string& input, const vector<double>& weights, Hypergraph* minus_lm_forest) { smeta->SetSourceLength(0); // don't know how to compute this + smeta->input_type_ = cdec::kFOREST; return pimpl_->Translate(input, weights, minus_lm_forest); } diff --git a/decoder/rule_lexer.h b/decoder/rule_lexer.h index e15c056d..5267f9ca 100644 --- a/decoder/rule_lexer.h +++ b/decoder/rule_lexer.h @@ -1,5 +1,5 @@ -#ifndef _RULE_LEXER_H_ -#define _RULE_LEXER_H_ +#ifndef RULE_LEXER_H_ +#define RULE_LEXER_H_ #include <iostream> #include <string> diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index c3cfcaad..538f82ec 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -1,5 +1,6 @@ #include <algorithm> #include <vector> +#include <unordered_set> #include <boost/foreach.hpp> #include <boost/functional/hash.hpp> #include "fast_lexical_cast.hpp" @@ -194,6 +195,7 @@ struct SCFGTranslatorImpl { Lattice& lattice = smeta->src_lattice_; LatticeTools::ConvertTextOrPLF(input, &lattice); smeta->SetSourceLength(lattice.size()); + smeta->ComputeInputLatticeType(); if (add_pass_through_rules){ if (!SILENT) cerr << "Adding pass through grammar" << endl; PassThroughGrammar* g = new PassThroughGrammar(lattice, default_nt, ctf_iterations_, num_pt_features); diff --git a/decoder/sentence_metadata.h b/decoder/sentence_metadata.h index f2a779f4..e13c2ca5 100644 --- a/decoder/sentence_metadata.h +++ b/decoder/sentence_metadata.h @@ -1,14 +1,20 @@ -#ifndef _SENTENCE_METADATA_H_ -#define _SENTENCE_METADATA_H_ +#ifndef SENTENCE_METADATA_H_ +#define SENTENCE_METADATA_H_ #include <string> #include <map> #include <cassert> #include "lattice.h" +#include "tree_fragment.h" struct DocScorer; // deprecated, will be removed struct Score; // deprecated, will be removed +namespace cdec { +enum InputType { kSEQUENCE, kTREE, kLATTICE, kFOREST, kUNKNOWN }; +class TreeFragment; +} + class SentenceMetadata { public: friend class DecoderImpl; @@ -17,7 +23,17 @@ class SentenceMetadata { src_len_(-1), has_reference_(ref.size() > 0), trg_len_(ref.size()), - ref_(has_reference_ ? &ref : NULL) {} + ref_(has_reference_ ? &ref : NULL), + input_type_(cdec::kUNKNOWN) {} + + // helper function for lattice inputs + void ComputeInputLatticeType() { + input_type_ = cdec::kSEQUENCE; + for (auto& alt : src_lattice_) { + if (alt.size() > 1) { input_type_ = cdec::kLATTICE; break; } + } + } + cdec::InputType GetInputType() const { return input_type_; } int GetSentenceId() const { return sent_id_; } @@ -25,6 +41,8 @@ class SentenceMetadata { // it has parsed the source void SetSourceLength(int sl) { src_len_ = sl; } + const cdec::TreeFragment& GetSourceTree() const { return src_tree_; } + // this should be called if a separate model needs to // specify how long the target sentence should be void SetTargetLength(int tl) { @@ -64,12 +82,15 @@ class SentenceMetadata { const Score* app_score; public: Lattice src_lattice_; // this will only be set if inputs are finite state! + cdec::TreeFragment src_tree_; // this will be set only if inputs are trees private: // you need to be very careful when depending on these values // they will only be set during training / alignment contexts const bool has_reference_; int trg_len_; const Lattice* const ref_; + public: + cdec::InputType input_type_; }; #endif diff --git a/decoder/tagger.cc b/decoder/tagger.cc index 30fb055f..500d2061 100644 --- a/decoder/tagger.cc +++ b/decoder/tagger.cc @@ -100,6 +100,8 @@ bool Tagger::TranslateImpl(const string& input, Lattice& lattice = smeta->src_lattice_; LatticeTools::ConvertTextToLattice(input, &lattice); smeta->SetSourceLength(lattice.size()); + smeta->ComputeInputLatticeType(); + assert(smeta->GetInputType() == cdec::kSEQUENCE); vector<WordID> sequence(lattice.size()); for (int i = 0; i < lattice.size(); ++i) { assert(lattice[i].size() == 1); diff --git a/decoder/tagger.h b/decoder/tagger.h index 9ac820d9..51659d5b 100644 --- a/decoder/tagger.h +++ b/decoder/tagger.h @@ -1,5 +1,5 @@ -#ifndef _TAGGER_H_ -#define _TAGGER_H_ +#ifndef TAGGER_H_ +#define TAGGER_H_ #include "translator.h" diff --git a/decoder/translator.h b/decoder/translator.h index ba218a0b..096cf191 100644 --- a/decoder/translator.h +++ b/decoder/translator.h @@ -1,5 +1,5 @@ -#ifndef _TRANSLATOR_H_ -#define _TRANSLATOR_H_ +#ifndef TRANSLATOR_H_ +#define TRANSLATOR_H_ #include <string> #include <vector> diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc index adc8dc89..08dae64c 100644 --- a/decoder/tree2string_translator.cc +++ b/decoder/tree2string_translator.cc @@ -2,6 +2,7 @@ #include <vector> #include <queue> #include <map> +#include <unordered_map> #include <unordered_set> #include <boost/shared_ptr.hpp> #include <boost/functional/hash.hpp> @@ -31,12 +32,12 @@ static void ReadTree2StringGrammar(istream* in, Tree2StringGrammarNode* root, bo ++lc; if (line.size() == 0 || line[0] == '#') continue; std::vector<StringPiece> fields = TokenizeMultisep(line, " ||| "); - if (has_multiple_states && fields.size() != 4) { - cerr << "Expected 4 fields in rule file but line " << lc << " is:\n" << line << endl; + if (has_multiple_states && fields.size() < 4) { + cerr << "Expected at least 4 fields in rule file but line " << lc << " is:\n" << line << endl; abort(); } - if (!has_multiple_states && fields.size() != 3) { - cerr << "Expected 3 fields in rule file but line " << lc << " is:\n" << line << endl; + if (!has_multiple_states && fields.size() < 3) { + cerr << "Expected at least 3 fields in rule file but line " << lc << " is:\n" << line << endl; abort(); } @@ -72,6 +73,7 @@ static void ReadTree2StringGrammar(istream* in, Tree2StringGrammarNode* root, bo cerr << "Not implemented...\n"; abort(); // TODO read in states } else { os << " ||| " << fields[1] << " ||| " << fields[2]; + if (fields.size() > 3) os << " ||| " << fields[3]; rule.reset(new TRule(os.str())); } cur->rules.push_back(rule); @@ -286,6 +288,8 @@ struct Tree2StringTranslatorImpl { const vector<double>& weights, Hypergraph* minus_lm_forest) { cdec::TreeFragment input_tree(input, false); + smeta->src_tree_ = input_tree; + smeta->input_type_ = cdec::kTREE; if (add_pass_through_rules) CreatePassThroughRules(input_tree); Hypergraph hg; hg.ReserveNodes(input_tree.nodes.size()); diff --git a/decoder/tree_fragment.cc b/decoder/tree_fragment.cc index 42f7793a..5f717c5b 100644 --- a/decoder/tree_fragment.cc +++ b/decoder/tree_fragment.cc @@ -64,6 +64,13 @@ int TreeFragment::SetupSpansRec(unsigned cur, int left) { return right; } +vector<int> TreeFragment::Terminals() const { + vector<int> terms; + for (auto& x : *this) + if (IsTerminal(x)) terms.push_back(x); + return terms; +} + // cp is the character index in the tree // np keeps track of the nodes (nonterminals) that have been built // symp keeps track of the terminal symbols that have been built diff --git a/decoder/tree_fragment.h b/decoder/tree_fragment.h index 6b4842ee..e19b79fb 100644 --- a/decoder/tree_fragment.h +++ b/decoder/tree_fragment.h @@ -72,6 +72,8 @@ class TreeFragment { BreadthFirstIterator bfs_begin(unsigned node_idx) const; BreadthFirstIterator bfs_end() const; + std::vector<int> Terminals() const; + private: // cp is the character index in the tree // np keeps track of the nodes (nonterminals) that have been built diff --git a/decoder/trule.h b/decoder/trule.h index 243b0da9..adef7cc7 100644 --- a/decoder/trule.h +++ b/decoder/trule.h @@ -1,5 +1,5 @@ -#ifndef _RULE_H_ -#define _RULE_H_ +#ifndef TRULE_H_ +#define TRULE_H_ #include <algorithm> #include <vector> diff --git a/decoder/viterbi.h b/decoder/viterbi.h index a8a0ea7f..20ee73cc 100644 --- a/decoder/viterbi.h +++ b/decoder/viterbi.h @@ -1,5 +1,5 @@ -#ifndef _VITERBI_H_ -#define _VITERBI_H_ +#ifndef VITERBI_H_ +#define VITERBI_H_ #include <vector> #include "prob.h" |