diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-04-07 16:58:55 +0200 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-04-07 16:58:55 +0200 |
commit | e91553ae70907e243a554e4a549c53df57b78478 (patch) | |
tree | a4d044093f5937d0152b573c99914746b5a2b8ef /decoder | |
parent | fb714888562845a8ae10fd4411cf199961193833 (diff) | |
parent | 2fe4323cbfc34de906a2869f98c017b41e4ccae7 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'decoder')
-rw-r--r-- | decoder/aligner.cc | 4 | ||||
-rw-r--r-- | decoder/decoder.cc | 31 | ||||
-rw-r--r-- | decoder/earley_composer.cc | 4 | ||||
-rw-r--r-- | decoder/ff_wordalign.cc | 1 | ||||
-rw-r--r-- | decoder/grammar.cc | 24 | ||||
-rw-r--r-- | decoder/grammar.h | 2 | ||||
-rw-r--r-- | decoder/hg_io.cc | 20 | ||||
-rw-r--r-- | decoder/hg_io.h | 1 | ||||
-rw-r--r-- | decoder/phrasetable_fst.cc | 3 |
9 files changed, 53 insertions, 37 deletions
diff --git a/decoder/aligner.cc b/decoder/aligner.cc index 53e059fb..232e022a 100644 --- a/decoder/aligner.cc +++ b/decoder/aligner.cc @@ -11,7 +11,7 @@ #include "sentence_metadata.h" #include "inside_outside.h" #include "viterbi.h" -#include "alignment_pharaoh.h" +#include "alignment_io.h" using namespace std; @@ -300,7 +300,7 @@ void AlignerTools::WriteAlignment(const Lattice& src_lattice, cerr << grid << endl; } (*out) << TD::GetString(src_sent) << " ||| " << TD::GetString(trg_sent) << " ||| "; - AlignmentPharaoh::SerializePharaohFormat(grid, out); + AlignmentIO::SerializePharaohFormat(grid, out); } }; diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 53c47d21..ec6f75f7 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -57,7 +57,6 @@ static const double kMINUS_EPSILON = -1e-6; // don't be too strict using namespace std; using namespace std::tr1; -using boost::shared_ptr; namespace po = boost::program_options; static bool verbose_feature_functions=true; @@ -101,7 +100,7 @@ inline string str(char const* name,po::variables_map const& conf) { // print just the --long_opt names suitable for bash compgen inline void print_options(std::ostream &out,po::options_description const& opts) { - typedef std::vector< shared_ptr<po::option_description> > Ds; + typedef std::vector< boost::shared_ptr<po::option_description> > Ds; Ds const& ds=opts.options(); out << '"'; for (unsigned i=0;i<ds.size();++i) { @@ -120,13 +119,13 @@ inline bool store_conf(po::variables_map const& conf,std::string const& name,V * return false; } -inline shared_ptr<FeatureFunction> make_ff(string const& ffp,bool verbose_feature_functions,char const* pre="") { +inline boost::shared_ptr<FeatureFunction> make_ff(string const& ffp,bool verbose_feature_functions,char const* pre="") { string ff, param; SplitCommandAndParam(ffp, &ff, ¶m); cerr << pre << "feature: " << ff; if (param.size() > 0) cerr << " (with config parameters '" << param << "')\n"; else cerr << " (no config parameters)\n"; - shared_ptr<FeatureFunction> pf = ff_registry.Create(ff, param); + boost::shared_ptr<FeatureFunction> pf = ff_registry.Create(ff, param); if (!pf) exit(1); int nbyte=pf->NumBytesContext(); if (verbose_feature_functions) @@ -135,13 +134,13 @@ inline shared_ptr<FeatureFunction> make_ff(string const& ffp,bool verbose_featur } #ifdef FSA_RESCORING -inline shared_ptr<FsaFeatureFunction> make_fsa_ff(string const& ffp,bool verbose_feature_functions,char const* pre="") { +inline boost::shared_ptr<FsaFeatureFunction> make_fsa_ff(string const& ffp,bool verbose_feature_functions,char const* pre="") { string ff, param; SplitCommandAndParam(ffp, &ff, ¶m); cerr << "FSA Feature: " << ff; if (param.size() > 0) cerr << " (with config parameters '" << param << "')\n"; else cerr << " (no config parameters)\n"; - shared_ptr<FsaFeatureFunction> pf = fsa_ff_registry.Create(ff, param); + boost::shared_ptr<FsaFeatureFunction> pf = fsa_ff_registry.Create(ff, param); if (!pf) exit(1); if (verbose_feature_functions) cerr<<"State is "<<pf->state_bytes()<<" bytes for "<<pre<<"feature "<<ffp<<endl; @@ -156,10 +155,10 @@ inline shared_ptr<FsaFeatureFunction> make_fsa_ff(string const& ffp,bool verbose // passes are carried over into subsequent passes (where they may have different weights). struct RescoringPass { RescoringPass() : fid_summary(), density_prune(), beam_prune() {} - shared_ptr<ModelSet> models; - shared_ptr<IntersectionConfiguration> inter_conf; + boost::shared_ptr<ModelSet> models; + boost::shared_ptr<IntersectionConfiguration> inter_conf; vector<const FeatureFunction*> ffs; - shared_ptr<vector<weight_t> > weight_vector; + boost::shared_ptr<vector<weight_t> > weight_vector; int fid_summary; // 0 == no summary feature double density_prune; // 0 == don't density prune double beam_prune; // 0 == don't beam prune @@ -293,15 +292,15 @@ struct DecoderImpl { po::variables_map& conf; OracleBleu oracle; string formalism; - shared_ptr<Translator> translator; - shared_ptr<vector<weight_t> > init_weights; // weights used with initial parse - vector<shared_ptr<FeatureFunction> > pffs; + boost::shared_ptr<Translator> translator; + boost::shared_ptr<vector<weight_t> > init_weights; // weights used with initial parse + vector<boost::shared_ptr<FeatureFunction> > pffs; #ifdef FSA_RESCORING CFGOptions cfg_options; - vector<shared_ptr<FsaFeatureFunction> > fsa_ffs; + vector<boost::shared_ptr<FsaFeatureFunction> > fsa_ffs; vector<string> fsa_names; #endif - shared_ptr<RandomNumberGenerator<boost::mt19937> > rng; + boost::shared_ptr<RandomNumberGenerator<boost::mt19937> > rng; int sample_max_trans; bool aligner_mode; bool graphviz; @@ -310,7 +309,7 @@ struct DecoderImpl { bool kbest; bool unique_kbest; bool get_oracle_forest; - shared_ptr<WriteFile> extract_file; + boost::shared_ptr<WriteFile> extract_file; int combine_size; int sent_id; SparseVector<prob_t> acc_vec; // accumulate gradient @@ -622,7 +621,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream } // set up weight vectors since later phases may reuse weights from earlier phases - shared_ptr<vector<weight_t> > prev_weights = init_weights; + boost::shared_ptr<vector<weight_t> > prev_weights = init_weights; for (int pass = 0; pass < rescoring_passes.size(); ++pass) { RescoringPass& rp = rescoring_passes[pass]; if (!rp.weight_vector) { diff --git a/decoder/earley_composer.cc b/decoder/earley_composer.cc index b7af801a..385baf8b 100644 --- a/decoder/earley_composer.cc +++ b/decoder/earley_composer.cc @@ -16,8 +16,6 @@ #include "tdict.h" #include "hg.h" -using boost::shared_ptr; -namespace po = boost::program_options; using namespace std; using namespace std::tr1; @@ -111,7 +109,7 @@ struct Edge { const Edge* const active_parent; // back pointer, NULL for PREDICT items const Edge* const passive_parent; // back pointer, NULL for SCAN and PREDICT items const TargetPhraseSet* const tps; // translations - shared_ptr<SparseVector<double> > features; // features from CFG rule + boost::shared_ptr<SparseVector<double> > features; // features from CFG rule bool IsPassive() const { // when a rule is completed, this value will be set diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc index 9e7c618e..decdf9bc 100644 --- a/decoder/ff_wordalign.cc +++ b/decoder/ff_wordalign.cc @@ -15,7 +15,6 @@ #include "factored_lexicon_helper.h" #include "verbose.h" -#include "alignment_pharaoh.h" #include "stringlib.h" #include "sentence_metadata.h" #include "hg.h" diff --git a/decoder/grammar.cc b/decoder/grammar.cc index 9e4065a6..714390f0 100644 --- a/decoder/grammar.cc +++ b/decoder/grammar.cc @@ -3,12 +3,14 @@ #include <algorithm> #include <utility> #include <map> +#include <tr1/unordered_map> #include "rule_lexer.h" #include "filelib.h" #include "tdict.h" using namespace std; +using namespace std::tr1; const vector<TRulePtr> Grammar::NO_RULES; @@ -148,24 +150,24 @@ bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { return (i == 0); } -PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) : - has_rule_(input.size() + 1) { +PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) { + unordered_set<WordID> ss; for (int i = 0; i < input.size(); ++i) { const vector<LatticeArc>& alts = input[i]; for (int k = 0; k < alts.size(); ++k) { const int j = alts[k].dist2next + i; - has_rule_[i].insert(j); const string& src = TD::Convert(alts[k].label); - TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1")); - pt->a_.push_back(AlignmentPoint(0,0)); - AddRule(pt); - RefineRule(pt, ctf_level); + if (ss.count(alts[k].label) == 0) { + TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1")); + pt->a_.push_back(AlignmentPoint(0,0)); + AddRule(pt); + RefineRule(pt, ctf_level); + ss.insert(alts[k].label); + } } } } -bool PassThroughGrammar::HasRuleForSpan(int i, int j, int /* distance */) const { - const set<int>& hr = has_rule_[i]; - if (i == j) { return !hr.empty(); } - return (hr.find(j) != hr.end()); +bool PassThroughGrammar::HasRuleForSpan(int, int, int distance) const { + return (distance < 2); } diff --git a/decoder/grammar.h b/decoder/grammar.h index f5d00817..e6a15a69 100644 --- a/decoder/grammar.h +++ b/decoder/grammar.h @@ -91,8 +91,6 @@ struct GlueGrammar : public TextGrammar { struct PassThroughGrammar : public TextGrammar { PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0); virtual bool HasRuleForSpan(int i, int j, int distance) const; - private: - std::vector<std::set<int> > has_rule_; // index by [i][j] }; void RefineRule(TRulePtr pt, const unsigned int ctf_level); diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index 9f0f50fa..d416dbf6 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -401,6 +401,26 @@ string HypergraphIO::AsPLF(const Hypergraph& hg, bool include_global_parentheses return os.str(); } +string HypergraphIO::AsPLF(const Lattice& lat, bool include_global_parentheses) { + static bool first = true; + if (first) { InitEscapes(); first = false; } + if (lat.empty()) return "()"; + ostringstream os; + if (include_global_parentheses) os << '('; + static const string EPS="*EPS*"; + for (int i = 0; i < lat.size(); ++i) { + const vector<LatticeArc> arcs = lat[i]; + os << '('; + for (int j = 0; j < arcs.size(); ++j) { + os << "('" << Escape(TD::Convert(arcs[j].label)) << "'," + << arcs[j].cost << ',' << arcs[j].dist2next << "),"; + } + os << "),"; + } + if (include_global_parentheses) os << ')'; + return os.str(); +} + namespace PLF { const string chars = "'\\"; diff --git a/decoder/hg_io.h b/decoder/hg_io.h index 44817157..4e502a0c 100644 --- a/decoder/hg_io.h +++ b/decoder/hg_io.h @@ -30,6 +30,7 @@ struct HypergraphIO { static void ReadFromPLF(const std::string& in, Hypergraph* out, int line = 0); // return PLF string representation (undefined behavior on non-lattices) static std::string AsPLF(const Hypergraph& hg, bool include_global_parentheses = true); + static std::string AsPLF(const Lattice& lat, bool include_global_parentheses = true); static void PLFtoLattice(const std::string& plf, Lattice* pl); static std::string Escape(const std::string& s); // PLF helper }; diff --git a/decoder/phrasetable_fst.cc b/decoder/phrasetable_fst.cc index f421e941..b3bec86b 100644 --- a/decoder/phrasetable_fst.cc +++ b/decoder/phrasetable_fst.cc @@ -9,7 +9,6 @@ #include "filelib.h" #include "tdict.h" -using boost::shared_ptr; using namespace std; TargetPhraseSet::~TargetPhraseSet() {} @@ -46,7 +45,7 @@ class TextFSTNode : public FSTNode { void ClearPassThroughTranslations(); private: vector<WordID> passthroughs; - shared_ptr<TargetPhraseSet> data; + boost::shared_ptr<TargetPhraseSet> data; map<WordID, TextFSTNode> ptr; }; |