From 5d0f3c6aa4e78aea09952a7a65f61d3c4dce0a0e Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Mon, 14 Mar 2011 17:05:14 -0400 Subject: Fix wordset to override features() so that we can safely use multiple instances of it --- decoder/ff_wordset.h | 1 + 1 file changed, 1 insertion(+) (limited to 'decoder') diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h index 00e1145b..643097ef 100644 --- a/decoder/ff_wordset.h +++ b/decoder/ff_wordset.h @@ -32,6 +32,7 @@ class WordSet : public FeatureFunction { ~WordSet() { } + Features features() const { return single_feature(fid_); } protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -- cgit v1.2.3 From 6b6eeff3130bcb40980886d8179ba4ad6842325e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 16 Mar 2011 19:48:41 -0400 Subject: explicit markers turned on by default --- decoder/ff_klm.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'decoder') diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc index adc2c8bf..62908cdc 100644 --- a/decoder/ff_klm.cc +++ b/decoder/ff_klm.cc @@ -21,7 +21,7 @@ static const unsigned char MASK = 7; // -n NAME : feature id is NAME bool ParseLMArgs(string const& in, string* filename, string* mapfile, bool* explicit_markers, string* featname) { vector const& argv=SplitOnWhitespace(in); - *explicit_markers = true; + *explicit_markers = false; *featname="LanguageModel"; *mapfile = ""; #define LMSPEC_NEXTARG if (i==argv.end()) { \ -- cgit v1.2.3 From 95e50962fe307b930e835513e4d9998df91426a4 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 16 Mar 2011 20:30:37 -0400 Subject: possible mert bug with rules with alignments --- decoder/trule.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'decoder') diff --git a/decoder/trule.cc b/decoder/trule.cc index 9820e6d5..fda62741 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -145,7 +145,9 @@ bool TRule::ReadFromString(const string& line, bool strict, bool mono) { getline(is, ss); //cerr << "L: " << ss << endl; int start = 0; - const int len = ss.size(); + int len = ss.size(); + const size_t ppos = ss.find(" |||"); + if (ppos != string::npos) { len = ppos; } while (start < len) { while(start < len && (ss[start] == ' ' || ss[start] == ';')) ++start; -- cgit v1.2.3 From da6c892bc05a5520910e23089d83ceb1f2a0fbb4 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 21 Mar 2011 22:10:02 -0400 Subject: add support for normalized 'summary features'- seemingly sound way of dealing with normalization problems in embedded crf translation models --- decoder/decoder.cc | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'decoder') diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 95ff6270..8a03c5c9 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -141,12 +141,13 @@ inline shared_ptr make_fsa_ff(string const& ffp,bool verbose // and then prune the resulting (rescored) hypergraph. All feature values from previous // passes are carried over into subsequent passes (where they may have different weights). struct RescoringPass { - RescoringPass() : density_prune(), beam_prune() {} + RescoringPass() : fid_summary(), density_prune(), beam_prune() {} shared_ptr models; shared_ptr inter_conf; vector ffs; shared_ptr w; // null == use previous weights vector weight_vector; + int fid_summary; // 0 == no summary feature double density_prune; // 0 == don't density prune double beam_prune; // 0 == don't beam prune }; @@ -155,6 +156,7 @@ ostream& operator<<(ostream& os, const RescoringPass& rp) { os << "[num_fn=" << rp.ffs.size(); if (rp.inter_conf) { os << " int_alg=" << *rp.inter_conf; } if (rp.w) os << " new_weights"; + if (rp.fid_summary) os << " summary_feature=" << FD::Convert(rp.fid_summary); if (rp.density_prune) os << " density_prune=" << rp.density_prune; if (rp.beam_prune) os << " beam_prune=" << rp.beam_prune; os << ']'; @@ -361,18 +363,21 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream ("weights,w",po::value(),"Feature weights file (initial forest / pass 1)") ("feature_function,F",po::value >()->composing(), "Pass 1 additional feature function(s) (-L for list)") ("intersection_strategy,I",po::value()->default_value("cube_pruning"), "Pass 1 intersection strategy for incorporating finite-state features; values include Cube_pruning, Full") + ("summary_feature", po::value(), "Compute a 'summary feature' at the end of the pass (before any pruning) with name=arg and value=inside-outside/Z") ("density_prune", po::value(), "Pass 1 pruning: keep no more than this many times the number of edges used in the best derivation tree (>=1.0)") ("beam_prune", po::value(), "Pass 1 pruning: Prune paths from scored forest, keep paths within exp(alpha>=0)") ("weights2",po::value(),"Optional pass 2") ("feature_function2",po::value >()->composing(), "Optional pass 2") ("intersection_strategy2",po::value()->default_value("cube_pruning"), "Optional pass 2") + ("summary_feature2", po::value(), "Optional pass 2") ("density_prune2", po::value(), "Optional pass 2") ("beam_prune2", po::value(), "Optional pass 2") ("weights3",po::value(),"Optional pass 3") ("feature_function3",po::value >()->composing(), "Optional pass 3") ("intersection_strategy3",po::value()->default_value("cube_pruning"), "Optional pass 3") + ("summary_feature3", po::value(), "Optional pass 3") ("density_prune3", po::value(), "Optional pass 3") ("beam_prune3", po::value(), "Optional pass 3") @@ -559,6 +564,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream for (int pass = 0; pass < MAX_PASSES; ++pass) { string ws = "weights" + StringSuffixForRescoringPass(pass); string ff = "feature_function" + StringSuffixForRescoringPass(pass); + string sf = "summary_feature" + StringSuffixForRescoringPass(pass); string bp = "beam_prune" + StringSuffixForRescoringPass(pass); string dp = "density_prune" + StringSuffixForRescoringPass(pass); bool first_pass_condition = ((pass == 0) && (conf.count(ff) || conf.count(bp) || conf.count(dp))); @@ -583,6 +589,11 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream if (p->IsStateful()) { has_stateful = true; } } } + if (conf.count(sf)) { + rp.fid_summary = FD::Convert(conf[sf].as()); + assert(rp.fid_summary > 0); + // TODO assert that weights for this pass have coef(fid_summary) == 0.0? + } if (conf.count(bp)) { rp.beam_prune = conf[bp].as(); } if (conf.count(dp)) { rp.density_prune = conf[dp].as(); } int palg = (has_stateful ? 1 : 0); // if there are no stateful featueres, default to FULL @@ -794,6 +805,15 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { cerr << " " << passtr << " partition log(Z): " << log(z) << endl; } + if (rp.fid_summary) { + Hypergraph::EdgeProbs posteriors; + const prob_t z = forest.ComputeEdgePosteriors(1.0, &posteriors); + if (!SILENT) { cerr << " " << passtr << " adding summary feature " << FD::Convert(rp.fid_summary) << " log(Z)=" << log(z) << endl; } + assert(forest.edges_.size() == posteriors.size()); + for (int i = 0; i < posteriors.size(); ++i) + forest.edges_[i].feature_values_.set_value(rp.fid_summary, log(posteriors[i] / z)); + } + string fullbp = "beam_prune" + StringSuffixForRescoringPass(pass); string fulldp = "density_prune" + StringSuffixForRescoringPass(pass); maybe_prune(forest,conf,fullbp.c_str(),fulldp.c_str(),passtr,srclen); -- cgit v1.2.3 From 4bc9ea17ba9f85c899e35a9d657ee3f174ff2863 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 22 Mar 2011 11:35:45 -0400 Subject: check for infs --- decoder/decoder.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'decoder') diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 8a03c5c9..a16a9b5a 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -810,8 +810,12 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { const prob_t z = forest.ComputeEdgePosteriors(1.0, &posteriors); if (!SILENT) { cerr << " " << passtr << " adding summary feature " << FD::Convert(rp.fid_summary) << " log(Z)=" << log(z) << endl; } assert(forest.edges_.size() == posteriors.size()); - for (int i = 0; i < posteriors.size(); ++i) - forest.edges_[i].feature_values_.set_value(rp.fid_summary, log(posteriors[i] / z)); + if (!isfinite(log(z)) || isnan(log(z))) { + cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n"; + } else { + for (int i = 0; i < posteriors.size(); ++i) + forest.edges_[i].feature_values_.set_value(rp.fid_summary, log(posteriors[i] / z)); + } } string fullbp = "beam_prune" + StringSuffixForRescoringPass(pass); -- cgit v1.2.3 From c0ae6f362b245ccf2ab3b8d6dc7e367cbcc64c1c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 22 Mar 2011 14:44:46 -0400 Subject: fix local normalizer code for summary features --- decoder/decoder.cc | 16 +++++++++++----- decoder/hg.cc | 5 +++-- decoder/hg.h | 2 +- 3 files changed, 15 insertions(+), 8 deletions(-) (limited to 'decoder') diff --git a/decoder/decoder.cc b/decoder/decoder.cc index a16a9b5a..89425198 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -806,15 +806,21 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { } if (rp.fid_summary) { - Hypergraph::EdgeProbs posteriors; - const prob_t z = forest.ComputeEdgePosteriors(1.0, &posteriors); + const prob_t z = forest.PushWeightsToGoal(1.0); if (!SILENT) { cerr << " " << passtr << " adding summary feature " << FD::Convert(rp.fid_summary) << " log(Z)=" << log(z) << endl; } - assert(forest.edges_.size() == posteriors.size()); if (!isfinite(log(z)) || isnan(log(z))) { cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n"; } else { - for (int i = 0; i < posteriors.size(); ++i) - forest.edges_[i].feature_values_.set_value(rp.fid_summary, log(posteriors[i] / z)); + for (int i = 0; i < forest.edges_.size(); ++i) { + const double log_prob_transition = log(forest.edges_[i].edge_prob_); // locally normalized by the edge + // head node by forest.PushWeightsToGoal + if (!isfinite(log_prob_transition) || isnan(log_prob_transition)) { + cerr << "Edge: i=" << i << " got bad inside prob: " << *forest.edges_[i].rule_ << endl; + abort(); + } + + forest.edges_[i].feature_values_.set_value(rp.fid_summary, log_prob_transition); + } } } diff --git a/decoder/hg.cc b/decoder/hg.cc index 39ac5132..a4028b0e 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -226,9 +226,9 @@ prob_t Hypergraph::PushViterbiWeightsToGoal(int fid) { } -void Hypergraph::PushWeightsToGoal(double scale) { +prob_t Hypergraph::PushWeightsToGoal(double scale) { vector posts; - ComputeEdgePosteriors(scale, &posts); + const prob_t inside_z = ComputeEdgePosteriors(scale, &posts); for (int i = 0; i < nodes_.size(); ++i) { const Hypergraph::Node& node = nodes_[i]; prob_t z = prob_t::Zero(); @@ -238,6 +238,7 @@ void Hypergraph::PushWeightsToGoal(double scale) { edges_[node.in_edges_[j]].edge_prob_ = posts[node.in_edges_[j]] / z; } } + return inside_z; } struct EdgeExistsWeightFunction { diff --git a/decoder/hg.h b/decoder/hg.h index aa1202b1..e5ef05f8 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -449,7 +449,7 @@ public: void PushWeightsToSource(double scale = 1.0); // same, except weights are pushed to the goal, works for HGs, // not just lattices - void PushWeightsToGoal(double scale = 1.0); + prob_t PushWeightsToGoal(double scale = 1.0); // contrary to PushWeightsToGoal, use viterbi semiring; store log(p) to fid. note that p_viterbi becomes 1; k*p_viterbi becomes k. also modifies edge_prob_ (note that the fid stored log(p) will stick around even if you reweight) // afterwards, product of edge_prob_ for a derivation will equal 1 for the viterbi (p_v before, 1 after), and in general (k*p_v before, k after). returns inside(goal) -- cgit v1.2.3 From 12ece6ddfa91ec61cdeee698db2c7edac941e096 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 22 Mar 2011 16:48:28 -0400 Subject: reweight after weight pushing to avoid weird output --- decoder/decoder.cc | 1 + 1 file changed, 1 insertion(+) (limited to 'decoder') diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 89425198..ac063659 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -821,6 +821,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { forest.edges_[i].feature_values_.set_value(rp.fid_summary, log_prob_transition); } + forest.Reweight(cur_weights); // reset weights } } -- cgit v1.2.3 From 57a218e86e30d57d9795bccd280737c431f6b4e4 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 23 Mar 2011 12:15:55 -0400 Subject: yet another feature attempt --- decoder/decoder.cc | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'decoder') diff --git a/decoder/decoder.cc b/decoder/decoder.cc index ac063659..b7774acc 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -806,6 +806,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { } if (rp.fid_summary) { +#if 0 const prob_t z = forest.PushWeightsToGoal(1.0); if (!SILENT) { cerr << " " << passtr << " adding summary feature " << FD::Convert(rp.fid_summary) << " log(Z)=" << log(z) << endl; } if (!isfinite(log(z)) || isnan(log(z))) { @@ -823,6 +824,26 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { } forest.Reweight(cur_weights); // reset weights } +#endif + Hypergraph::EdgeProbs posts; + const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts); + if (!isfinite(log(z)) || isnan(log(z))) { + cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n"; + } else { + for (int i = 0; i < forest.nodes_.size(); ++i) { + const Hypergraph::EdgesVector& in_edges = forest.nodes_[i].in_edges_; + prob_t node_post = prob_t(0); + for (int j = 0; j < in_edges.size(); ++j) + node_post += (posts[in_edges[j]] / z); + const double log_np = log(node_post); + if (!isfinite(log_np) || isnan(log_np)) { + cerr << "got bad posterior prob for node " << i << endl; + abort(); + } + for (int j = 0; j < in_edges.size(); ++j) + forest.edges_[in_edges[j]].feature_values_.set_value(rp.fid_summary, exp(log_np)); + } + } } string fullbp = "beam_prune" + StringSuffixForRescoringPass(pass); -- cgit v1.2.3 From 918ed4bf919a55e3eb5d99d98c9b915921dc11ab Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 23 Mar 2011 22:53:44 -0400 Subject: remove thread-local stuff which was fragile on some build systems --- decoder/trule.cc | 3 +-- utils/static_utoa.h | 2 +- utils/tdict.cc | 1 - utils/threadlocal.h | 71 ----------------------------------------------------- 4 files changed, 2 insertions(+), 75 deletions(-) delete mode 100755 utils/threadlocal.h (limited to 'decoder') diff --git a/decoder/trule.cc b/decoder/trule.cc index fda62741..40235542 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -5,7 +5,6 @@ #include "stringlib.h" #include "tdict.h" #include "rule_lexer.h" -#include "threadlocal.h" using namespace std; @@ -99,7 +98,7 @@ TRule* TRule::CreateRuleMonolingual(const string& rule) { namespace { // callback for lexer -THREADLOCAL int n_assigned=0; +int n_assigned=0; void assign_trule(const TRulePtr& new_rule, const unsigned int ctf_level, const TRulePtr& coarse_rule, void* extra) { TRule *assignto=(TRule *)extra; *assignto=*new_rule; diff --git a/utils/static_utoa.h b/utils/static_utoa.h index d15ed35b..bb3d821f 100755 --- a/utils/static_utoa.h +++ b/utils/static_utoa.h @@ -7,7 +7,7 @@ namespace { static const int utoa_bufsize=40; // 64bit safe. static const int utoa_bufsizem1=utoa_bufsize-1; // 64bit safe. -THREADLOCAL char utoa_buf[utoa_bufsize]; // to put end of string character at buf[20] +static char utoa_buf[utoa_bufsize]; // to put end of string character at buf[20] } inline char *static_utoa(unsigned n) { diff --git a/utils/tdict.cc b/utils/tdict.cc index 23a298f8..c21b2b48 100644 --- a/utils/tdict.cc +++ b/utils/tdict.cc @@ -8,7 +8,6 @@ #include "dict.h" #include "tdict.h" #include "stringlib.h" -#include "threadlocal.h" using namespace std; diff --git a/utils/threadlocal.h b/utils/threadlocal.h deleted file mode 100755 index d79f5d9d..00000000 --- a/utils/threadlocal.h +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef THREADLOCAL_H -#define THREADLOCAL_H - -#ifndef SETLOCAL_SWAP -# define SETLOCAL_SWAP 0 -#endif - -#ifdef BOOST_NO_MT - -# define THREADLOCAL - -#else - -#ifdef _MSC_VER - -//FIXME: doesn't work with DLLs ... use TLS apis instead (http://www.boost.org/libs/thread/doc/tss.html) -# define THREADLOCAL __declspec(thread) - -#else - -# define THREADLOCAL __thread - -#endif - -#endif - -#include //swap - -// naturally, the below are only thread-safe if value is THREADLOCAL -template -struct SaveLocal { - D &value; - D old_value; - SaveLocal(D& val) : value(val), old_value(val) {} - ~SaveLocal() { -#if SETLOCAL_SWAP - swap(value,old_value); -#else - value=old_value; -#endif - } -}; - -template -struct SetLocal { - D &value; - D old_value; - SetLocal(D& val,const D &new_value) : value(val), old_value( -#if SETLOCAL_SWAP - new_value -#else - val -#endif - ) { -#if SETLOCAL_SWAP - swap(value,old_value); -#else - value=new_value; -#endif - } - ~SetLocal() { -#if SETLOCAL_SWAP - swap(value,old_value); -#else - value=old_value; -#endif - } -}; - - -#endif -- cgit v1.2.3 From e03a6c2b2e3cc21d75904300d34249cd1e2e032b Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Wed, 23 Mar 2011 22:55:50 -0400 Subject: refactor makefile --- decoder/Makefile.am | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'decoder') diff --git a/decoder/Makefile.am b/decoder/Makefile.am index e1dba497..244da2de 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -6,16 +6,13 @@ noinst_PROGRAMS = \ hg_test \ ff_test \ parser_test \ - grammar_test \ - cfg_test -TESTS = trule_test ff_test parser_test grammar_test hg_test cfg_test -endif - -cdec_SOURCES = cdec.cc -cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz - -cfg_test_SOURCES = cfg_test.cc -cfg_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz + grammar_test + + # cfg_test +TESTS = trule_test ff_test parser_test grammar_test hg_test +# cfg_test +#cfg_test_SOURCES = cfg_test.cc +#cfg_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz parser_test_SOURCES = parser_test.cc parser_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz ff_test_SOURCES = ff_test.cc @@ -26,6 +23,11 @@ hg_test_SOURCES = hg_test.cc hg_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz trule_test_SOURCES = trule_test.cc trule_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +endif + +cdec_SOURCES = cdec.cc +cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz + AM_CPPFLAGS = -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm rule_lexer.cc: rule_lexer.l -- cgit v1.2.3