diff options
author | Avneesh Saluja <asaluja@gmail.com> | 2013-03-28 18:28:16 -0700 |
---|---|---|
committer | Avneesh Saluja <asaluja@gmail.com> | 2013-03-28 18:28:16 -0700 |
commit | 3d8d656fa7911524e0e6885647173474524e0784 (patch) | |
tree | 81b1ee2fcb67980376d03f0aa48e42e53abff222 /decoder | |
parent | be7f57fdd484e063775d7abf083b9fa4c403b610 (diff) | |
parent | 96fedabebafe7a38a6d5928be8fff767e411d705 (diff) |
fixed conflicts
Diffstat (limited to 'decoder')
66 files changed, 1145 insertions, 826 deletions
diff --git a/decoder/Jamfile b/decoder/Jamfile deleted file mode 100644 index da02d063..00000000 --- a/decoder/Jamfile +++ /dev/null @@ -1,81 +0,0 @@ -import testing ; -import lex ; -import option ; - -if [ option.get "with-glc" ] { - glc = ff_glc.cc string_util.cc feature-factory.cc ; -} - -lib decoder : - forest_writer.cc - maxtrans_blunsom.cc - cdec_ff.cc - cfg.cc - dwarf.cc - ff_dwarf.cc - rule_lexer.ll - fst_translator.cc - csplit.cc - translator.cc - scfg_translator.cc - hg.cc - hg_io.cc - decoder.cc - hg_intersect.cc - hg_sampler.cc - factored_lexicon_helper.cc - viterbi.cc - lattice.cc - aligner.cc - apply_models.cc - earley_composer.cc - phrasetable_fst.cc - trule.cc - ff.cc - ff_rules.cc - ff_wordset.cc - ff_context.cc - ff_charset.cc - ff_lm.cc - ff_klm.cc - ff_ngrams.cc - ff_spans.cc - ff_ruleshape.cc - ff_wordalign.cc - ff_csplit.cc - ff_tagger.cc - ff_source_syntax.cc - ff_bleu.cc - ff_factory.cc - lexalign.cc - lextrans.cc - tagger.cc - bottom_up_parser.cc - phrasebased_translator.cc - JSON_parser.c - json_parse.cc - grammar.cc - rescore_translator.cc - hg_remove_eps.cc - hg_union.cc - $(glc) - ..//utils - ..//mteval - ../klm/lm//kenlm - ..//boost_program_options - : <include>. - : : - <library>..//utils - <library>..//mteval - <library>../klm/lm//kenlm - <library>..//boost_program_options - <include>. - ; - -exe cdec : cdec.cc decoder ..//utils ..//mteval ../klm/lm//kenlm ..//boost_program_options ; - -all_tests [ glob *_test.cc : cfg_test.cc ] : decoder : <testing.arg>$(TOP)/decoder/test_data ; - -install legacy : cdec - : <location>$(TOP)/cdec <install-type>EXE <install-dependencies>on <link>shared:<dll-path>$(TOP)/cdec <link>shared:<install-type>LIB ; - diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 4a98a4f1..82b50f19 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -5,34 +5,106 @@ noinst_PROGRAMS = \ hg_test \ parser_test \ grammar_test - + TESTS = trule_test parser_test grammar_test hg_test parser_test_SOURCES = parser_test.cc -parser_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +parser_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a grammar_test_SOURCES = grammar_test.cc -grammar_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +grammar_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a hg_test_SOURCES = hg_test.cc -hg_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +hg_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a trule_test_SOURCES = trule_test.cc -trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz +trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEWORK_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a cdec_SOURCES = cdec.cc -cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz +cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a -AM_CPPFLAGS = -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm +AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm rule_lexer.cc: rule_lexer.ll $(LEX) -s -CF -8 -o$@ $< noinst_LIBRARIES = libcdec.a +EXTRA_DIST = test_data rule_lexer.ll + libcdec_a_SOURCES = \ + JSON_parser.h \ + aligner.h \ + apply_fsa_models.h \ + apply_models.h \ + bottom_up_parser.h \ + cfg.h \ + cfg_binarize.h \ + cfg_format.h \ + cfg_options.h \ + csplit.h \ + decoder.h \ + dwarf.h \ + earley_composer.h \ + exp_semiring.h \ + factored_lexicon_helper.h \ + ff.h \ + ff_basic.h \ + ff_bleu.h \ + ff_charset.h \ + ff_context.h \ + ff_csplit.h \ + ff_dwarf.h \ + ff_external.h \ + ff_factory.h \ + ff_klm.h \ + ff_lm.h \ + ff_ngrams.h \ + ff_register.h \ + ff_rules.h \ + ff_ruleshape.h \ + ff_sample_fsa.h \ + ff_source_path.h \ + ff_source_syntax.h \ + ff_spans.h \ + ff_tagger.h \ + ff_wordalign.h \ + ff_wordset.h \ + ffset.h \ + forest_writer.h \ + freqdict.h \ + grammar.h \ + hg.h \ + hg_cfg.h \ + hg_intersect.h \ + hg_io.h \ + hg_remove_eps.h \ + hg_sampler.h \ + hg_test.h \ + hg_union.h \ + incremental.h \ + inside_outside.h \ + json_parse.h \ + kbest.h \ + lattice.h \ + lexalign.h \ + lextrans.h \ + nt_span.h \ + oracle_bleu.h \ + phrasebased_translator.h \ + phrasetable_fst.h \ + program_options.h \ + rule_lexer.h \ + sentence_metadata.h \ + sentences.h \ + tagger.h \ + translator.h \ + tromble_loss.h \ + trule.h \ + viterbi.h \ forest_writer.cc \ maxtrans_blunsom.cc \ cdec_ff.cc \ cfg.cc \ dwarf.cc \ ff_dwarf.cc \ + ff_external.cc \ rule_lexer.cc \ fst_translator.cc \ csplit.cc \ @@ -55,6 +127,8 @@ libcdec_a_SOURCES = \ phrasetable_fst.cc \ trule.cc \ ff.cc \ + ffset.cc \ + ff_basic.cc \ ff_rules.cc \ ff_wordset.cc \ ff_context.cc \ @@ -67,9 +141,11 @@ libcdec_a_SOURCES = \ ff_wordalign.cc \ ff_csplit.cc \ ff_tagger.cc \ + ff_source_path.cc \ ff_source_syntax.cc \ ff_bleu.cc \ ff_factory.cc \ + incremental.cc \ lexalign.cc \ lextrans.cc \ tagger.cc \ @@ -78,8 +154,3 @@ libcdec_a_SOURCES = \ JSON_parser.c \ json_parse.cc \ grammar.cc - -if GLC - # Until we build GLC as a library... - libcdec_a_SOURCES += ff_glc.cc string_util.cc feature-factory.cc -endif diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc index 9ba59d1b..330de9e2 100644 --- a/decoder/apply_models.cc +++ b/decoder/apply_models.cc @@ -16,6 +16,7 @@ #include "verbose.h" #include "hg.h" #include "ff.h" +#include "ffset.h" #define NORMAL_CP 1 #define FAST_CP 2 diff --git a/decoder/cdec.cc b/decoder/cdec.cc index 4819105c..cc3fcff1 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -5,6 +5,7 @@ #include "ff_register.h" #include "verbose.h" #include "timing_stats.h" +#include "util/usage.hh" using namespace std; @@ -40,6 +41,7 @@ int main(int argc, char** argv) { cout << FD::Convert(i) << endl; } } + util::PrintUsage(std::cerr); return 0; } diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index b516c386..0bf441d4 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -1,6 +1,7 @@ #include <boost/shared_ptr.hpp> #include "ff.h" +#include "ff_basic.h" #include "ff_context.h" #include "ff_spans.h" #include "ff_lm.h" @@ -13,11 +14,13 @@ #include "ff_rules.h" #include "ff_ruleshape.h" #include "ff_bleu.h" +#include "ff_source_path.h" #include "ff_source_syntax.h" #include "ff_register.h" #include "ff_charset.h" #include "ff_wordset.h" #include "ff_dwarf.h" +#include "ff_external.h" #ifdef HAVE_GLC #include <cdec/ff_glc.h> @@ -47,8 +50,9 @@ void register_feature_functions() { ff_registry.Register("RuleIdentityFeatures", new FFFactory<RuleIdentityFeatures>()); ff_registry.Register("SourceSyntaxFeatures", new FFFactory<SourceSyntaxFeatures>); ff_registry.Register("SourceSpanSizeFeatures", new FFFactory<SourceSpanSizeFeatures>); - ff_registry.Register("RuleNgramFeatures", new FFFactory<RuleNgramFeatures>()); ff_registry.Register("CMR2008ReorderingFeatures", new FFFactory<CMR2008ReorderingFeatures>()); + ff_registry.Register("RuleSourceBigramFeatures", new FFFactory<RuleSourceBigramFeatures>()); + ff_registry.Register("RuleTargetBigramFeatures", new FFFactory<RuleTargetBigramFeatures>()); ff_registry.Register("KLanguageModel", new KLanguageModelFactory()); ff_registry.Register("NonLatinCount", new FFFactory<NonLatinCount>); ff_registry.Register("RuleShape", new FFFactory<RuleShapeFeatures>); @@ -67,8 +71,10 @@ void register_feature_functions() { ff_registry.Register("InputIndicator", new FFFactory<InputIndicator>); ff_registry.Register("LexicalTranslationTrigger", new FFFactory<LexicalTranslationTrigger>); ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>); + ff_registry.Register("SourcePathFeatures", new FFFactory<SourcePathFeatures>); ff_registry.Register("WordSet", new FFFactory<WordSet>); ff_registry.Register("Dwarf", new FFFactory<Dwarf>); + ff_registry.Register("External", new FFFactory<ExternalFeature>); #ifdef HAVE_GLC ff_registry.Register("ContextCRF", new FFFactory<Model1Features>); #endif diff --git a/decoder/cfg.h b/decoder/cfg.h index 8cb29bb9..aeeacb83 100644 --- a/decoder/cfg.h +++ b/decoder/cfg.h @@ -130,7 +130,7 @@ struct CFG { int lhs; // index into nts RHS rhs; prob_t p; // h unused for now (there's nothing admissable, and p is already using 1st pass inside as pushed toward top) - FeatureVector f; // may be empty, unless copy_features on Init + SparseVector<double> f; // may be empty, unless copy_features on Init IF_CFG_TRULE(TRulePtr rule;) int size() const { // for stats only return rhs.size(); diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h index 2f40d483..d12da261 100644 --- a/decoder/cfg_format.h +++ b/decoder/cfg_format.h @@ -100,7 +100,7 @@ struct CFGFormat { } } - void print_features(std::ostream &o,prob_t p,FeatureVector const& fv=FeatureVector()) const { + void print_features(std::ostream &o,prob_t p,SparseVector<double> const& fv=SparseVector<double>()) const { bool logp=(logprob_feat && p!=prob_t::One()); if (features || logp) { o << partsep; diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc index b8f4cf11..cbe7d0be 100644 --- a/decoder/cfg_test.cc +++ b/decoder/cfg_test.cc @@ -25,15 +25,15 @@ struct CFGTest : public TestWithParam<HgW> { Hypergraph hg; CFG cfg; CFGFormat form; - FeatureVector weights; + SparseVector<double> weights; - static void JsonFN(Hypergraph &hg,CFG &cfg,FeatureVector &featw,std::string file + static void JsonFN(Hypergraph &hg,CFG &cfg,SparseVector<double> &featw,std::string file ,std::string const& wts="Model_0 1 EgivenF 1 f1 1") { istringstream ws(wts); EXPECT_TRUE(ws>>featw); CSHOW(featw) - std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); HGSetup::JsonTestFile(&hg,path,file); hg.Reweight(featw); cfg.Init(hg,true,true,false); diff --git a/decoder/decoder.cc b/decoder/decoder.cc index a69a6d05..31e6dc46 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -4,6 +4,7 @@ #include <boost/program_options.hpp> #include <boost/program_options/variables_map.hpp> #include <boost/make_shared.hpp> +#include <boost/scoped_ptr.hpp> #include "program_options.h" #include "stringlib.h" @@ -29,6 +30,7 @@ #include "oracle_bleu.h" #include "apply_models.h" #include "ff.h" +#include "ffset.h" #include "ff_factory.h" #include "viterbi.h" #include "kbest.h" @@ -38,6 +40,7 @@ #include "sampler.h" #include "forest_writer.h" // TODO this section should probably be handled by an Observer +#include "incremental.h" #include "hg_io.h" #include "aligner.h" @@ -90,11 +93,6 @@ inline void ShowBanner() { cerr << "cdec v1.0 (c) 2009-2011 by Chris Dyer\n"; } -inline void show_models(po::variables_map const& conf,ModelSet &ms,char const* header) { - cerr<<header<<": "; - ms.show_features(cerr,cerr,conf.count("warn_0_weight")); -} - inline string str(char const* name,po::variables_map const& conf) { return conf[name].as<string>(); } @@ -132,7 +130,7 @@ inline boost::shared_ptr<FeatureFunction> make_ff(string const& ffp,bool verbose } boost::shared_ptr<FeatureFunction> pf = ff_registry.Create(ff, param); if (!pf) exit(1); - int nbyte=pf->NumBytesContext(); + int nbyte=pf->StateSize(); if (verbose_feature_functions && !SILENT) cerr<<"State is "<<nbyte<<" bytes for "<<pre<<"feature "<<ffp<<endl; return pf; @@ -328,6 +326,8 @@ struct DecoderImpl { bool feature_expectations; // TODO Observer bool output_training_vector; // TODO Observer bool remove_intersected_rule_annotations; + boost::scoped_ptr<IncrementalBase> incremental; + static void ConvertSV(const SparseVector<prob_t>& src, SparseVector<double>* trg) { for (SparseVector<prob_t>::const_iterator it = src.begin(); it != src.end(); ++it) @@ -415,6 +415,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream ("show_conditional_prob", "Output the conditional log prob to STDOUT instead of a translation") ("show_cfg_search_space", "Show the search space as a CFG") ("show_target_graph", po::value<string>(), "Directory to write the target hypergraphs to") + ("incremental_search", po::value<string>(), "Run lazy search with this language model file") ("coarse_to_fine_beam_prune", po::value<double>(), "Prune paths from coarse parse forest before fine parse, keeping paths within exp(alpha>=0)") ("ctf_beam_widen", po::value<double>()->default_value(2.0), "Expand coarse pass beam by this factor if no fine parse is found") ("ctf_num_widenings", po::value<int>()->default_value(2), "Widen coarse beam this many times before backing off to full parse") @@ -642,8 +643,6 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream prev_weights = rp.weight_vector; } rp.models.reset(new ModelSet(*rp.weight_vector, rp.ffs)); - string ps = "Pass1 "; ps[4] += pass; - if (!SILENT) show_models(conf,*rp.models,ps.c_str()); } // show configuration of rescoring passes @@ -731,6 +730,10 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream sent_id = -1; acc_obj = 0; // accumulate objective g_count = 0; // number of gradient pieces computed + + if (conf.count("incremental_search")) { + incremental.reset(IncrementalBase::Load(conf["incremental_search"].as<string>().c_str(), CurrentWeightVector())); + } } Decoder::Decoder(istream* cfg) { pimpl_.reset(new DecoderImpl(conf,0,0,cfg)); } @@ -829,8 +832,16 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { abort(); } - if (conf.count("show_target_graph")) + if (conf.count("show_target_graph")) { HypergraphIO::WriteTarget(conf["show_target_graph"].as<string>(), sent_id, forest); + } + if (conf.count("incremental_search")) { + incremental->Search(pop_limit, forest); + } + if (conf.count("show_target_graph") || conf.count("incremental_search")) { + o->NotifyDecodingComplete(smeta); + return true; + } for (int pass = 0; pass < rescoring_passes.size(); ++pass) { const RescoringPass& rp = rescoring_passes[pass]; @@ -871,13 +882,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { if (rp.fid_summary) { if (summary_feature_type == kEDGE_PROB) { const prob_t z = forest.PushWeightsToGoal(1.0); - if (!isfinite(log(z)) || isnan(log(z))) { + if (!std::isfinite(log(z)) || std::isnan(log(z))) { cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n"; } else { for (int i = 0; i < forest.edges_.size(); ++i) { const double log_prob_transition = log(forest.edges_[i].edge_prob_); // locally normalized by the edge // head node by forest.PushWeightsToGoal - if (!isfinite(log_prob_transition) || isnan(log_prob_transition)) { + if (!std::isfinite(log_prob_transition) || std::isnan(log_prob_transition)) { cerr << "Edge: i=" << i << " got bad inside prob: " << *forest.edges_[i].rule_ << endl; abort(); } @@ -889,7 +900,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { } else if (summary_feature_type == kNODE_RISK) { Hypergraph::EdgeProbs posts; const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts); - if (!isfinite(log(z)) || isnan(log(z))) { + if (!std::isfinite(log(z)) || std::isnan(log(z))) { cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n"; } else { for (int i = 0; i < forest.nodes_.size(); ++i) { @@ -898,7 +909,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { for (int j = 0; j < in_edges.size(); ++j) node_post += (posts[in_edges[j]] / z); const double log_np = log(node_post); - if (!isfinite(log_np) || isnan(log_np)) { + if (!std::isfinite(log_np) || std::isnan(log_np)) { cerr << "got bad posterior prob for node " << i << endl; abort(); } @@ -913,13 +924,13 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { } else if (summary_feature_type == kEDGE_RISK) { Hypergraph::EdgeProbs posts; const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts); - if (!isfinite(log(z)) || isnan(log(z))) { + if (!std::isfinite(log(z)) || std::isnan(log(z))) { cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n"; } else { assert(posts.size() == forest.edges_.size()); for (int i = 0; i < posts.size(); ++i) { const double log_np = log(posts[i] / z); - if (!isfinite(log_np) || isnan(log_np)) { + if (!std::isfinite(log_np) || std::isnan(log_np)) { cerr << "got bad posterior prob for node " << i << endl; abort(); } @@ -959,7 +970,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { // Oracle Rescoring if(get_oracle_forest) { - assert(!"this is broken"); FeatureVector dummy; // = last_weights + assert(!"this is broken"); SparseVector<double> dummy; // = last_weights Oracle oc=oracle.ComputeOracle(smeta,&forest,dummy,10,conf["forest_output"].as<std::string>()); if (!SILENT) cerr << " +Oracle BLEU forest (nodes/edges): " << forest.nodes_.size() << '/' << forest.edges_.size() << endl; if (!SILENT) cerr << " +Oracle BLEU (paths): " << forest.NumberOfPaths() << endl; @@ -1090,7 +1101,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { cerr << "DIFF. ERR! log_z < log_ref_z: " << log_z << " " << log_ref_z << endl; exit(1); } - assert(!isnan(log_ref_z)); + assert(!std::isnan(log_ref_z)); ref_exp -= full_exp; acc_vec += ref_exp; acc_obj += (log_z - log_ref_z); diff --git a/decoder/decoder.h b/decoder/decoder.h index bef2ff5e..79c7a602 100644 --- a/decoder/decoder.h +++ b/decoder/decoder.h @@ -24,7 +24,7 @@ private: #endif class SentenceMetadata; -struct Hypergraph; +class Hypergraph; struct DecoderImpl; struct DecoderObserver { diff --git a/decoder/exp_semiring.h b/decoder/exp_semiring.h index 111eaaf1..2a9034bb 100644 --- a/decoder/exp_semiring.h +++ b/decoder/exp_semiring.h @@ -59,7 +59,7 @@ struct PRWeightFunction { explicit PRWeightFunction(const PWeightFunction& pwf = PWeightFunction(), const RWeightFunction& rwf = RWeightFunction()) : pweight(pwf), rweight(rwf) {} - PRPair<P,R> operator()(const Hypergraph::Edge& e) const { + PRPair<P,R> operator()(const HG::Edge& e) const { const P p = pweight(e); const R r = rweight(e); return PRPair<P,R>(p, r * p); diff --git a/decoder/ff.cc b/decoder/ff.cc index 557e0b5f..a6a035b5 100644 --- a/decoder/ff.cc +++ b/decoder/ff.cc @@ -1,9 +1,3 @@ -//TODO: non-sparse vector for all feature functions? modelset applymodels keeps track of who has what features? it's nice having FF that could generate a handful out of 10000 possible feats, though. - -//TODO: actually score rule_feature()==true features once only, hash keyed on rule or modify TRule directly? need to keep clear in forest which features come from models vs. rules; then rescoring could drop all the old models features at once - -#include "fast_lexical_cast.hpp" -#include <stdexcept> #include "ff.h" #include "tdict.h" @@ -16,8 +10,7 @@ FeatureFunction::~FeatureFunction() {} void FeatureFunction::PrepareForInput(const SentenceMetadata&) {} void FeatureFunction::FinalTraversalFeatures(const void* /* ant_state */, - SparseVector<double>* /* features */) const { -} + SparseVector<double>* /* features */) const {} string FeatureFunction::usage_helper(std::string const& name,std::string const& params,std::string const& details,bool sp,bool sd) { string r=name; @@ -32,188 +25,14 @@ string FeatureFunction::usage_helper(std::string const& name,std::string const& return r; } -Features FeatureFunction::single_feature(WordID feat) { - return Features(1,feat); -} - -Features ModelSet::all_features(std::ostream *warn,bool warn0) { - //return ::all_features(models_,weights_,warn,warn0); -} - -void show_features(Features const& ffs,DenseWeightVector const& weights_,std::ostream &out,std::ostream &warn,bool warn_zero_wt) { - out << "Weight Feature\n"; - for (unsigned i=0;i<ffs.size();++i) { - WordID fid=ffs[i]; - string const& fname=FD::Convert(fid); - double wt=weights_[fid]; - if (warn_zero_wt && wt==0) - warn<<"WARNING: "<<fname<<" has 0 weight."<<endl; - out << wt << " " << fname<<endl; - } -} - -void ModelSet::show_features(std::ostream &out,std::ostream &warn,bool warn_zero_wt) -{ -// ::show_features(all_features(),weights_,out,warn,warn_zero_wt); - //show_all_features(models_,weights_,out,warn,warn_zero_wt,warn_zero_wt); -} - -// Hiero and Joshua use log_10(e) as the value, so I do to -WordPenalty::WordPenalty(const string& param) : - fid_(FD::Convert("WordPenalty")), - value_(-1.0 / log(10)) { - if (!param.empty()) { - cerr << "Warning WordPenalty ignoring parameter: " << param << endl; - } -} - -void FeatureFunction::TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector<const void*>& ant_states, - SparseVector<double>* features, - SparseVector<double>* estimated_features, - void* state) const { - throw std::runtime_error("TraversalFeaturesImpl not implemented - override it or TraversalFeaturesLog.\n"); +void FeatureFunction::TraversalFeaturesImpl(const SentenceMetadata&, + const Hypergraph::Edge&, + const std::vector<const void*>&, + SparseVector<double>*, + SparseVector<double>*, + void*) const { + cerr << "TraversalFeaturesImpl not implemented - override it or TraversalFeaturesLog\n"; abort(); } -void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector<const void*>& ant_states, - SparseVector<double>* features, - SparseVector<double>* estimated_features, - void* state) const { - (void) smeta; - (void) ant_states; - (void) state; - (void) estimated_features; - features->set_value(fid_, edge.rule_->EWords() * value_); -} - -SourceWordPenalty::SourceWordPenalty(const string& param) : - fid_(FD::Convert("SourceWordPenalty")), - value_(-1.0 / log(10)) { - if (!param.empty()) { - cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl; - } -} - -Features SourceWordPenalty::features() const { - return single_feature(fid_); -} - -Features WordPenalty::features() const { - return single_feature(fid_); -} - - -void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector<const void*>& ant_states, - SparseVector<double>* features, - SparseVector<double>* estimated_features, - void* state) const { - (void) smeta; - (void) ant_states; - (void) state; - (void) estimated_features; - features->set_value(fid_, edge.rule_->FWords() * value_); -} - -ArityPenalty::ArityPenalty(const std::string& param) : - value_(-1.0 / log(10)) { - string fname = "Arity_"; - unsigned MAX=DEFAULT_MAX_ARITY; - using namespace boost; - if (!param.empty()) - MAX=lexical_cast<unsigned>(param); - for (unsigned i = 0; i <= MAX; ++i) { - WordID fid=FD::Convert(fname+lexical_cast<string>(i)); - fids_.push_back(fid); - } - while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen. doesn't change anything -} - -Features ArityPenalty::features() const { - return Features(fids_.begin(),fids_.end()); -} - -void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector<const void*>& ant_states, - SparseVector<double>* features, - SparseVector<double>* estimated_features, - void* state) const { - (void) smeta; - (void) ant_states; - (void) state; - (void) estimated_features; - unsigned a=edge.Arity(); - features->set_value(a<fids_.size()?fids_[a]:0, value_); -} - -ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) : - models_(models), - weights_(w), - state_size_(0), - model_state_pos_(models.size()) { - for (int i = 0; i < models_.size(); ++i) { - model_state_pos_[i] = state_size_; - state_size_ += models_[i]->NumBytesContext(); - } -} - -void ModelSet::PrepareForInput(const SentenceMetadata& smeta) { - for (int i = 0; i < models_.size(); ++i) - const_cast<FeatureFunction*>(models_[i])->PrepareForInput(smeta); -} - -void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta, - const Hypergraph& /* hg */, - const FFStates& node_states, - Hypergraph::Edge* edge, - FFState* context, - prob_t* combination_cost_estimate) const { - edge->reset_info(); - context->resize(state_size_); - if (state_size_ > 0) { - memset(&(*context)[0], 0, state_size_); - } - SparseVector<double> est_vals; // only computed if combination_cost_estimate is non-NULL - if (combination_cost_estimate) *combination_cost_estimate = prob_t::One(); - for (int i = 0; i < models_.size(); ++i) { - const FeatureFunction& ff = *models_[i]; - void* cur_ff_context = NULL; - vector<const void*> ants(edge->tail_nodes_.size()); - bool has_context = ff.NumBytesContext() > 0; - if (has_context) { - int spos = model_state_pos_[i]; - cur_ff_context = &(*context)[spos]; - for (int i = 0; i < ants.size(); ++i) { - ants[i] = &node_states[edge->tail_nodes_[i]][spos]; - } - } - ff.TraversalFeatures(smeta, *edge, ants, &edge->feature_values_, &est_vals, cur_ff_context); - } - if (combination_cost_estimate) - combination_cost_estimate->logeq(est_vals.dot(weights_)); - edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); -} - -void ModelSet::AddFinalFeatures(const FFState& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const { - assert(1 == edge->rule_->Arity()); - edge->reset_info(); - for (int i = 0; i < models_.size(); ++i) { - const FeatureFunction& ff = *models_[i]; - const void* ant_state = NULL; - bool has_context = ff.NumBytesContext() > 0; - if (has_context) { - int spos = model_state_pos_[i]; - ant_state = &state[spos]; - } - ff.FinalTraversalFeatures(smeta, *edge, ant_state, &edge->feature_values_); - } - edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); -} - diff --git a/decoder/ff.h b/decoder/ff.h index 6c22d39f..3280592e 100644 --- a/decoder/ff.h +++ b/decoder/ff.h @@ -1,79 +1,47 @@ #ifndef _FF_H_ #define _FF_H_ -#define DEBUG_INIT 0 -#if DEBUG_INIT -# include <iostream> -# define DBGINIT(a) do { std::cerr<<a<<"\n"; } while(0) -#else -# define DBGINIT(a) -#endif - -#include <stdint.h> +#include <string> #include <vector> -#include <cstring> -#include "fdict.h" -#include "hg.h" -#include "feature_vector.h" -#include "value_array.h" +#include "sparse_vector.h" +namespace HG { struct Edge; struct Node; } +class Hypergraph; class SentenceMetadata; -class FeatureFunction; // see definition below - -typedef std::vector<WordID> Features; // set of features ids // if you want to develop a new feature, inherit from this class and // override TraversalFeaturesImpl(...). If it's a feature that returns / // depends on context, you may also need to implement // FinalTraversalFeatures(...) class FeatureFunction { + friend class ExternalFeature; public: std::string name_; // set by FF factory using usage() - bool debug_; // also set by FF factory checking param for immediate initial "debug" - //called after constructor, but before name_ and debug_ have been set - virtual void Init() { DBGINIT("default FF::Init name="<<name_); } - virtual void init_name_debug(std::string const& n,bool debug) { - name_=n; - debug_=debug; - } - bool debug() const { return debug_; } FeatureFunction() : state_size_() {} explicit FeatureFunction(int state_size) : state_size_(state_size) {} virtual ~FeatureFunction(); bool IsStateful() const { return state_size_ > 0; } + int StateSize() const { return state_size_; } // override this. not virtual because we want to expose this to factory template for help before creating a FF static std::string usage(bool show_params,bool show_details) { return usage_helper("FIXME_feature_needs_name","[no parameters]","[no documentation yet]",show_params,show_details); } static std::string usage_helper(std::string const& name,std::string const& params,std::string const& details,bool show_params,bool show_details); - static Features single_feature(int feat); -public: - - // stateless feature that doesn't depend on source span: override and return true. then your feature can be precomputed over rules. - virtual bool rule_feature() const { return false; } // called once, per input, before any feature calls to TraversalFeatures, etc. // used to initialize sentence-specific data structures virtual void PrepareForInput(const SentenceMetadata& smeta); - //OVERRIDE THIS: - virtual Features features() const { return single_feature(FD::Convert(name_)); } - // returns the number of bytes of context that this feature function will - // (maximally) use. By default, 0 ("stateless" models in Hiero/Joshua). - // NOTE: this value is fixed for the instance of your class, you cannot - // use different amounts of memory for different nodes in the forest. this will be read as soon as you create a ModelSet, then fixed forever on - inline int NumBytesContext() const { return state_size_; } - // Compute the feature values and (if this applies) the estimates of the // feature values when this edge is used incorporated into a larger context inline void TraversalFeatures(const SentenceMetadata& smeta, - Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, + SparseVector<double>* features, + SparseVector<double>* estimated_features, void* out_state) const { - TraversalFeaturesLog(smeta, edge, ant_contexts, + TraversalFeaturesImpl(smeta, edge, ant_contexts, features, estimated_features, out_state); // TODO it's easy for careless feature function developers to overwrite // the end of their state and clobber someone else's memory. These bugs @@ -83,21 +51,10 @@ public: } // if there's some state left when you transition to the goal state, score - // it here. For example, the language model computes the cost of adding + // it here. For example, a language model might the cost of adding // <s> and </s>. - -protected: virtual void FinalTraversalFeatures(const void* residual_state, - FeatureVector* final_features) const; -public: - //override either this or one of above. - virtual void FinalTraversalFeatures(const SentenceMetadata& /* smeta */, - Hypergraph::Edge& /* edge */, // so you can log() - const void* residual_state, - FeatureVector* final_features) const { - FinalTraversalFeatures(residual_state,final_features); - } - + SparseVector<double>* final_features) const; protected: // context is a pointer to a buffer of size NumBytesContext() that the @@ -107,191 +64,19 @@ public: // of the particular FeatureFunction class. There is one exception: // equality of the contents (i.e., memcmp) is required to determine whether // two states can be combined. - - // by Log, I mean that the edge is non-const only so you can log to it with INFO_EDGE(edge,msg<<"etc."). most features don't use this so implement the below. it has a different name to allow a default implementation without name hiding when inheriting + overriding just 1. - virtual void TraversalFeaturesLog(const SentenceMetadata& smeta, - Hypergraph::Edge& edge, // this is writable only so you can use log() - const std::vector<const void*>& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, - void* context) const { - TraversalFeaturesImpl(smeta,edge,ant_contexts,features,estimated_features,context); - } - - // override above or below. virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - Hypergraph::Edge const& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, + SparseVector<double>* features, + SparseVector<double>* estimated_features, void* context) const; // !!! ONLY call this from subclass *CONSTRUCTORS* !!! void SetStateSize(size_t state_size) { state_size_ = state_size; } - int StateSize() const { return state_size_; } - private: - int state_size_; -}; - - -// word penalty feature, for each word on the E side of a rule, -// add value_ -class WordPenalty : public FeatureFunction { - public: - Features features() const; - WordPenalty(const std::string& param); - static std::string usage(bool p,bool d) { - return usage_helper("WordPenalty","","number of target words (local feature)",p,d); - } - bool rule_feature() const { return true; } - protected: - virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector<const void*>& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, - void* context) const; - private: - const int fid_; - const double value_; -}; - -class SourceWordPenalty : public FeatureFunction { - public: - bool rule_feature() const { return true; } - Features features() const; - SourceWordPenalty(const std::string& param); - static std::string usage(bool p,bool d) { - return usage_helper("SourceWordPenalty","","number of source words (local feature, and meaningless except when input has non-constant number of source words, e.g. segmentation/morphology/speech recognition lattice)",p,d); - } - protected: - virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector<const void*>& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, - void* context) const; - private: - const int fid_; - const double value_; -}; - -#define DEFAULT_MAX_ARITY 9 -#define DEFAULT_MAX_ARITY_STRINGIZE(x) #x -#define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x) -#define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY) - -class ArityPenalty : public FeatureFunction { - public: - bool rule_feature() const { return true; } - Features features() const; - ArityPenalty(const std::string& param); - static std::string usage(bool p,bool d) { - return usage_helper("ArityPenalty","[MaxArity(default " DEFAULT_MAX_ARITY_STR ")]","Indicator feature Arity_N=1 for rule of arity N (local feature). 0<=N<=MaxArity(default " DEFAULT_MAX_ARITY_STR ")",p,d); - } - - protected: - virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector<const void*>& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, - void* context) const; - private: - std::vector<WordID> fids_; - const double value_; -}; - -void show_features(Features const& features,DenseWeightVector const& weights,std::ostream &out,std::ostream &warn,bool warn_zero_wt=true); //show features and weights - -template <class FFp> -Features all_features(std::vector<FFp> const& models_,DenseWeightVector &weights_,std::ostream *warn=0,bool warn_fid_0=false) { - using namespace std; - Features ffs; -#define WARNFF(x) do { if (warn) { *warn << "WARNING: "<< x << endl; } } while(0) - typedef map<WordID,string> FFM; - FFM ff_from; - for (unsigned i=0;i<models_.size();++i) { - string const& ffname=models_[i]->name_; - Features si=models_[i]->features(); - if (si.empty()) { - WARNFF(ffname<<" doesn't yet report any feature IDs - either supply feature weight, or use --no_freeze_feature_set, or implement features() method"); - } - unsigned n0=0; - for (unsigned j=0;j<si.size();++j) { - WordID fid=si[j]; - if (!fid) ++n0; - if (fid >= weights_.size()) - weights_.resize(fid+1); - if (warn_fid_0 || fid) { - pair<FFM::iterator,bool> i_new=ff_from.insert(FFM::value_type(fid,ffname)); - if (i_new.second) { - if (fid) - ffs.push_back(fid); - else - WARNFF("Feature id 0 for "<<ffname<<" (models["<<i<<"]) - probably no weight provided. Don't freeze feature ids to see the name"); - } else { - WARNFF(ffname<<" (models["<<i<<"]) tried to define feature "<<FD::Convert(fid)<<" already defined earlier by "<<i_new.first->second); - } - } - } - if (n0) - WARNFF(ffname<<" (models["<<i<<"]) had "<<n0<<" unused features (--no_freeze_feature_set to see them)"); - } - return ffs; -#undef WARNFF -} - -template <class FFp> -void show_all_features(std::vector<FFp> const& models_,DenseWeightVector &weights_,std::ostream &out,std::ostream &warn,bool warn_fid_0=true,bool warn_zero_wt=true) { - return show_features(all_features(models_,weights_,&warn,warn_fid_0),weights_,out,warn,warn_zero_wt); -} - -typedef ValueArray<uint8_t> FFState; // this is about 10% faster than string. -//typedef std::string FFState; - -//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size) -typedef std::vector<FFState> FFStates; - -// this class is a set of FeatureFunctions that can be used to score, rescore, -// etc. a (translation?) forest -class ModelSet { - public: - ModelSet(const std::vector<double>& weights, - const std::vector<const FeatureFunction*>& models); - - // sets edge->feature_values_ and edge->edge_prob_ - // NOTE: edge must not necessarily be in hg.edges_ but its TAIL nodes - // must be. edge features are supposed to be overwritten, not added to (possibly because rule features aren't in ModelSet so need to be left alone - void AddFeaturesToEdge(const SentenceMetadata& smeta, - const Hypergraph& hg, - const FFStates& node_states, - Hypergraph::Edge* edge, - FFState* residual_context, - prob_t* combination_cost_estimate = NULL) const; - - //this is called INSTEAD of above when result of edge is goal (must be a unary rule - i.e. one variable, but typically it's assumed that there are no target terminals either (e.g. for LM)) - void AddFinalFeatures(const FFState& residual_context, - Hypergraph::Edge* edge, - SentenceMetadata const& smeta) const; - - // this is called once before any feature functions apply to a hypergraph - // it can be used to initialize sentence-specific data structures - void PrepareForInput(const SentenceMetadata& smeta); - - bool empty() const { return models_.empty(); } - - bool stateless() const { return !state_size_; } - Features all_features(std::ostream *warnings=0,bool warn_fid_zero=false); // this will warn about duplicate features as well (one function overwrites the feature of another). also resizes weights_ so it is large enough to hold the (0) weight for the largest reported feature id. since 0 is a NULL feature id, it's never included. if warn_fid_zero, then even the first 0 id is - void show_features(std::ostream &out,std::ostream &warn,bool warn_zero_wt=true); - private: - std::vector<const FeatureFunction*> models_; - const std::vector<double>& weights_; int state_size_; - std::vector<int> model_state_pos_; }; #endif diff --git a/decoder/ff_basic.cc b/decoder/ff_basic.cc new file mode 100644 index 00000000..f9404d24 --- /dev/null +++ b/decoder/ff_basic.cc @@ -0,0 +1,80 @@ +#include "ff_basic.h" + +#include "fast_lexical_cast.hpp" +#include "hg.h" + +using namespace std; + +// Hiero and Joshua use log_10(e) as the value, so I do to +WordPenalty::WordPenalty(const string& param) : + fid_(FD::Convert("WordPenalty")), + value_(-1.0 / log(10)) { + if (!param.empty()) { + cerr << "Warning WordPenalty ignoring parameter: " << param << endl; + } +} + +void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_states, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* state) const { + (void) smeta; + (void) ant_states; + (void) state; + (void) estimated_features; + features->set_value(fid_, edge.rule_->EWords() * value_); +} + + +SourceWordPenalty::SourceWordPenalty(const string& param) : + fid_(FD::Convert("SourceWordPenalty")), + value_(-1.0 / log(10)) { + if (!param.empty()) { + cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl; + } +} + +void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_states, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* state) const { + (void) smeta; + (void) ant_states; + (void) state; + (void) estimated_features; + features->set_value(fid_, edge.rule_->FWords() * value_); +} + + +ArityPenalty::ArityPenalty(const std::string& param) : + value_(-1.0 / log(10)) { + string fname = "Arity_"; + unsigned MAX=DEFAULT_MAX_ARITY; + using namespace boost; + if (!param.empty()) + MAX=lexical_cast<unsigned>(param); + for (unsigned i = 0; i <= MAX; ++i) { + WordID fid=FD::Convert(fname+lexical_cast<string>(i)); + fids_.push_back(fid); + } + while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen. doesn't change anything +} + +void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_states, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* state) const { + (void) smeta; + (void) ant_states; + (void) state; + (void) estimated_features; + unsigned a=edge.Arity(); + features->set_value(a<fids_.size()?fids_[a]:0, value_); +} + diff --git a/decoder/ff_basic.h b/decoder/ff_basic.h new file mode 100644 index 00000000..901c0110 --- /dev/null +++ b/decoder/ff_basic.h @@ -0,0 +1,68 @@ +#ifndef _FF_BASIC_H_ +#define _FF_BASIC_H_ + +#include "ff.h" + +// word penalty feature, for each word on the E side of a rule, +// add value_ +class WordPenalty : public FeatureFunction { + public: + WordPenalty(const std::string& param); + static std::string usage(bool p,bool d) { + return usage_helper("WordPenalty","","number of target words (local feature)",p,d); + } + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + private: + const int fid_; + const double value_; +}; + +class SourceWordPenalty : public FeatureFunction { + public: + SourceWordPenalty(const std::string& param); + static std::string usage(bool p,bool d) { + return usage_helper("SourceWordPenalty","","number of source words (local feature, and meaningless except when input has non-constant number of source words, e.g. segmentation/morphology/speech recognition lattice)",p,d); + } + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + private: + const int fid_; + const double value_; +}; + +#define DEFAULT_MAX_ARITY 9 +#define DEFAULT_MAX_ARITY_STRINGIZE(x) #x +#define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x) +#define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY) + +class ArityPenalty : public FeatureFunction { + public: + ArityPenalty(const std::string& param); + static std::string usage(bool p,bool d) { + return usage_helper("ArityPenalty","[MaxArity(default " DEFAULT_MAX_ARITY_STR ")]","Indicator feature Arity_N=1 for rule of arity N (local feature). 0<=N<=MaxArity(default " DEFAULT_MAX_ARITY_STR ")",p,d); + } + + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + private: + std::vector<WordID> fids_; + const double value_; +}; + +#endif diff --git a/decoder/ff_bleu.h b/decoder/ff_bleu.h index 5544920e..344dc788 100644 --- a/decoder/ff_bleu.h +++ b/decoder/ff_bleu.h @@ -20,7 +20,7 @@ class BLEUModel : public FeatureFunction { static std::string usage(bool param,bool verbose); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ff_charset.cc b/decoder/ff_charset.cc index 472de82b..6429088b 100644 --- a/decoder/ff_charset.cc +++ b/decoder/ff_charset.cc @@ -1,5 +1,7 @@ #include "ff_charset.h" +#include "tdict.h" +#include "hg.h" #include "fdict.h" #include "stringlib.h" @@ -20,8 +22,8 @@ bool ContainsNonLatin(const string& word) { void NonLatinCount::TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, const std::vector<const void*>& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, + SparseVector<double>* features, + SparseVector<double>* estimated_features, void* context) const { const vector<WordID>& e = edge.rule_->e(); int count = 0; diff --git a/decoder/ff_charset.h b/decoder/ff_charset.h index b1ad537e..267ef65d 100644 --- a/decoder/ff_charset.h +++ b/decoder/ff_charset.h @@ -13,10 +13,10 @@ class NonLatinCount : public FeatureFunction { NonLatinCount(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, + SparseVector<double>* features, + SparseVector<double>* estimated_features, void* context) const; private: mutable std::map<WordID, bool> is_non_latin_; diff --git a/decoder/ff_context.cc b/decoder/ff_context.cc index 9de4d737..f2b0e67c 100644 --- a/decoder/ff_context.cc +++ b/decoder/ff_context.cc @@ -5,12 +5,14 @@ #include <cassert> #include <cmath> +#include "hg.h" #include "filelib.h" #include "stringlib.h" #include "sentence_metadata.h" #include "lattice.h" #include "fdict.h" #include "verbose.h" +#include "tdict.h" RuleContextFeatures::RuleContextFeatures(const string& param) { // cerr << "initializing RuleContextFeatures with parameters: " << param; diff --git a/decoder/ff_context.h b/decoder/ff_context.h index 89bcb557..19198ec3 100644 --- a/decoder/ff_context.h +++ b/decoder/ff_context.h @@ -14,7 +14,7 @@ class RuleContextFeatures : public FeatureFunction { RuleContextFeatures(const string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc index 252dbf8c..e6f78f84 100644 --- a/decoder/ff_csplit.cc +++ b/decoder/ff_csplit.cc @@ -5,6 +5,7 @@ #include "klm/lm/model.hh" +#include "hg.h" #include "sentence_metadata.h" #include "lattice.h" #include "tdict.h" diff --git a/decoder/ff_csplit.h b/decoder/ff_csplit.h index 38c0c5b8..64d42526 100644 --- a/decoder/ff_csplit.h +++ b/decoder/ff_csplit.h @@ -12,7 +12,7 @@ class BasicCSplitFeatures : public FeatureFunction { BasicCSplitFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -27,7 +27,7 @@ class ReverseCharLMCSplitFeature : public FeatureFunction { ReverseCharLMCSplitFeature(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ff_dwarf.cc b/decoder/ff_dwarf.cc index 43528405..fe7a472e 100644 --- a/decoder/ff_dwarf.cc +++ b/decoder/ff_dwarf.cc @@ -4,6 +4,7 @@ #include <string> #include <iostream> #include <map> +#include "hg.h" #include "ff_dwarf.h" #include "dwarf.h" #include "wordid.h" diff --git a/decoder/ff_dwarf.h b/decoder/ff_dwarf.h index 083fcc7c..3d6a7da6 100644 --- a/decoder/ff_dwarf.h +++ b/decoder/ff_dwarf.h @@ -56,7 +56,7 @@ class Dwarf : public FeatureFunction { function word alignments set by 3. */ void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ff_external.cc b/decoder/ff_external.cc new file mode 100644 index 00000000..dea0e20f --- /dev/null +++ b/decoder/ff_external.cc @@ -0,0 +1,60 @@ +#include "ff_external.h" + +#include <dlfcn.h> + +#include "stringlib.h" +#include "hg.h" + +using namespace std; + +ExternalFeature::ExternalFeature(const string& param) { + size_t pos = param.find(' '); + string nparam; + string file = param; + if (pos < param.size()) { + nparam = Trim(param.substr(pos + 1)); + file = param.substr(0, pos); + } + if (file.size() < 1) { + cerr << "External requires a path to a dynamic library!\n"; + abort(); + } + lib_handle = dlopen(file.c_str(), RTLD_LAZY); + if (!lib_handle) { + cerr << "dlopen reports: " << dlerror() << endl; + cerr << "Did you provide a full path to the dynamic library?\n"; + abort(); + } + FeatureFunction* (*fn)(const string&) = + (FeatureFunction* (*)(const string&))(dlsym(lib_handle, "create_ff")); + if (!fn) { + cerr << "dlsym reports: " << dlerror() << endl; + abort(); + } + ff_ext = (*fn)(nparam); + SetStateSize(ff_ext->StateSize()); +} + +ExternalFeature::~ExternalFeature() { + delete ff_ext; + dlclose(lib_handle); +} + +void ExternalFeature::PrepareForInput(const SentenceMetadata& smeta) { + ff_ext->PrepareForInput(smeta); +} + +void ExternalFeature::FinalTraversalFeatures(const void* context, + SparseVector<double>* features) const { + ff_ext->FinalTraversalFeatures(context, features); +} + +void ExternalFeature::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const { + ff_ext->TraversalFeaturesImpl(smeta, edge, ant_contexts, features, estimated_features, context); +} + diff --git a/decoder/ff_external.h b/decoder/ff_external.h new file mode 100644 index 00000000..3e2bee51 --- /dev/null +++ b/decoder/ff_external.h @@ -0,0 +1,26 @@ +#ifndef _FFEXTERNAL_H_ +#define _FFEXTERNAL_H_ + +#include "ff.h" + +// dynamically loaded feature function +class ExternalFeature : public FeatureFunction { + public: + ExternalFeature(const std::string& param); + ~ExternalFeature(); + virtual void PrepareForInput(const SentenceMetadata& smeta); + virtual void FinalTraversalFeatures(const void* context, + SparseVector<double>* features) const; + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + private: + void* lib_handle; + FeatureFunction* ff_ext; +}; + +#endif diff --git a/decoder/ff_factory.h b/decoder/ff_factory.h index 5eb68c8b..bfdd3257 100644 --- a/decoder/ff_factory.h +++ b/decoder/ff_factory.h @@ -43,7 +43,6 @@ template<class FF> struct FFFactory : public FactoryBase<FeatureFunction> { FP Create(std::string param) const { FF *ret=new FF(param); - ret->Init(); return FP(ret); } virtual std::string usage(bool params,bool verbose) const { @@ -57,7 +56,6 @@ template<class FF> struct FsaFactory : public FactoryBase<FsaFeatureFunction> { FP Create(std::string param) const { FF *ret=new FF(param); - ret->Init(); return FP(ret); } virtual std::string usage(bool params,bool verbose) const { @@ -98,8 +96,6 @@ struct FactoryRegistry : public UntypedFactoryRegistry { if (debug) cerr<<"debug enabled for "<<ffname<< " - remaining options: '"<<param<<"'\n"; FP res = dynamic_cast<FB const&>(*it->second).Create(param); - res->init_name_debug(ffname,debug); - // could add a res->Init() here instead of in Create if we wanted feature id to potentially differ based on the registered name rather than static usage() - of course, specific feature ids can be computed on the basis of feature param as well; this only affects the default single feature id=name return res; } }; diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc index 09ef282c..fefa90bd 100644 --- a/decoder/ff_klm.cc +++ b/decoder/ff_klm.cc @@ -327,11 +327,6 @@ KLanguageModel<Model>::KLanguageModel(const string& param) { } template <class Model> -Features KLanguageModel<Model>::features() const { - return single_feature(fid_); -} - -template <class Model> KLanguageModel<Model>::~KLanguageModel() { delete pimpl_; } @@ -362,7 +357,6 @@ void KLanguageModel<Model>::FinalTraversalFeatures(const void* ant_state, template <class Model> boost::shared_ptr<FeatureFunction> CreateModel(const std::string ¶m) { KLanguageModel<Model> *ret = new KLanguageModel<Model>(param); - ret->Init(); return boost::shared_ptr<FeatureFunction>(ret); } diff --git a/decoder/ff_klm.h b/decoder/ff_klm.h index 6efe50f6..b5ceffd0 100644 --- a/decoder/ff_klm.h +++ b/decoder/ff_klm.h @@ -20,10 +20,9 @@ class KLanguageModel : public FeatureFunction { virtual void FinalTraversalFeatures(const void* context, SparseVector<double>* features) const; static std::string usage(bool param,bool verbose); - Features features() const; protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc index 5e16d4e3..6ec7b4f3 100644 --- a/decoder/ff_lm.cc +++ b/decoder/ff_lm.cc @@ -519,10 +519,6 @@ LanguageModel::LanguageModel(const string& param) { SetStateSize(LanguageModelImpl::OrderToStateSize(order)); } -Features LanguageModel::features() const { - return single_feature(fid_); -} - LanguageModel::~LanguageModel() { delete pimpl_; } diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h index ccee4268..94e18f00 100644 --- a/decoder/ff_lm.h +++ b/decoder/ff_lm.h @@ -55,10 +55,9 @@ class LanguageModel : public FeatureFunction { SparseVector<double>* features) const; std::string DebugStateToString(const void* state) const; static std::string usage(bool param,bool verbose); - Features features() const; protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -81,7 +80,7 @@ class LanguageModelRandLM : public FeatureFunction { std::string DebugStateToString(const void* state) const; protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ff_ngrams.cc b/decoder/ff_ngrams.cc index 9c13fdbb..d337b28b 100644 --- a/decoder/ff_ngrams.cc +++ b/decoder/ff_ngrams.cc @@ -60,7 +60,7 @@ namespace { } } -static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order, vector<string>& prefixes, string& target_separator) { +static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order, vector<string>& prefixes, string& target_separator, string* cluster_file) { vector<string> const& argv=SplitOnWhitespace(in); *explicit_markers = false; *order = 3; @@ -103,6 +103,10 @@ static bool ParseArgs(string const& in, bool* explicit_markers, unsigned* order, LMSPEC_NEXTARG; prefixes[5] = *i; break; + case 'c': + LMSPEC_NEXTARG; + *cluster_file = *i; + break; case 'S': LMSPEC_NEXTARG; target_separator = *i; @@ -124,6 +128,7 @@ usage: << "NgramFeatures Usage: \n" << " feature_function=NgramFeatures filename.lm [-x] [-o <order>] \n" + << " [-c <cluster-file>]\n" << " [-U <unigram-prefix>] [-B <bigram-prefix>][-T <trigram-prefix>]\n" << " [-4 <4-gram-prefix>] [-5 <5-gram-prefix>] [-S <separator>]\n\n" @@ -203,6 +208,12 @@ class NgramDetectorImpl { SetFlag(flag, HAS_FULL_CONTEXT, state); } + WordID MapToClusterIfNecessary(WordID w) const { + if (cluster_map.size() == 0) return w; + if (w >= cluster_map.size()) return kCDEC_UNK; + return cluster_map[w]; + } + void FireFeatures(const State<5>& state, WordID cur, SparseVector<double>* feats) { FidTree* ft = &fidroot_; int n = 0; @@ -285,7 +296,7 @@ class NgramDetectorImpl { context_complete = true; } } else { // handle terminal - const WordID cur_word = e[j]; + const WordID cur_word = MapToClusterIfNecessary(e[j]); SparseVector<double> p; if (cur_word == kSOS_) { state = BeginSentenceState(); @@ -348,9 +359,52 @@ class NgramDetectorImpl { } } + void ReadClusterFile(const string& clusters) { + ReadFile rf(clusters); + istream& in = *rf.stream(); + string line; + int lc = 0; + string cluster; + string word; + while(getline(in, line)) { + ++lc; + if (line.size() == 0) continue; + if (line[0] == '#') continue; + unsigned cend = 1; + while((line[cend] != ' ' && line[cend] != '\t') && cend < line.size()) { + ++cend; + } + if (cend == line.size()) { + cerr << "Line " << lc << " in " << clusters << " malformed: " << line << endl; + abort(); + } + unsigned wbeg = cend + 1; + while((line[wbeg] == ' ' || line[wbeg] == '\t') && wbeg < line.size()) { + ++wbeg; + } + if (wbeg == line.size()) { + cerr << "Line " << lc << " in " << clusters << " malformed: " << line << endl; + abort(); + } + unsigned wend = wbeg + 1; + while((line[wend] != ' ' && line[wend] != '\t') && wend < line.size()) { + ++wend; + } + const WordID clusterid = TD::Convert(line.substr(0, cend)); + const WordID wordid = TD::Convert(line.substr(wbeg, wend - wbeg)); + if (wordid >= cluster_map.size()) + cluster_map.resize(wordid + 10, kCDEC_UNK); + cluster_map[wordid] = clusterid; + } + cluster_map[kSOS_] = kSOS_; + cluster_map[kEOS_] = kEOS_; + } + + vector<WordID> cluster_map; + public: explicit NgramDetectorImpl(bool explicit_markers, unsigned order, - vector<string>& prefixes, string& target_separator) : + vector<string>& prefixes, string& target_separator, const string& clusters) : kCDEC_UNK(TD::Convert("<unk>")) , add_sos_eos_(!explicit_markers) { order_ = order; @@ -369,6 +423,9 @@ class NgramDetectorImpl { dummy_rule_.reset(new TRule("[DUMMY] ||| [BOS] [DUMMY] ||| [1] [2] </s> ||| X=0")); kSOS_ = TD::Convert("<s>"); kEOS_ = TD::Convert("</s>"); + + if (clusters.size()) + ReadClusterFile(clusters); } ~NgramDetectorImpl() { @@ -409,9 +466,10 @@ NgramDetector::NgramDetector(const string& param) { vector<string> prefixes; bool explicit_markers = false; unsigned order = 3; - ParseArgs(param, &explicit_markers, &order, prefixes, target_separator); + string clusters; + ParseArgs(param, &explicit_markers, &order, prefixes, target_separator, &clusters); pimpl_ = new NgramDetectorImpl(explicit_markers, order, prefixes, - target_separator); + target_separator, clusters); SetStateSize(pimpl_->ReserveStateSize()); } diff --git a/decoder/ff_ngrams.h b/decoder/ff_ngrams.h index 064dbb49..4965d235 100644 --- a/decoder/ff_ngrams.h +++ b/decoder/ff_ngrams.h @@ -17,7 +17,7 @@ class NgramDetector : public FeatureFunction { SparseVector<double>* features) const; protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc index bd4c4cc0..410e083c 100644 --- a/decoder/ff_rules.cc +++ b/decoder/ff_rules.cc @@ -10,6 +10,8 @@ #include "lattice.h" #include "fdict.h" #include "verbose.h" +#include "tdict.h" +#include "hg.h" using namespace std; @@ -66,15 +68,15 @@ void RuleIdentityFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, features->add_value(it->second, 1); } -RuleNgramFeatures::RuleNgramFeatures(const std::string& param) { +RuleSourceBigramFeatures::RuleSourceBigramFeatures(const std::string& param) { } -void RuleNgramFeatures::PrepareForInput(const SentenceMetadata& smeta) { +void RuleSourceBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) { // std::map<const TRule*, SparseVector<double> > rule2_feats_.clear(); } -void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, +void RuleSourceBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, const vector<const void*>& ant_contexts, SparseVector<double>* features, @@ -92,14 +94,64 @@ void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, assert(w > 0); const string& cur = TD::Convert(w); ostringstream os; - os << "RB:" << prev << '_' << cur; + os << "RBS:" << prev << '_' << cur; const int fid = FD::Convert(Escape(os.str())); if (fid <= 0) return; f.add_value(fid, 1.0); prev = cur; } ostringstream os; - os << "RB:" << prev << '_' << "</r>"; + os << "RBS:" << prev << '_' << "</r>"; + f.set_value(FD::Convert(Escape(os.str())), 1.0); + } + (*features) += it->second; +} + +RuleTargetBigramFeatures::RuleTargetBigramFeatures(const std::string& param) : inds(1000) { + for (unsigned i = 0; i < inds.size(); ++i) { + ostringstream os; + os << (i + 1); + inds[i] = os.str(); + } +} + +void RuleTargetBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) { + rule2_feats_.clear(); +} + +void RuleTargetBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const { + map<const TRule*, SparseVector<double> >::iterator it = rule2_feats_.find(edge.rule_.get()); + if (it == rule2_feats_.end()) { + const TRule& rule = *edge.rule_; + it = rule2_feats_.insert(make_pair(&rule, SparseVector<double>())).first; + SparseVector<double>& f = it->second; + string prev = "<r>"; + vector<WordID> nt_types(rule.Arity()); + unsigned ntc = 0; + for (int i = 0; i < rule.f_.size(); ++i) + if (rule.f_[i] < 0) nt_types[ntc++] = -rule.f_[i]; + for (int i = 0; i < rule.e_.size(); ++i) { + WordID w = rule.e_[i]; + string cur; + if (w > 0) { + cur = TD::Convert(w); + } else { + cur = TD::Convert(nt_types[-w]) + inds[-w]; + } + ostringstream os; + os << "RBT:" << prev << '_' << cur; + const int fid = FD::Convert(Escape(os.str())); + if (fid <= 0) return; + f.add_value(fid, 1.0); + prev = cur; + } + ostringstream os; + os << "RBT:" << prev << '_' << "</r>"; f.set_value(FD::Convert(Escape(os.str())), 1.0); } (*features) += it->second; diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index 48d8bd05..f210dc65 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -3,7 +3,9 @@ #include <vector> #include <map> +#include "trule.h" #include "ff.h" +#include "hg.h" #include "array2d.h" #include "wordid.h" @@ -12,7 +14,7 @@ class RuleIdentityFeatures : public FeatureFunction { RuleIdentityFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -22,9 +24,9 @@ class RuleIdentityFeatures : public FeatureFunction { mutable std::map<const TRule*, int> rule2_fid_; }; -class RuleNgramFeatures : public FeatureFunction { +class RuleSourceBigramFeatures : public FeatureFunction { public: - RuleNgramFeatures(const std::string& param); + RuleSourceBigramFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, @@ -37,4 +39,20 @@ class RuleNgramFeatures : public FeatureFunction { mutable std::map<const TRule*, SparseVector<double> > rule2_feats_; }; +class RuleTargetBigramFeatures : public FeatureFunction { + public: + RuleTargetBigramFeatures(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + virtual void PrepareForInput(const SentenceMetadata& smeta); + private: + std::vector<std::string> inds; + mutable std::map<const TRule*, SparseVector<double> > rule2_feats_; +}; + #endif diff --git a/decoder/ff_ruleshape.cc b/decoder/ff_ruleshape.cc index f56ccfa9..7bb548c4 100644 --- a/decoder/ff_ruleshape.cc +++ b/decoder/ff_ruleshape.cc @@ -1,5 +1,7 @@ #include "ff_ruleshape.h" +#include "trule.h" +#include "hg.h" #include "fdict.h" #include <sstream> diff --git a/decoder/ff_ruleshape.h b/decoder/ff_ruleshape.h index 23c9827e..9f20faf3 100644 --- a/decoder/ff_ruleshape.h +++ b/decoder/ff_ruleshape.h @@ -9,7 +9,7 @@ class RuleShapeFeatures : public FeatureFunction { RuleShapeFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ff_source_path.cc b/decoder/ff_source_path.cc new file mode 100644 index 00000000..2a3bee2e --- /dev/null +++ b/decoder/ff_source_path.cc @@ -0,0 +1,42 @@ +#include "ff_source_path.h" + +#include "hg.h" + +using namespace std; + +SourcePathFeatures::SourcePathFeatures(const string& param) : FeatureFunction(sizeof(int)) {} + +void SourcePathFeatures::FireBigramFeature(WordID prev, WordID cur, SparseVector<double>* features) const { + int& fid = bigram_fids[prev][cur]; + if (!fid) fid = FD::Convert("SB:"+TD::Convert(prev) + "_" + TD::Convert(cur)); + if (fid) features->add_value(fid, 1.0); +} + +void SourcePathFeatures::FireUnigramFeature(WordID cur, SparseVector<double>* features) const { + int& fid = unigram_fids[cur]; + if (!fid) fid = FD::Convert("SU:" + TD::Convert(cur)); + if (fid) features->add_value(fid, 1.0); +} + +void SourcePathFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const { + WordID* res = reinterpret_cast<WordID*>(context); + const vector<int>& f = edge.rule_->f(); + int prev = 0; + unsigned ntc = 0; + for (unsigned i = 0; i < f.size(); ++i) { + int cur = f[i]; + if (cur < 0) + cur = *reinterpret_cast<const WordID*>(ant_contexts[ntc++]); + else + FireUnigramFeature(cur, features); + if (prev) FireBigramFeature(prev, cur, features); + prev = cur; + } + *res = prev; +} + diff --git a/decoder/ff_source_path.h b/decoder/ff_source_path.h new file mode 100644 index 00000000..03126412 --- /dev/null +++ b/decoder/ff_source_path.h @@ -0,0 +1,26 @@ +#ifndef _FF_SOURCE_PATH_H_ +#define _FF_SOURCE_PATH_H_ + +#include <vector> +#include <map> +#include "ff.h" + +class SourcePathFeatures : public FeatureFunction { + public: + SourcePathFeatures(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + + private: + void FireBigramFeature(WordID prev, WordID cur, SparseVector<double>* features) const; + void FireUnigramFeature(WordID cur, SparseVector<double>* features) const; + mutable std::map<WordID, std::map<WordID, int> > bigram_fids; + mutable std::map<WordID, int> unigram_fids; +}; + +#endif diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc index 035132b4..a1997695 100644 --- a/decoder/ff_source_syntax.cc +++ b/decoder/ff_source_syntax.cc @@ -3,6 +3,7 @@ #include <sstream> #include <stack> +#include "hg.h" #include "sentence_metadata.h" #include "array2d.h" #include "filelib.h" diff --git a/decoder/ff_source_syntax.h b/decoder/ff_source_syntax.h index 279563e1..a8c7150a 100644 --- a/decoder/ff_source_syntax.h +++ b/decoder/ff_source_syntax.h @@ -11,7 +11,7 @@ class SourceSyntaxFeatures : public FeatureFunction { ~SourceSyntaxFeatures(); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -28,7 +28,7 @@ class SourceSpanSizeFeatures : public FeatureFunction { ~SourceSpanSizeFeatures(); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ff_spans.cc b/decoder/ff_spans.cc index 0483517b..0ccac69b 100644 --- a/decoder/ff_spans.cc +++ b/decoder/ff_spans.cc @@ -4,6 +4,8 @@ #include <cassert> #include <cmath> +#include "hg.h" +#include "tdict.h" #include "filelib.h" #include "stringlib.h" #include "sentence_metadata.h" diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h index 24e0dede..d2f5e84c 100644 --- a/decoder/ff_spans.h +++ b/decoder/ff_spans.h @@ -12,7 +12,7 @@ class SpanFeatures : public FeatureFunction { SpanFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -49,7 +49,7 @@ class CMR2008ReorderingFeatures : public FeatureFunction { CMR2008ReorderingFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc index fd9210fa..7f9af9cd 100644 --- a/decoder/ff_tagger.cc +++ b/decoder/ff_tagger.cc @@ -2,6 +2,7 @@ #include <sstream> +#include "hg.h" #include "tdict.h" #include "sentence_metadata.h" #include "stringlib.h" diff --git a/decoder/ff_tagger.h b/decoder/ff_tagger.h index bd5b62c0..46418b0c 100644 --- a/decoder/ff_tagger.h +++ b/decoder/ff_tagger.h @@ -18,7 +18,7 @@ class Tagger_BigramIndicator : public FeatureFunction { Tagger_BigramIndicator(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -39,7 +39,7 @@ class LexicalPairIndicator : public FeatureFunction { virtual void PrepareForInput(const SentenceMetadata& smeta); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -59,7 +59,7 @@ class OutputIndicator : public FeatureFunction { OutputIndicator(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h index d7a2dda8..ba3d0b9b 100644 --- a/decoder/ff_wordalign.h +++ b/decoder/ff_wordalign.h @@ -13,7 +13,7 @@ class RelativeSentencePosition : public FeatureFunction { RelativeSentencePosition(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -36,7 +36,7 @@ class SourceBigram : public FeatureFunction { void PrepareForInput(const SentenceMetadata& smeta); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -55,7 +55,7 @@ class LexNullJump : public FeatureFunction { LexNullJump(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -72,7 +72,7 @@ class NewJump : public FeatureFunction { NewJump(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -109,7 +109,7 @@ class LexicalTranslationTrigger : public FeatureFunction { LexicalTranslationTrigger(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -132,14 +132,14 @@ class BlunsomSynchronousParseHack : public FeatureFunction { BlunsomSynchronousParseHack(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, void* out_context) const; private: inline bool DoesNotBelong(const void* state) const { - for (int i = 0; i < NumBytesContext(); ++i) { + for (int i = 0; i < StateSize(); ++i) { if (*(static_cast<const unsigned char*>(state) + i)) return false; } return true; @@ -148,9 +148,9 @@ class BlunsomSynchronousParseHack : public FeatureFunction { inline void AppendAntecedentString(const void* state, std::vector<WordID>* yield) const { int i = 0; int ind = 0; - while (i < NumBytesContext() && !(*(static_cast<const unsigned char*>(state) + i))) { ++i; ind += 8; } - // std::cerr << i << " " << NumBytesContext() << std::endl; - assert(i != NumBytesContext()); + while (i < StateSize() && !(*(static_cast<const unsigned char*>(state) + i))) { ++i; ind += 8; } + // std::cerr << i << " " << StateSize() << std::endl; + assert(i != StateSize()); assert(ind < cur_ref_->size()); int cur = *(static_cast<const unsigned char*>(state) + i); int comp = 1; @@ -171,7 +171,7 @@ class BlunsomSynchronousParseHack : public FeatureFunction { } inline void SetStateMask(int start, int end, void* state) const { - assert((end / 8) < NumBytesContext()); + assert((end / 8) < StateSize()); int i = 0; int comp = 1; for (int j = 0; j < start; ++j) { @@ -209,7 +209,7 @@ class WordPairFeatures : public FeatureFunction { WordPairFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -226,7 +226,7 @@ class IdentityCycleDetector : public FeatureFunction { IdentityCycleDetector(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -242,7 +242,7 @@ class InputIndicator : public FeatureFunction { InputIndicator(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, @@ -258,7 +258,7 @@ class Fertility : public FeatureFunction { Fertility(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ff_wordset.cc b/decoder/ff_wordset.cc index 44468899..70cea7de 100644 --- a/decoder/ff_wordset.cc +++ b/decoder/ff_wordset.cc @@ -1,5 +1,6 @@ #include "ff_wordset.h" +#include "hg.h" #include "fdict.h" #include <sstream> #include <iostream> diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h index 7c9a3fb7..639e1514 100644 --- a/decoder/ff_wordset.h +++ b/decoder/ff_wordset.h @@ -2,6 +2,7 @@ #define _FF_WORDSET_H_ #include "ff.h" +#include "tdict.h" #include <tr1/unordered_set> #include <boost/algorithm/string.hpp> @@ -32,11 +33,9 @@ class WordSet : public FeatureFunction { ~WordSet() { } - Features features() const { return single_feature(fid_); } - protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/ffset.cc b/decoder/ffset.cc new file mode 100644 index 00000000..5820f421 --- /dev/null +++ b/decoder/ffset.cc @@ -0,0 +1,72 @@ +#include "ffset.h" + +#include "ff.h" +#include "tdict.h" +#include "hg.h" + +using namespace std; + +ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) : + models_(models), + weights_(w), + state_size_(0), + model_state_pos_(models.size()) { + for (int i = 0; i < models_.size(); ++i) { + model_state_pos_[i] = state_size_; + state_size_ += models_[i]->StateSize(); + } +} + +void ModelSet::PrepareForInput(const SentenceMetadata& smeta) { + for (int i = 0; i < models_.size(); ++i) + const_cast<FeatureFunction*>(models_[i])->PrepareForInput(smeta); +} + +void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta, + const Hypergraph& /* hg */, + const FFStates& node_states, + HG::Edge* edge, + FFState* context, + prob_t* combination_cost_estimate) const { + //edge->reset_info(); + context->resize(state_size_); + if (state_size_ > 0) { + memset(&(*context)[0], 0, state_size_); + } + SparseVector<double> est_vals; // only computed if combination_cost_estimate is non-NULL + if (combination_cost_estimate) *combination_cost_estimate = prob_t::One(); + for (int i = 0; i < models_.size(); ++i) { + const FeatureFunction& ff = *models_[i]; + void* cur_ff_context = NULL; + vector<const void*> ants(edge->tail_nodes_.size()); + bool has_context = ff.StateSize() > 0; + if (has_context) { + int spos = model_state_pos_[i]; + cur_ff_context = &(*context)[spos]; + for (int i = 0; i < ants.size(); ++i) { + ants[i] = &node_states[edge->tail_nodes_[i]][spos]; + } + } + ff.TraversalFeatures(smeta, *edge, ants, &edge->feature_values_, &est_vals, cur_ff_context); + } + if (combination_cost_estimate) + combination_cost_estimate->logeq(est_vals.dot(weights_)); + edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); +} + +void ModelSet::AddFinalFeatures(const FFState& state, HG::Edge* edge,SentenceMetadata const& smeta) const { + assert(1 == edge->rule_->Arity()); + //edge->reset_info(); + for (int i = 0; i < models_.size(); ++i) { + const FeatureFunction& ff = *models_[i]; + const void* ant_state = NULL; + bool has_context = ff.StateSize() > 0; + if (has_context) { + int spos = model_state_pos_[i]; + ant_state = &state[spos]; + } + ff.FinalTraversalFeatures(ant_state, &edge->feature_values_); + } + edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); +} + diff --git a/decoder/ffset.h b/decoder/ffset.h new file mode 100644 index 00000000..28aef667 --- /dev/null +++ b/decoder/ffset.h @@ -0,0 +1,57 @@ +#ifndef _FFSET_H_ +#define _FFSET_H_ + +#include <vector> +#include "value_array.h" +#include "prob.h" + +namespace HG { struct Edge; struct Node; } +class Hypergraph; +class FeatureFunction; +class SentenceMetadata; +class FeatureFunction; // see definition below + +// TODO let states be dynamically sized +typedef ValueArray<uint8_t> FFState; // this is a fixed array, but about 10% faster than string + +//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size) +typedef std::vector<FFState> FFStates; + +// this class is a set of FeatureFunctions that can be used to score, rescore, +// etc. a (translation?) forest +class ModelSet { + public: + ModelSet(const std::vector<double>& weights, + const std::vector<const FeatureFunction*>& models); + + // sets edge->feature_values_ and edge->edge_prob_ + // NOTE: edge must not necessarily be in hg.edges_ but its TAIL nodes + // must be. edge features are supposed to be overwritten, not added to (possibly because rule features aren't in ModelSet so need to be left alone + void AddFeaturesToEdge(const SentenceMetadata& smeta, + const Hypergraph& hg, + const FFStates& node_states, + HG::Edge* edge, + FFState* residual_context, + prob_t* combination_cost_estimate = NULL) const; + + //this is called INSTEAD of above when result of edge is goal (must be a unary rule - i.e. one variable, but typically it's assumed that there are no target terminals either (e.g. for LM)) + void AddFinalFeatures(const FFState& residual_context, + HG::Edge* edge, + SentenceMetadata const& smeta) const; + + // this is called once before any feature functions apply to a hypergraph + // it can be used to initialize sentence-specific data structures + void PrepareForInput(const SentenceMetadata& smeta); + + bool empty() const { return models_.empty(); } + + bool stateless() const { return !state_size_; } + + private: + std::vector<const FeatureFunction*> models_; + const std::vector<double>& weights_; + int state_size_; + std::vector<int> model_state_pos_; +}; + +#endif diff --git a/decoder/grammar.cc b/decoder/grammar.cc index d1fe53af..ee43f537 100644 --- a/decoder/grammar.cc +++ b/decoder/grammar.cc @@ -127,48 +127,3 @@ bool TextGrammar::HasRuleForSpan(int /* i */, int /* j */, int distance) const { return (max_span_ >= distance); } -GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {} - -void RefineRule(TRulePtr pt, const unsigned int ctf_level){ - for (unsigned int i=0; i<ctf_level; ++i){ - TRulePtr r(new TRule(*pt)); - pt->fine_rules_.reset(new vector<TRulePtr>); - pt->fine_rules_->push_back(r); - pt = r; - } -} - -GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt, const unsigned int ctf_level) { - TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [1]")); - AddRule(stop_glue); - RefineRule(stop_glue, ctf_level); - TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1")); - AddRule(glue); - RefineRule(glue, ctf_level); -} - -bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { - return (i == 0); -} - -PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) { - unordered_set<WordID> ss; - for (int i = 0; i < input.size(); ++i) { - const vector<LatticeArc>& alts = input[i]; - for (int k = 0; k < alts.size(); ++k) { - const int j = alts[k].dist2next + i; - const string& src = TD::Convert(alts[k].label); - if (ss.count(alts[k].label) == 0) { - TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1")); - pt->a_.push_back(AlignmentPoint(0,0)); - AddRule(pt); - RefineRule(pt, ctf_level); - ss.insert(alts[k].label); - } - } - } -} - -bool PassThroughGrammar::HasRuleForSpan(int, int, int distance) const { - return (distance < 2); -} diff --git a/decoder/grammar.h b/decoder/grammar.h index e6a15a69..add1a235 100644 --- a/decoder/grammar.h +++ b/decoder/grammar.h @@ -81,18 +81,4 @@ struct TextGrammar : public Grammar { }; -struct GlueGrammar : public TextGrammar { - // read glue grammar from file - explicit GlueGrammar(const std::string& file); - GlueGrammar(const std::string& goal_nt, const std::string& default_nt, const unsigned int ctf_level=0); // "S", "X" - virtual bool HasRuleForSpan(int i, int j, int distance) const; -}; - -struct PassThroughGrammar : public TextGrammar { - PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0); - virtual bool HasRuleForSpan(int i, int j, int distance) const; -}; - -void RefineRule(TRulePtr pt, const unsigned int ctf_level); - #endif diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc index 4500490a..6d2c6e67 100644 --- a/decoder/grammar_test.cc +++ b/decoder/grammar_test.cc @@ -10,14 +10,16 @@ #include "tdict.h" #include "grammar.h" #include "bottom_up_parser.h" +#include "hg.h" #include "ff.h" +#include "ffset.h" #include "weights.h" using namespace std; struct GrammarTest { GrammarTest() { - std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Weights::InitFromFile(path + "/weights.gt", &wts); } vector<weight_t> wts; @@ -41,7 +43,7 @@ BOOST_AUTO_TEST_CASE(TestTextGrammar) { } BOOST_AUTO_TEST_CASE(TestTextGrammarFile) { - std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); GrammarPtr g(new TextGrammar(path + "/grammar.prune")); vector<GrammarPtr> grammars(1, g); diff --git a/decoder/hg.h b/decoder/hg.h index 591e98ce..3d8cd9bc 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -33,47 +33,20 @@ // slow #undef HG_EDGES_TOPO_SORTED -class Hypergraph; -typedef boost::shared_ptr<Hypergraph> HypergraphP; - -// class representing an acyclic hypergraph -// - edges have 1 head, 0..n tails -class Hypergraph { -public: - Hypergraph() : is_linear_chain_(false) {} +// SmallVector is a fast, small vector<int> implementation for sizes <= 2 +typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_ +typedef std::vector<int> EdgesVector; // indices in edges_ - // SmallVector is a fast, small vector<int> implementation for sizes <= 2 - typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_ - typedef std::vector<int> EdgesVector; // indices in edges_ - - // TODO get rid of cat_? - // TODO keep cat_ and add span and/or state? :) - struct Node { - Node() : id_(), cat_() {} - int id_; // equal to this object's position in the nodes_ vector - WordID cat_; // non-terminal category if <0, 0 if not set - WordID NT() const { return -cat_; } - EdgesVector in_edges_; // an in edge is an edge with this node as its head. (in edges come from the bottom up to us) indices in edges_ - EdgesVector out_edges_; // an out edge is an edge with this node as its tail. (out edges leave us up toward the top/goal). indices in edges_ - void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting - cat_=o.cat_; - } - void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) { - copy_fixed(o); - id_=n2[id_]; - e2.reindex_push_back(o.in_edges_,in_edges_); - e2.reindex_push_back(o.out_edges_,out_edges_); - } - }; +enum { + NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF +}; +namespace HG { - // TODO get rid of edge_prob_? (can be computed on the fly as the dot - // product of the weight vector and the feature values) struct Edge { -// int poplimit; //TODO: cube pruning per edge limit? per node didn't work well at all. also, inside cost + outside(node) is the same information i'd use to set a per-edge limit anyway - and nonmonotonicity in cube pruning may mean it's good to favor edge (in same node) w/ relatively worse score Edge() : i_(-1), j_(-1), prev_i_(-1), prev_j_(-1) {} Edge(int id,Edge const& copy_pod_from) : id_(id) { copy_pod(copy_pod_from); } // call copy_features yourself later. - Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than setting after + Edge(int id,Edge const& copy_from,TailNodeVector const& tail) // fully inits - probably more expensive when push_back(Edge(...)) than sett : tail_nodes_(tail),id_(id) { copy_pod(copy_from);copy_features(copy_from); } inline int Arity() const { return tail_nodes_.size(); } int head_node_; // refers to a position in nodes_ @@ -83,8 +56,6 @@ public: prob_t edge_prob_; // dot product of weights and feat_values int id_; // equal to this object's position in the edges_ vector - //FIXME: these span ids belong in Node, not Edge, right? every node should have the same spans. - // span info. typically, i_ and j_ refer to indices in the source sentence. // In synchronous parsing, i_ and j_ will refer to target sentence/lattice indices // while prev_i_ prev_j_ will refer to positions in the source. @@ -97,54 +68,6 @@ public: short int j_; short int prev_i_; short int prev_j_; - - void copy_info(Edge const& o) { -#if USE_INFO_EDGE - set_info(o.info_.str()); // by convention, each person putting info here starts with a separator (e.g. space). it's empty if nobody put any info there. -#else - (void) o; -#endif - } - void copy_pod(Edge const& o) { - rule_=o.rule_; - i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; - } - void copy_features(Edge const& o) { - feature_values_=o.feature_values_; - copy_info(o); - } - void copy_fixed(Edge const& o) { - copy_pod(o); - copy_features(o); - edge_prob_ = o.edge_prob_; - } - void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) { - copy_fixed(o); - head_node_=n2[o.head_node_]; - id_=e2[o.id_]; - n2.reindex_push_back(o.tail_nodes_,tail_nodes_); - } - -#if USE_INFO_EDGE - std::ostringstream info_; - void set_info(std::string const& s) { - info_.str(s); - info_.seekp(0,std::ios_base::end); - } - Edge(Edge const& o) : head_node_(o.head_node_),tail_nodes_(o.tail_nodes_),rule_(o.rule_),feature_values_(o.feature_values_),edge_prob_(o.edge_prob_),id_(o.id_),i_(o.i_),j_(o.j_),prev_i_(o.prev_i_),prev_j_(o.prev_j_), info_(o.info_.str(),std::ios_base::ate) { -// info_.seekp(0,std::ios_base::end); - } - void operator=(Edge const& o) { - head_node_ = o.head_node_; tail_nodes_ = o.tail_nodes_; rule_ = o.rule_; feature_values_ = o.feature_values_; edge_prob_ = o.edge_prob_; id_ = o.id_; i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; - set_info(o.info_.str()); - } - std::string info() const { return info_.str(); } - void reset_info() { info_.str(""); info_.clear(); } -#else - std::string info() const { return std::string(); } - void reset_info() { } - void set_info(std::string const& ) { } -#endif void show(std::ostream &o,unsigned mask=SPAN|RULE) const { o<<'{'; if (mask&CATEGORY) @@ -159,10 +82,6 @@ public: o<<' '<<feature_values_; if (mask&RULE) o<<' '<<rule_->AsString(mask&RULE_LHS); - if (USE_INFO_EDGE) { - std::string const& i=info(); - if (mask&&!i.empty()) o << " |||"<<i; // remember, the initial space is expected as part of i - } o<<'}'; } std::string show(unsigned mask=SPAN|RULE) const { @@ -170,12 +89,28 @@ public: show(o,mask); return o.str(); } - /* generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle) - - FIXME: make kbest create a simple derivation-tree structure (could be a - hg), and replace the list-of-edges viterbi.h with a tree-structured one. - CreateViterbiHypergraph can do for 1best, though. - */ + void copy_pod(Edge const& o) { + rule_=o.rule_; + i_ = o.i_; j_ = o.j_; prev_i_ = o.prev_i_; prev_j_ = o.prev_j_; + } + void copy_features(Edge const& o) { + feature_values_=o.feature_values_; + } + void copy_fixed(Edge const& o) { + copy_pod(o); + copy_features(o); + edge_prob_ = o.edge_prob_; + } + void copy_reindex(Edge const& o,indices_after const& n2,indices_after const& e2) { + copy_fixed(o); + head_node_=n2[o.head_node_]; + id_=e2[o.id_]; + n2.reindex_push_back(o.tail_nodes_,tail_nodes_); + } + // generic recursion re: child_handle=re(tail_nodes_[i],i,parent_handle) + // FIXME: make kbest create a simple derivation-tree structure (could be a + // hg), and replace the list-of-edges viterbi.h with a tree-structured one. + // CreateViterbiHypergraph can do for 1best, though. template <class EdgeRecurse,class TEdgeHandle> std::string derivation_tree(EdgeRecurse const& re,TEdgeHandle const& eh,bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const { std::ostringstream o; @@ -203,7 +138,43 @@ public: } }; - // all this info ought to live in Node, but for some reason it's on Edges. + // TODO get rid of cat_? + // TODO keep cat_ and add span and/or state? :) + struct Node { + Node() : id_(), cat_() {} + int id_; // equal to this object's position in the nodes_ vector + WordID cat_; // non-terminal category if <0, 0 if not set + WordID NT() const { return -cat_; } + EdgesVector in_edges_; // an in edge is an edge with this node as its head. (in edges come from the bottom up to us) indices in edges_ + EdgesVector out_edges_; // an out edge is an edge with this node as its tail. (out edges leave us up toward the top/goal). indices in edges_ + void copy_fixed(Node const& o) { // nonstructural fields only - structural ones are managed by sorting/pruning/subsetting + cat_=o.cat_; + } + void copy_reindex(Node const& o,indices_after const& n2,indices_after const& e2) { + copy_fixed(o); + id_=n2[id_]; + e2.reindex_push_back(o.in_edges_,in_edges_); + e2.reindex_push_back(o.out_edges_,out_edges_); + } + }; + +} // namespace HG + +class Hypergraph; +typedef boost::shared_ptr<Hypergraph> HypergraphP; +// class representing an acyclic hypergraph +// - edges have 1 head, 0..n tails +class Hypergraph { +public: + Hypergraph() : is_linear_chain_(false) {} + typedef HG::Node Node; + typedef HG::Edge Edge; + typedef SmallVectorUnsigned TailNodeVector; // indices in nodes_ + typedef std::vector<int> EdgesVector; // indices in edges_ + enum { + NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF + }; + // except for stateful models that have split nt,span, this should identify the node void SetNodeOrigin(int nodeid,NTSpan &r) const { Node const &n=nodes_[nodeid]; @@ -230,18 +201,9 @@ public: } return s; } - // 0 if none, -TD index otherwise (just like in rule) WordID NodeLHS(int nodeid) const { Node const &n=nodes_[nodeid]; return n.NT(); - /* - if (!n.in_edges_.empty()) { - Edge const& e=edges_[n.in_edges_.front()]; - if (e.rule_) - return -e.rule_->lhs_; - } - return 0; - */ } typedef std::vector<prob_t> EdgeProbs; @@ -250,14 +212,8 @@ public: typedef std::vector<bool> NodeMask; std::string show_viterbi_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const; -// builds viterbi hg and returns it formatted as a pretty string - - enum { - NONE=0,CATEGORY=1,SPAN=2,PROB=4,FEATURES=8,RULE=16,RULE_LHS=32,PREV_SPAN=64,ALL=0xFFFFFFFF - }; std::string show_first_tree(bool indent=true,int show_mask=SPAN|RULE,int maxdepth=0x7FFFFFFF,int depth=0) const; - // same as above, but takes in_edges_[0] all the way down - to make it viterbi cost (1-best), call ViterbiSortInEdges() first typedef Edge const* EdgeHandle; EdgeHandle operator()(int tailn,int /*taili*/,EdgeHandle /*parent*/) const { @@ -334,7 +290,7 @@ public: Edge* AddEdge(Edge const& in_edge, const TailNodeVector& tail) { edges_.push_back(Edge(edges_.size(),in_edge)); Edge* edge = &edges_.back(); - edge->copy_features(in_edge); + edge->feature_values_ = in_edge.feature_values_; edge->tail_nodes_ = tail; // possibly faster than copying to Edge() constructed above then copying via push_back. perhaps optimized it's the same. index_tails(*edge); return edge; @@ -503,9 +459,9 @@ public: template <class V> void visit_edges_topo(V &v) { - for (int i = 0; i < nodes_.size(); ++i) { + for (unsigned i = 0; i < nodes_.size(); ++i) { EdgesVector const& in=nodes_[i].in_edges_; - for (int j=0;j<in.size();++j) { + for (unsigned j=0;j<in.size();++j) { int e=in[j]; v(i,e,edges_[e]); } @@ -534,14 +490,14 @@ private: // for generic Viterbi/Inside algorithms struct EdgeProb { typedef prob_t Weight; - inline const prob_t& operator()(const Hypergraph::Edge& e) const { return e.edge_prob_; } + inline const prob_t& operator()(const HG::Edge& e) const { return e.edge_prob_; } }; struct EdgeSelectEdgeWeightFunction { typedef prob_t Weight; typedef std::vector<bool> EdgeMask; EdgeSelectEdgeWeightFunction(const EdgeMask& v) : v_(v) {} - inline prob_t operator()(const Hypergraph::Edge& e) const { + inline prob_t operator()(const HG::Edge& e) const { if (v_[e.id_]) return prob_t::One(); else return prob_t::Zero(); } @@ -551,7 +507,7 @@ private: struct ScaledEdgeProb { ScaledEdgeProb(const double& alpha) : alpha_(alpha) {} - inline prob_t operator()(const Hypergraph::Edge& e) const { return e.edge_prob_.pow(alpha_); } + inline prob_t operator()(const HG::Edge& e) const { return e.edge_prob_.pow(alpha_); } const double alpha_; typedef prob_t Weight; }; @@ -560,7 +516,7 @@ struct ScaledEdgeProb { struct EdgeFeaturesAndProbWeightFunction { typedef SparseVector<prob_t> Weight; typedef Weight Result; //TODO: change Result->Weight everywhere? - inline const Weight operator()(const Hypergraph::Edge& e) const { + inline const Weight operator()(const HG::Edge& e) const { SparseVector<prob_t> res; for (SparseVector<double>::const_iterator it = e.feature_values_.begin(); it != e.feature_values_.end(); ++it) @@ -571,7 +527,7 @@ struct EdgeFeaturesAndProbWeightFunction { struct TransitionCountWeightFunction { typedef double Weight; - inline double operator()(const Hypergraph::Edge& e) const { (void)e; return 1.0; } + inline double operator()(const HG::Edge& e) const { (void)e; return 1.0; } }; #endif diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index 3a68a429..64c6663e 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -28,7 +28,7 @@ struct HGReader : public JSONParser { hg.ConnectEdgeToHeadNode(&hg.edges_[in_edges[i]], node); } } - void CreateEdge(const TRulePtr& rule, FeatureVector* feats, const SmallVectorUnsigned& tail) { + void CreateEdge(const TRulePtr& rule, SparseVector<double>* feats, const SmallVectorUnsigned& tail) { Hypergraph::Edge* edge = hg.AddEdge(rule, tail); feats->swap(edge->feature_values_); edge->i_ = spans[0]; @@ -392,8 +392,8 @@ string HypergraphIO::AsPLF(const Hypergraph& hg, bool include_global_parentheses const Hypergraph::Edge& e = hg.edges_[hg.nodes_[i].out_edges_[j]]; const string output = e.rule_->e_.size() ==2 ? Escape(TD::Convert(e.rule_->e_[1])) : EPS; double prob = log(e.edge_prob_); - if (isinf(prob)) { prob = -9e20; } - if (isnan(prob)) { prob = 0; } + if (std::isinf(prob)) { prob = -9e20; } + if (std::isnan(prob)) { prob = 0; } os << "('" << output << "'," << prob << "," << e.head_node_ - i << "),"; } os << "),"; diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc index 37469748..8519e559 100644 --- a/decoder/hg_test.cc +++ b/decoder/hg_test.cc @@ -339,7 +339,7 @@ BOOST_AUTO_TEST_CASE(TestAddExpectations) { BOOST_AUTO_TEST_CASE(Small) { Hypergraph hg; - std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : "test_data"); + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); CreateSmallHG(&hg, path); SparseVector<double> wts; wts.set_value(FD::Convert("Model_0"), -2.0); diff --git a/decoder/incremental.cc b/decoder/incremental.cc new file mode 100644 index 00000000..85647a44 --- /dev/null +++ b/decoder/incremental.cc @@ -0,0 +1,167 @@ +#include "incremental.h" + +#include "hg.h" +#include "fdict.h" +#include "tdict.h" + +#include "lm/enumerate_vocab.hh" +#include "lm/model.hh" +#include "search/applied.hh" +#include "search/config.hh" +#include "search/context.hh" +#include "search/edge.hh" +#include "search/edge_generator.hh" +#include "search/rule.hh" +#include "search/vertex.hh" +#include "search/vertex_generator.hh" +#include "util/exception.hh" + +#include <boost/scoped_ptr.hpp> +#include <boost/scoped_array.hpp> + +#include <iostream> +#include <vector> + +namespace { + +struct MapVocab : public lm::EnumerateVocab { + public: + MapVocab() {} + + // Do not call after Lookup. + void Add(lm::WordIndex index, const StringPiece &str) { + const WordID cdec_id = TD::Convert(str.as_string()); + if (cdec_id >= out_.size()) out_.resize(cdec_id + 1); + out_[cdec_id] = index; + } + + // Assumes Add has been called and will never be called again. + lm::WordIndex FromCDec(WordID id) const { + return out_[out_.size() > id ? id : 0]; + } + + private: + std::vector<lm::WordIndex> out_; +}; + +template <class Model> class Incremental : public IncrementalBase { + public: + Incremental(const char *model_file, const std::vector<weight_t> &weights) : + IncrementalBase(weights), + m_(model_file, GetConfig()), + lm_(weights[FD::Convert("KLanguageModel")]), + oov_(weights[FD::Convert("KLanguageModel_OOV")]), + word_penalty_(weights[FD::Convert("WordPenalty")]) { + std::cerr << "Weights KLanguageModel " << lm_ << " KLanguageModel_OOV " << oov_ << " WordPenalty " << word_penalty_ << std::endl; + } + + void Search(unsigned int pop_limit, const Hypergraph &hg) const; + + private: + void ConvertEdge(const search::Context<Model> &context, search::Vertex *vertices, const Hypergraph::Edge &in, search::EdgeGenerator &gen) const; + + lm::ngram::Config GetConfig() { + lm::ngram::Config ret; + ret.enumerate_vocab = &vocab_; + return ret; + } + + MapVocab vocab_; + + const Model m_; + + const float lm_, oov_, word_penalty_; +}; + +void PrintApplied(const Hypergraph &hg, const search::Applied final) { + const std::vector<WordID> &words = static_cast<const Hypergraph::Edge*>(final.GetNote().vp)->rule_->e(); + const search::Applied *child(final.Children()); + for (std::vector<WordID>::const_iterator i = words.begin(); i != words.end(); ++i) { + if (*i > 0) { + std::cout << TD::Convert(*i) << ' '; + } else { + PrintApplied(hg, *child++); + } + } +} + +template <class Model> void Incremental<Model>::Search(unsigned int pop_limit, const Hypergraph &hg) const { + boost::scoped_array<search::Vertex> out_vertices(new search::Vertex[hg.nodes_.size()]); + search::Config config(lm_, pop_limit, search::NBestConfig(1)); + search::Context<Model> context(config, m_); + search::SingleBest best; + + for (unsigned int i = 0; i < hg.nodes_.size() - 1; ++i) { + search::EdgeGenerator gen; + const Hypergraph::EdgesVector &down_edges = hg.nodes_[i].in_edges_; + for (unsigned int j = 0; j < down_edges.size(); ++j) { + unsigned int edge_index = down_edges[j]; + ConvertEdge(context, out_vertices.get(), hg.edges_[edge_index], gen); + } + search::VertexGenerator<search::SingleBest> vertex_gen(context, out_vertices[i], best); + gen.Search(context, vertex_gen); + } + const search::Applied top = out_vertices[hg.nodes_.size() - 2].BestChild(); + if (!top.Valid()) { + std::cout << "NO PATH FOUND" << std::endl; + } else { + PrintApplied(hg, top); + std::cout << "||| " << top.GetScore() << std::endl; + } +} + +template <class Model> void Incremental<Model>::ConvertEdge(const search::Context<Model> &context, search::Vertex *vertices, const Hypergraph::Edge &in, search::EdgeGenerator &gen) const { + const std::vector<WordID> &e = in.rule_->e(); + std::vector<lm::WordIndex> words; + words.reserve(e.size()); + std::vector<search::PartialVertex> nts; + unsigned int terminals = 0; + float score = 0.0; + for (std::vector<WordID>::const_iterator word = e.begin(); word != e.end(); ++word) { + if (*word <= 0) { + nts.push_back(vertices[in.tail_nodes_[-*word]].RootPartial()); + if (nts.back().Empty()) return; + score += nts.back().Bound(); + words.push_back(lm::kMaxWordIndex); + } else { + ++terminals; + words.push_back(vocab_.FromCDec(*word)); + } + } + + search::PartialEdge out(gen.AllocateEdge(nts.size())); + + memcpy(out.NT(), &nts[0], sizeof(search::PartialVertex) * nts.size()); + + search::Note note; + note.vp = ∈ + out.SetNote(note); + + score += in.rule_->GetFeatureValues().dot(cdec_weights_); + score -= static_cast<float>(terminals) * word_penalty_ / M_LN10; + search::ScoreRuleRet res(search::ScoreRule(context.LanguageModel(), words, out.Between())); + score += res.prob * lm_ + static_cast<float>(res.oov) * oov_; + + out.SetScore(score); + + gen.AddEdge(out); +} + +} // namespace + +IncrementalBase *IncrementalBase::Load(const char *model_file, const std::vector<weight_t> &weights) { + lm::ngram::ModelType model_type; + if (!lm::ngram::RecognizeBinary(model_file, model_type)) model_type = lm::ngram::PROBING; + switch (model_type) { + case lm::ngram::PROBING: + return new Incremental<lm::ngram::ProbingModel>(model_file, weights); + case lm::ngram::REST_PROBING: + return new Incremental<lm::ngram::RestProbingModel>(model_file, weights); + default: + UTIL_THROW(util::Exception, "Sorry this lm type isn't supported yet."); + } +} + +IncrementalBase::~IncrementalBase() {} + +IncrementalBase::IncrementalBase(const std::vector<weight_t> &weights) : cdec_weights_(weights) {} diff --git a/decoder/incremental.h b/decoder/incremental.h new file mode 100644 index 00000000..f791a626 --- /dev/null +++ b/decoder/incremental.h @@ -0,0 +1,23 @@ +#ifndef _INCREMENTAL_H_ +#define _INCREMENTAL_H_ + +#include "weights.h" +#include <vector> + +class Hypergraph; + +class IncrementalBase { + public: + static IncrementalBase *Load(const char *model_file, const std::vector<weight_t> &weights); + + virtual ~IncrementalBase(); + + virtual void Search(unsigned int pop_limit, const Hypergraph &hg) const = 0; + + protected: + IncrementalBase(const std::vector<weight_t> &weights); + + const std::vector<weight_t> &cdec_weights_; +}; + +#endif // _INCREMENTAL_H_ diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h index f73a1d3f..c0377fe8 100644 --- a/decoder/inside_outside.h +++ b/decoder/inside_outside.h @@ -42,7 +42,7 @@ WeightType Inside(const Hypergraph& hg, Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_; const unsigned num_in_edges = in.size(); for (unsigned j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[in[j]]; + const HG::Edge& edge = hg.edges_[in[j]]; WeightType score = weight(edge); for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) { const int tail_node_index = edge.tail_nodes_[k]; @@ -74,7 +74,7 @@ void Outside(const Hypergraph& hg, Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_; const int num_in_edges = in.size(); for (int j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[in[j]]; + const HG::Edge& edge = hg.edges_[in[j]]; WeightType head_and_edge_weight = weight(edge); head_and_edge_weight *= head_node_outside_score; const int num_tail_nodes = edge.tail_nodes_.size(); @@ -138,7 +138,7 @@ struct InsideOutsides { Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_; const int num_in_edges = in.size(); for (int j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[in[j]]; + const HG::Edge& edge = hg.edges_[in[j]]; KType kbar_e = outside[i]; const int num_tail_nodes = edge.tail_nodes_.size(); for (int k = 0; k < num_tail_nodes; ++k) @@ -156,7 +156,7 @@ struct InsideOutsides { const int num_in_edges = in.size(); for (int j = 0; j < num_in_edges; ++j) { int edgei=in[j]; - const Hypergraph::Edge& edge = hg.edges_[edgei]; + const HG::Edge& edge = hg.edges_[edgei]; V x=weight(edge)*outside[i]; const int num_tail_nodes = edge.tail_nodes_.size(); for (int k = 0; k < num_tail_nodes; ++k) diff --git a/decoder/kbest.h b/decoder/kbest.h index 9af3a20e..9a55f653 100644 --- a/decoder/kbest.h +++ b/decoder/kbest.h @@ -48,7 +48,7 @@ namespace KBest { } struct Derivation { - Derivation(const Hypergraph::Edge& e, + Derivation(const HG::Edge& e, const SmallVectorInt& jv, const WeightType& w, const SparseVector<double>& f) : @@ -58,11 +58,11 @@ namespace KBest { feature_values(f) {} // dummy constructor, just for query - Derivation(const Hypergraph::Edge& e, + Derivation(const HG::Edge& e, const SmallVectorInt& jv) : edge(&e), j(jv) {} T yield; - const Hypergraph::Edge* const edge; + const HG::Edge* const edge; const SmallVectorInt j; const WeightType score; const SparseVector<double> feature_values; @@ -82,8 +82,8 @@ namespace KBest { Derivation const* d; explicit EdgeHandle(Derivation const* d) : d(d) { } // operator bool() const { return d->edge; } - operator Hypergraph::Edge const* () const { return d->edge; } -// Hypergraph::Edge const * operator ->() const { return d->edge; } + operator HG::Edge const* () const { return d->edge; } +// HG::Edge const * operator ->() const { return d->edge; } }; EdgeHandle operator()(unsigned t,unsigned taili,EdgeHandle const& parent) const { @@ -158,7 +158,7 @@ namespace KBest { // the yield is computed in LazyKthBest before the derivation is added to D // returns NULL if j refers to derivation numbers larger than the // antecedent structure define - Derivation* CreateDerivation(const Hypergraph::Edge& e, const SmallVectorInt& j) { + Derivation* CreateDerivation(const HG::Edge& e, const SmallVectorInt& j) { WeightType score = w(e); SparseVector<double> feats = e.feature_values_; for (int i = 0; i < e.Arity(); ++i) { @@ -177,7 +177,7 @@ namespace KBest { const Hypergraph::Node& node = g.nodes_[v]; for (unsigned i = 0; i < node.in_edges_.size(); ++i) { - const Hypergraph::Edge& edge = g.edges_[node.in_edges_[i]]; + const HG::Edge& edge = g.edges_[node.in_edges_[i]]; SmallVectorInt jv(edge.Arity(), 0); Derivation* d = CreateDerivation(edge, jv); assert(d); diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index b603e27a..d2c4715c 100644 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -12,6 +12,7 @@ #include "scorer.h" #include "hg.h" #include "ff_factory.h" +#include "ffset.h" #include "ff_bleu.h" #include "sparse_vector.h" #include "viterbi.h" @@ -26,7 +27,7 @@ struct Translation { typedef std::vector<WordID> Sentence; Sentence sentence; - FeatureVector features; + SparseVector<double> features; Translation() { } Translation(Hypergraph const& hg,WeightVector *feature_weights=0) { @@ -57,14 +58,14 @@ struct Oracle { } // feature 0 will be the error rate in fear and hope // move toward hope - FeatureVector ModelHopeGradient() const { - FeatureVector r=hope.features-model.features; + SparseVector<double> ModelHopeGradient() const { + SparseVector<double> r=hope.features-model.features; r.set_value(0,0); return r; } // move toward hope from fear - FeatureVector FearHopeGradient() const { - FeatureVector r=hope.features-fear.features; + SparseVector<double> FearHopeGradient() const { + SparseVector<double> r=hope.features-fear.features; r.set_value(0,0); return r; } diff --git a/decoder/program_options.h b/decoder/program_options.h index 87afb320..3cd7649a 100644 --- a/decoder/program_options.h +++ b/decoder/program_options.h @@ -94,7 +94,7 @@ struct any_printer : public boost::function<void (Ostream &,boost::any const&)> {} template <class T> - explicit any_printer(T const* tag) : F(typed_print<T>()) { + explicit any_printer(T const*) : F(typed_print<T>()) { } template <class T> diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index a978cfc2..3b43b586 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -15,13 +15,73 @@ #include "tdict.h" #include "viterbi.h" #include "verbose.h" +#include <tr1/unordered_map> #define foreach BOOST_FOREACH #define reverse_foreach BOOST_REVERSE_FOREACH using namespace std; +using namespace std::tr1; static bool printGrammarsUsed = false; +struct GlueGrammar : public TextGrammar { + // read glue grammar from file + explicit GlueGrammar(const std::string& file); + GlueGrammar(const std::string& goal_nt, const std::string& default_nt, const unsigned int ctf_level=0); // "S", "X" + virtual bool HasRuleForSpan(int i, int j, int distance) const; +}; + +struct PassThroughGrammar : public TextGrammar { + PassThroughGrammar(const Lattice& input, const std::string& cat, const unsigned int ctf_level=0); + virtual bool HasRuleForSpan(int i, int j, int distance) const; +}; + +GlueGrammar::GlueGrammar(const string& file) : TextGrammar(file) {} + +static void RefineRule(TRulePtr pt, const unsigned int ctf_level){ + for (unsigned int i=0; i<ctf_level; ++i){ + TRulePtr r(new TRule(*pt)); + pt->fine_rules_.reset(new vector<TRulePtr>); + pt->fine_rules_->push_back(r); + pt = r; + } +} + +GlueGrammar::GlueGrammar(const string& goal_nt, const string& default_nt, const unsigned int ctf_level) { + TRulePtr stop_glue(new TRule("[" + goal_nt + "] ||| [" + default_nt + ",1] ||| [1]")); + AddRule(stop_glue); + RefineRule(stop_glue, ctf_level); + TRulePtr glue(new TRule("[" + goal_nt + "] ||| [" + goal_nt + ",1] ["+ default_nt + ",2] ||| [1] [2] ||| Glue=1")); + AddRule(glue); + RefineRule(glue, ctf_level); +} + +bool GlueGrammar::HasRuleForSpan(int i, int /* j */, int /* distance */) const { + return (i == 0); +} + +PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, const unsigned int ctf_level) { + unordered_set<WordID> ss; + for (int i = 0; i < input.size(); ++i) { + const vector<LatticeArc>& alts = input[i]; + for (int k = 0; k < alts.size(); ++k) { + const int j = alts[k].dist2next + i; + const string& src = TD::Convert(alts[k].label); + if (ss.count(alts[k].label) == 0) { + TRulePtr pt(new TRule("[" + cat + "] ||| " + src + " ||| " + src + " ||| PassThrough=1")); + pt->a_.push_back(AlignmentPoint(0,0)); + AddRule(pt); + RefineRule(pt, ctf_level); + ss.insert(alts[k].label); + } + } + } +} + +bool PassThroughGrammar::HasRuleForSpan(int, int, int distance) const { + return (distance < 2); +} + struct SCFGTranslatorImpl { SCFGTranslatorImpl(const boost::program_options::variables_map& conf) : max_span_limit(conf["scfg_max_span_limit"].as<int>()), diff --git a/decoder/tromble_loss.h b/decoder/tromble_loss.h index 599a2d54..fde33100 100644 --- a/decoder/tromble_loss.h +++ b/decoder/tromble_loss.h @@ -28,7 +28,7 @@ class TrombleLossComputer : private boost::base_from_member<boost::scoped_ptr<Tr protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector<const void*>& ant_contexts, SparseVector<double>* features, SparseVector<double>* estimated_features, diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc index 1b9c6665..9e381ac6 100644 --- a/decoder/viterbi.cc +++ b/decoder/viterbi.cc @@ -139,8 +139,8 @@ inline bool close_enough(double a,double b,double epsilon) return diff<=epsilon*fabs(a) || diff<=epsilon*fabs(b); } -FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) { - FeatureVector r; +SparseVector<double> ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) { + SparseVector<double> r; const prob_t p = Viterbi<FeatureVectorTraversal>(hg, &r); if (weights) { double logp=log(p); diff --git a/decoder/viterbi.h b/decoder/viterbi.h index 03e961a2..a8a0ea7f 100644 --- a/decoder/viterbi.h +++ b/decoder/viterbi.h @@ -14,10 +14,10 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name="forest" //TODO: make T a typename inside Traversal and WeightType a typename inside WeightFunction? // Traversal must implement: // typedef T Result; -// void operator()(Hypergraph::Edge const& e,const vector<const Result*>& ants, Result* result) const; +// void operator()(HG::Edge const& e,const vector<const Result*>& ants, Result* result) const; // WeightFunction must implement: // typedef prob_t Weight; -// Weight operator()(Hypergraph::Edge const& e) const; +// Weight operator()(HG::Edge const& e) const; template<class Traversal,class WeightFunction> typename WeightFunction::Weight Viterbi(const Hypergraph& hg, typename Traversal::Result* result, @@ -39,9 +39,9 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg, *cur_node_best_weight = WeightType(1); continue; } - Hypergraph::Edge const* edge_best=0; + HG::Edge const* edge_best=0; for (unsigned j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[cur_node.in_edges_[j]]; + const HG::Edge& edge = hg.edges_[cur_node.in_edges_[j]]; WeightType score = weight(edge); for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) score *= vit_weight[edge.tail_nodes_[k]]; @@ -51,7 +51,7 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg, } } assert(edge_best); - Hypergraph::Edge const& edgeb=*edge_best; + HG::Edge const& edgeb=*edge_best; std::vector<const T*> antsb(edgeb.tail_nodes_.size()); for (unsigned k = 0; k < edgeb.tail_nodes_.size(); ++k) antsb[k] = &vit_result[edgeb.tail_nodes_[k]]; @@ -98,7 +98,7 @@ prob_t Viterbi(const Hypergraph& hg, struct PathLengthTraversal { typedef int Result; - void operator()(const Hypergraph::Edge& edge, + void operator()(const HG::Edge& edge, const std::vector<const int*>& ants, int* result) const { (void) edge; @@ -109,7 +109,7 @@ struct PathLengthTraversal { struct ESentenceTraversal { typedef std::vector<WordID> Result; - void operator()(const Hypergraph::Edge& edge, + void operator()(const HG::Edge& edge, const std::vector<const Result*>& ants, Result* result) const { edge.rule_->ESubstitute(ants, result); @@ -118,7 +118,7 @@ struct ESentenceTraversal { struct ELengthTraversal { typedef int Result; - void operator()(const Hypergraph::Edge& edge, + void operator()(const HG::Edge& edge, const std::vector<const int*>& ants, int* result) const { *result = edge.rule_->ELength() - edge.rule_->Arity(); @@ -128,7 +128,7 @@ struct ELengthTraversal { struct FSentenceTraversal { typedef std::vector<WordID> Result; - void operator()(const Hypergraph::Edge& edge, + void operator()(const HG::Edge& edge, const std::vector<const Result*>& ants, Result* result) const { edge.rule_->FSubstitute(ants, result); @@ -142,7 +142,7 @@ struct ETreeTraversal { const std::string space; const std::string right; typedef std::vector<WordID> Result; - void operator()(const Hypergraph::Edge& edge, + void operator()(const HG::Edge& edge, const std::vector<const Result*>& ants, Result* result) const { Result tmp; @@ -162,7 +162,7 @@ struct FTreeTraversal { const std::string space; const std::string right; typedef std::vector<WordID> Result; - void operator()(const Hypergraph::Edge& edge, + void operator()(const HG::Edge& edge, const std::vector<const Result*>& ants, Result* result) const { Result tmp; @@ -177,8 +177,8 @@ struct FTreeTraversal { }; struct ViterbiPathTraversal { - typedef std::vector<Hypergraph::Edge const*> Result; - void operator()(const Hypergraph::Edge& edge, + typedef std::vector<HG::Edge const*> Result; + void operator()(const HG::Edge& edge, std::vector<Result const*> const& ants, Result* result) const { for (unsigned i = 0; i < ants.size(); ++i) @@ -189,8 +189,8 @@ struct ViterbiPathTraversal { }; struct FeatureVectorTraversal { - typedef FeatureVector Result; - void operator()(Hypergraph::Edge const& edge, + typedef SparseVector<double> Result; + void operator()(HG::Edge const& edge, std::vector<Result const*> const& ants, Result* result) const { for (unsigned i = 0; i < ants.size(); ++i) @@ -210,6 +210,6 @@ int ViterbiELength(const Hypergraph& hg); int ViterbiPathLength(const Hypergraph& hg); /// if weights supplied, assert viterbi prob = features.dot(*weights) (exception if fatal, cerr warn if not). return features (sum over all edges in viterbi derivation) -FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights=0,bool fatal_dotprod_disagreement=false); +SparseVector<double> ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights=0,bool fatal_dotprod_disagreement=false); #endif |