From 4d48a6d19521b24d9ac0987ce9a472d9ba574c4b Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 24 Jun 2012 16:42:56 +0200 Subject: RuleTargetBigramFeatures, parallelize.rb --- decoder/ff_rules.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'decoder/ff_rules.h') diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index 48d8bd05..08b168b0 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -22,9 +22,24 @@ class RuleIdentityFeatures : public FeatureFunction { mutable std::map rule2_fid_; }; -class RuleNgramFeatures : public FeatureFunction { +class RuleSourceBigramFeatures : public FeatureFunction { public: - RuleNgramFeatures(const std::string& param); + RuleSourceBigramFeatures(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const; + virtual void PrepareForInput(const SentenceMetadata& smeta); + private: + mutable std::map > rule2_feats_; +}; + +class RuleTargetBigramFeatures : public FeatureFunction { + public: + RuleTargetBigramFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, -- cgit v1.2.3 From 51b5c16c9110999ac573bd3383d7eb0e3f10fc37 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 16 Oct 2012 00:37:21 -0400 Subject: clean up of bad header includes --- decoder/Makefile.am | 2 + decoder/apply_models.cc | 1 + decoder/cdec_ff.cc | 1 + decoder/cfg.h | 2 +- decoder/cfg_format.h | 2 +- decoder/cfg_test.cc | 4 +- decoder/decoder.cc | 12 +-- decoder/exp_semiring.h | 2 +- decoder/ff.cc | 200 +++---------------------------------- decoder/ff.h | 238 +++----------------------------------------- decoder/ff_basic.cc | 80 +++++++++++++++ decoder/ff_basic.h | 68 +++++++++++++ decoder/ff_bleu.h | 2 +- decoder/ff_charset.cc | 6 +- decoder/ff_charset.h | 6 +- decoder/ff_context.cc | 2 + decoder/ff_context.h | 2 +- decoder/ff_csplit.cc | 1 + decoder/ff_csplit.h | 4 +- decoder/ff_dwarf.cc | 1 + decoder/ff_dwarf.h | 2 +- decoder/ff_external.cc | 8 +- decoder/ff_external.h | 6 +- decoder/ff_factory.h | 4 - decoder/ff_klm.cc | 6 -- decoder/ff_klm.h | 3 +- decoder/ff_lm.cc | 4 - decoder/ff_lm.h | 5 +- decoder/ff_ngrams.h | 2 +- decoder/ff_rules.cc | 2 + decoder/ff_rules.h | 5 +- decoder/ff_ruleshape.cc | 2 + decoder/ff_ruleshape.h | 2 +- decoder/ff_source_syntax.cc | 1 + decoder/ff_source_syntax.h | 4 +- decoder/ff_spans.cc | 2 + decoder/ff_spans.h | 4 +- decoder/ff_tagger.cc | 1 + decoder/ff_tagger.h | 6 +- decoder/ff_wordalign.h | 30 +++--- decoder/ff_wordset.cc | 1 + decoder/ff_wordset.h | 5 +- decoder/ffset.cc | 72 ++++++++++++++ decoder/ffset.h | 57 +++++++++++ decoder/grammar_test.cc | 2 + decoder/hg.h | 10 +- decoder/hg_io.cc | 2 +- decoder/inside_outside.h | 8 +- decoder/kbest.h | 14 +-- decoder/oracle_bleu.h | 11 +- decoder/program_options.h | 2 +- decoder/tromble_loss.h | 2 +- decoder/viterbi.cc | 4 +- decoder/viterbi.h | 32 +++--- example_extff/ff_example.cc | 2 + 55 files changed, 429 insertions(+), 530 deletions(-) create mode 100644 decoder/ff_basic.cc create mode 100644 decoder/ff_basic.h create mode 100644 decoder/ffset.cc create mode 100644 decoder/ffset.h (limited to 'decoder/ff_rules.h') diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 28863dbe..5c0a1964 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -56,6 +56,8 @@ libcdec_a_SOURCES = \ phrasetable_fst.cc \ trule.cc \ ff.cc \ + ffset.cc \ + ff_basic.cc \ ff_rules.cc \ ff_wordset.cc \ ff_context.cc \ diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc index 9ba59d1b..330de9e2 100644 --- a/decoder/apply_models.cc +++ b/decoder/apply_models.cc @@ -16,6 +16,7 @@ #include "verbose.h" #include "hg.h" #include "ff.h" +#include "ffset.h" #define NORMAL_CP 1 #define FAST_CP 2 diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index 54f6e12b..99ab7473 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -1,6 +1,7 @@ #include #include "ff.h" +#include "ff_basic.h" #include "ff_context.h" #include "ff_spans.h" #include "ff_lm.h" diff --git a/decoder/cfg.h b/decoder/cfg.h index 8cb29bb9..aeeacb83 100644 --- a/decoder/cfg.h +++ b/decoder/cfg.h @@ -130,7 +130,7 @@ struct CFG { int lhs; // index into nts RHS rhs; prob_t p; // h unused for now (there's nothing admissable, and p is already using 1st pass inside as pushed toward top) - FeatureVector f; // may be empty, unless copy_features on Init + SparseVector f; // may be empty, unless copy_features on Init IF_CFG_TRULE(TRulePtr rule;) int size() const { // for stats only return rhs.size(); diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h index 2f40d483..d12da261 100644 --- a/decoder/cfg_format.h +++ b/decoder/cfg_format.h @@ -100,7 +100,7 @@ struct CFGFormat { } } - void print_features(std::ostream &o,prob_t p,FeatureVector const& fv=FeatureVector()) const { + void print_features(std::ostream &o,prob_t p,SparseVector const& fv=SparseVector()) const { bool logp=(logprob_feat && p!=prob_t::One()); if (features || logp) { o << partsep; diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc index b8f4cf11..316c6d16 100644 --- a/decoder/cfg_test.cc +++ b/decoder/cfg_test.cc @@ -25,9 +25,9 @@ struct CFGTest : public TestWithParam { Hypergraph hg; CFG cfg; CFGFormat form; - FeatureVector weights; + SparseVector weights; - static void JsonFN(Hypergraph &hg,CFG &cfg,FeatureVector &featw,std::string file + static void JsonFN(Hypergraph &hg,CFG &cfg,SparseVector &featw,std::string file ,std::string const& wts="Model_0 1 EgivenF 1 f1 1") { istringstream ws(wts); diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 47b298b9..fef88d3f 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -29,6 +29,7 @@ #include "oracle_bleu.h" #include "apply_models.h" #include "ff.h" +#include "ffset.h" #include "ff_factory.h" #include "viterbi.h" #include "kbest.h" @@ -90,11 +91,6 @@ inline void ShowBanner() { cerr << "cdec v1.0 (c) 2009-2011 by Chris Dyer\n"; } -inline void show_models(po::variables_map const& conf,ModelSet &ms,char const* header) { - cerr<(); } @@ -132,7 +128,7 @@ inline boost::shared_ptr make_ff(string const& ffp,bool verbose } boost::shared_ptr pf = ff_registry.Create(ff, param); if (!pf) exit(1); - int nbyte=pf->NumBytesContext(); + int nbyte=pf->StateSize(); if (verbose_feature_functions && !SILENT) cerr<<"State is "< dummy; // = last_weights Oracle oc=oracle.ComputeOracle(smeta,&forest,dummy,10,conf["forest_output"].as()); if (!SILENT) cerr << " +Oracle BLEU forest (nodes/edges): " << forest.nodes_.size() << '/' << forest.edges_.size() << endl; if (!SILENT) cerr << " +Oracle BLEU (paths): " << forest.NumberOfPaths() << endl; diff --git a/decoder/exp_semiring.h b/decoder/exp_semiring.h index 111eaaf1..2a9034bb 100644 --- a/decoder/exp_semiring.h +++ b/decoder/exp_semiring.h @@ -59,7 +59,7 @@ struct PRWeightFunction { explicit PRWeightFunction(const PWeightFunction& pwf = PWeightFunction(), const RWeightFunction& rwf = RWeightFunction()) : pweight(pwf), rweight(rwf) {} - PRPair operator()(const Hypergraph::Edge& e) const { + PRPair operator()(const HG::Edge& e) const { const P p = pweight(e); const R r = rweight(e); return PRPair(p, r * p); diff --git a/decoder/ff.cc b/decoder/ff.cc index 008fcad4..6e276a5e 100644 --- a/decoder/ff.cc +++ b/decoder/ff.cc @@ -1,9 +1,3 @@ -//TODO: non-sparse vector for all feature functions? modelset applymodels keeps track of who has what features? it's nice having FF that could generate a handful out of 10000 possible feats, though. - -//TODO: actually score rule_feature()==true features once only, hash keyed on rule or modify TRule directly? need to keep clear in forest which features come from models vs. rules; then rescoring could drop all the old models features at once - -#include "fast_lexical_cast.hpp" -#include #include "ff.h" #include "tdict.h" @@ -16,8 +10,7 @@ FeatureFunction::~FeatureFunction() {} void FeatureFunction::PrepareForInput(const SentenceMetadata&) {} void FeatureFunction::FinalTraversalFeatures(const void* /* ant_state */, - SparseVector* /* features */) const { -} + SparseVector* /* features */) const {} string FeatureFunction::usage_helper(std::string const& name,std::string const& params,std::string const& details,bool sp,bool sd) { string r=name; @@ -32,188 +25,21 @@ string FeatureFunction::usage_helper(std::string const& name,std::string const& return r; } -Features FeatureFunction::single_feature(WordID feat) { - return Features(1,feat); -} - -Features ModelSet::all_features(std::ostream *warn,bool warn0) { - //return ::all_features(models_,weights_,warn,warn0); -} - -void show_features(Features const& ffs,DenseWeightVector const& weights_,std::ostream &out,std::ostream &warn,bool warn_zero_wt) { - out << "Weight Feature\n"; - for (unsigned i=0;i* final_features) const { + FinalTraversalFeatures(residual_state,final_features); } -void FeatureFunction::TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector& ant_states, - SparseVector* features, - SparseVector* estimated_features, - void* state) const { - throw std::runtime_error("TraversalFeaturesImpl not implemented - override it or TraversalFeaturesLog.\n"); +void FeatureFunction::TraversalFeaturesImpl(const SentenceMetadata&, + const Hypergraph::Edge&, + const std::vector&, + SparseVector*, + SparseVector*, + void*) const { + cerr << "TraversalFeaturesImpl not implemented - override it or TraversalFeaturesLog\n"; abort(); } -void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector& ant_states, - SparseVector* features, - SparseVector* estimated_features, - void* state) const { - (void) smeta; - (void) ant_states; - (void) state; - (void) estimated_features; - features->set_value(fid_, edge.rule_->EWords() * value_); -} - -SourceWordPenalty::SourceWordPenalty(const string& param) : - fid_(FD::Convert("SourceWordPenalty")), - value_(-1.0 / log(10)) { - if (!param.empty()) { - cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl; - } -} - -Features SourceWordPenalty::features() const { - return single_feature(fid_); -} - -Features WordPenalty::features() const { - return single_feature(fid_); -} - - -void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector& ant_states, - SparseVector* features, - SparseVector* estimated_features, - void* state) const { - (void) smeta; - (void) ant_states; - (void) state; - (void) estimated_features; - features->set_value(fid_, edge.rule_->FWords() * value_); -} - -ArityPenalty::ArityPenalty(const std::string& param) : - value_(-1.0 / log(10)) { - string fname = "Arity_"; - unsigned MAX=DEFAULT_MAX_ARITY; - using namespace boost; - if (!param.empty()) - MAX=lexical_cast(param); - for (unsigned i = 0; i <= MAX; ++i) { - WordID fid=FD::Convert(fname+lexical_cast(i)); - fids_.push_back(fid); - } - while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen. doesn't change anything -} - -Features ArityPenalty::features() const { - return Features(fids_.begin(),fids_.end()); -} - -void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector& ant_states, - SparseVector* features, - SparseVector* estimated_features, - void* state) const { - (void) smeta; - (void) ant_states; - (void) state; - (void) estimated_features; - unsigned a=edge.Arity(); - features->set_value(a& w, const vector& models) : - models_(models), - weights_(w), - state_size_(0), - model_state_pos_(models.size()) { - for (int i = 0; i < models_.size(); ++i) { - model_state_pos_[i] = state_size_; - state_size_ += models_[i]->NumBytesContext(); - } -} - -void ModelSet::PrepareForInput(const SentenceMetadata& smeta) { - for (int i = 0; i < models_.size(); ++i) - const_cast(models_[i])->PrepareForInput(smeta); -} - -void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta, - const Hypergraph& /* hg */, - const FFStates& node_states, - Hypergraph::Edge* edge, - FFState* context, - prob_t* combination_cost_estimate) const { - //edge->reset_info(); - context->resize(state_size_); - if (state_size_ > 0) { - memset(&(*context)[0], 0, state_size_); - } - SparseVector est_vals; // only computed if combination_cost_estimate is non-NULL - if (combination_cost_estimate) *combination_cost_estimate = prob_t::One(); - for (int i = 0; i < models_.size(); ++i) { - const FeatureFunction& ff = *models_[i]; - void* cur_ff_context = NULL; - vector ants(edge->tail_nodes_.size()); - bool has_context = ff.NumBytesContext() > 0; - if (has_context) { - int spos = model_state_pos_[i]; - cur_ff_context = &(*context)[spos]; - for (int i = 0; i < ants.size(); ++i) { - ants[i] = &node_states[edge->tail_nodes_[i]][spos]; - } - } - ff.TraversalFeatures(smeta, *edge, ants, &edge->feature_values_, &est_vals, cur_ff_context); - } - if (combination_cost_estimate) - combination_cost_estimate->logeq(est_vals.dot(weights_)); - edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); -} - -void ModelSet::AddFinalFeatures(const FFState& state, Hypergraph::Edge* edge,SentenceMetadata const& smeta) const { - assert(1 == edge->rule_->Arity()); - //edge->reset_info(); - for (int i = 0; i < models_.size(); ++i) { - const FeatureFunction& ff = *models_[i]; - const void* ant_state = NULL; - bool has_context = ff.NumBytesContext() > 0; - if (has_context) { - int spos = model_state_pos_[i]; - ant_state = &state[spos]; - } - ff.FinalTraversalFeatures(smeta, *edge, ant_state, &edge->feature_values_); - } - edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); -} - diff --git a/decoder/ff.h b/decoder/ff.h index 227787ca..4acbb7e3 100644 --- a/decoder/ff.h +++ b/decoder/ff.h @@ -1,26 +1,13 @@ #ifndef _FF_H_ #define _FF_H_ -#define DEBUG_INIT 0 -#if DEBUG_INIT -# include -# define DBGINIT(a) do { std::cerr< +#include #include -#include -#include "fdict.h" -#include "hg.h" -#include "feature_vector.h" -#include "value_array.h" +#include "sparse_vector.h" +namespace HG { struct Edge; struct Node; } +class Hypergraph; class SentenceMetadata; -class FeatureFunction; // see definition below - -typedef std::vector Features; // set of features ids // if you want to develop a new feature, inherit from this class and // override TraversalFeaturesImpl(...). If it's a feature that returns / @@ -30,51 +17,31 @@ class FeatureFunction { friend class ExternalFeature; public: std::string name_; // set by FF factory using usage() - bool debug_; // also set by FF factory checking param for immediate initial "debug" - //called after constructor, but before name_ and debug_ have been set - virtual void Init() { DBGINIT("default FF::Init name="< 0; } + int StateSize() const { return state_size_; } // override this. not virtual because we want to expose this to factory template for help before creating a FF static std::string usage(bool show_params,bool show_details) { return usage_helper("FIXME_feature_needs_name","[no parameters]","[no documentation yet]",show_params,show_details); } static std::string usage_helper(std::string const& name,std::string const& params,std::string const& details,bool show_params,bool show_details); - static Features single_feature(int feat); -public: - - // stateless feature that doesn't depend on source span: override and return true. then your feature can be precomputed over rules. - virtual bool rule_feature() const { return false; } // called once, per input, before any feature calls to TraversalFeatures, etc. // used to initialize sentence-specific data structures virtual void PrepareForInput(const SentenceMetadata& smeta); - //OVERRIDE THIS: - virtual Features features() const { return single_feature(FD::Convert(name_)); } - // returns the number of bytes of context that this feature function will - // (maximally) use. By default, 0 ("stateless" models in Hiero/Joshua). - // NOTE: this value is fixed for the instance of your class, you cannot - // use different amounts of memory for different nodes in the forest. this will be read as soon as you create a ModelSet, then fixed forever on - inline int NumBytesContext() const { return state_size_; } - // Compute the feature values and (if this applies) the estimates of the // feature values when this edge is used incorporated into a larger context inline void TraversalFeatures(const SentenceMetadata& smeta, - Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, + SparseVector* features, + SparseVector* estimated_features, void* out_state) const { - TraversalFeaturesLog(smeta, edge, ant_contexts, + TraversalFeaturesImpl(smeta, edge, ant_contexts, features, estimated_features, out_state); // TODO it's easy for careless feature function developers to overwrite // the end of their state and clobber someone else's memory. These bugs @@ -89,16 +56,13 @@ public: protected: virtual void FinalTraversalFeatures(const void* residual_state, - FeatureVector* final_features) const; + SparseVector* final_features) const; public: //override either this or one of above. virtual void FinalTraversalFeatures(const SentenceMetadata& /* smeta */, - Hypergraph::Edge& /* edge */, // so you can log() + const HG::Edge& /* edge */, const void* residual_state, - FeatureVector* final_features) const { - FinalTraversalFeatures(residual_state,final_features); - } - + SparseVector* final_features) const; protected: // context is a pointer to a buffer of size NumBytesContext() that the @@ -108,191 +72,19 @@ public: // of the particular FeatureFunction class. There is one exception: // equality of the contents (i.e., memcmp) is required to determine whether // two states can be combined. - - // by Log, I mean that the edge is non-const only so you can log to it with INFO_EDGE(edge,msg<<"etc."). most features don't use this so implement the below. it has a different name to allow a default implementation without name hiding when inheriting + overriding just 1. - virtual void TraversalFeaturesLog(const SentenceMetadata& smeta, - Hypergraph::Edge& edge, // this is writable only so you can use log() - const std::vector& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, - void* context) const { - TraversalFeaturesImpl(smeta,edge,ant_contexts,features,estimated_features,context); - } - - // override above or below. virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - Hypergraph::Edge const& edge, + const HG::Edge& edge, const std::vector& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, + SparseVector* features, + SparseVector* estimated_features, void* context) const; // !!! ONLY call this from subclass *CONSTRUCTORS* !!! void SetStateSize(size_t state_size) { state_size_ = state_size; } - int StateSize() const { return state_size_; } - private: - int state_size_; -}; - - -// word penalty feature, for each word on the E side of a rule, -// add value_ -class WordPenalty : public FeatureFunction { - public: - Features features() const; - WordPenalty(const std::string& param); - static std::string usage(bool p,bool d) { - return usage_helper("WordPenalty","","number of target words (local feature)",p,d); - } - bool rule_feature() const { return true; } - protected: - virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, - void* context) const; - private: - const int fid_; - const double value_; -}; - -class SourceWordPenalty : public FeatureFunction { - public: - bool rule_feature() const { return true; } - Features features() const; - SourceWordPenalty(const std::string& param); - static std::string usage(bool p,bool d) { - return usage_helper("SourceWordPenalty","","number of source words (local feature, and meaningless except when input has non-constant number of source words, e.g. segmentation/morphology/speech recognition lattice)",p,d); - } - protected: - virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, - void* context) const; - private: - const int fid_; - const double value_; -}; - -#define DEFAULT_MAX_ARITY 9 -#define DEFAULT_MAX_ARITY_STRINGIZE(x) #x -#define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x) -#define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY) - -class ArityPenalty : public FeatureFunction { - public: - bool rule_feature() const { return true; } - Features features() const; - ArityPenalty(const std::string& param); - static std::string usage(bool p,bool d) { - return usage_helper("ArityPenalty","[MaxArity(default " DEFAULT_MAX_ARITY_STR ")]","Indicator feature Arity_N=1 for rule of arity N (local feature). 0<=N<=MaxArity(default " DEFAULT_MAX_ARITY_STR ")",p,d); - } - - protected: - virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, - const std::vector& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, - void* context) const; - private: - std::vector fids_; - const double value_; -}; - -void show_features(Features const& features,DenseWeightVector const& weights,std::ostream &out,std::ostream &warn,bool warn_zero_wt=true); //show features and weights - -template -Features all_features(std::vector const& models_,DenseWeightVector &weights_,std::ostream *warn=0,bool warn_fid_0=false) { - using namespace std; - Features ffs; -#define WARNFF(x) do { if (warn) { *warn << "WARNING: "<< x << endl; } } while(0) - typedef map FFM; - FFM ff_from; - for (unsigned i=0;iname_; - Features si=models_[i]->features(); - if (si.empty()) { - WARNFF(ffname<<" doesn't yet report any feature IDs - either supply feature weight, or use --no_freeze_feature_set, or implement features() method"); - } - unsigned n0=0; - for (unsigned j=0;j= weights_.size()) - weights_.resize(fid+1); - if (warn_fid_0 || fid) { - pair i_new=ff_from.insert(FFM::value_type(fid,ffname)); - if (i_new.second) { - if (fid) - ffs.push_back(fid); - else - WARNFF("Feature id 0 for "<second); - } - } - } - if (n0) - WARNFF(ffname<<" (models["< -void show_all_features(std::vector const& models_,DenseWeightVector &weights_,std::ostream &out,std::ostream &warn,bool warn_fid_0=true,bool warn_zero_wt=true) { - return show_features(all_features(models_,weights_,&warn,warn_fid_0),weights_,out,warn,warn_zero_wt); -} - -typedef ValueArray FFState; // this is about 10% faster than string. -//typedef std::string FFState; - -//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size) -typedef std::vector FFStates; - -// this class is a set of FeatureFunctions that can be used to score, rescore, -// etc. a (translation?) forest -class ModelSet { - public: - ModelSet(const std::vector& weights, - const std::vector& models); - - // sets edge->feature_values_ and edge->edge_prob_ - // NOTE: edge must not necessarily be in hg.edges_ but its TAIL nodes - // must be. edge features are supposed to be overwritten, not added to (possibly because rule features aren't in ModelSet so need to be left alone - void AddFeaturesToEdge(const SentenceMetadata& smeta, - const Hypergraph& hg, - const FFStates& node_states, - Hypergraph::Edge* edge, - FFState* residual_context, - prob_t* combination_cost_estimate = NULL) const; - - //this is called INSTEAD of above when result of edge is goal (must be a unary rule - i.e. one variable, but typically it's assumed that there are no target terminals either (e.g. for LM)) - void AddFinalFeatures(const FFState& residual_context, - Hypergraph::Edge* edge, - SentenceMetadata const& smeta) const; - - // this is called once before any feature functions apply to a hypergraph - // it can be used to initialize sentence-specific data structures - void PrepareForInput(const SentenceMetadata& smeta); - - bool empty() const { return models_.empty(); } - - bool stateless() const { return !state_size_; } - Features all_features(std::ostream *warnings=0,bool warn_fid_zero=false); // this will warn about duplicate features as well (one function overwrites the feature of another). also resizes weights_ so it is large enough to hold the (0) weight for the largest reported feature id. since 0 is a NULL feature id, it's never included. if warn_fid_zero, then even the first 0 id is - void show_features(std::ostream &out,std::ostream &warn,bool warn_zero_wt=true); - private: - std::vector models_; - const std::vector& weights_; int state_size_; - std::vector model_state_pos_; }; #endif diff --git a/decoder/ff_basic.cc b/decoder/ff_basic.cc new file mode 100644 index 00000000..f9404d24 --- /dev/null +++ b/decoder/ff_basic.cc @@ -0,0 +1,80 @@ +#include "ff_basic.h" + +#include "fast_lexical_cast.hpp" +#include "hg.h" + +using namespace std; + +// Hiero and Joshua use log_10(e) as the value, so I do to +WordPenalty::WordPenalty(const string& param) : + fid_(FD::Convert("WordPenalty")), + value_(-1.0 / log(10)) { + if (!param.empty()) { + cerr << "Warning WordPenalty ignoring parameter: " << param << endl; + } +} + +void WordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_states, + SparseVector* features, + SparseVector* estimated_features, + void* state) const { + (void) smeta; + (void) ant_states; + (void) state; + (void) estimated_features; + features->set_value(fid_, edge.rule_->EWords() * value_); +} + + +SourceWordPenalty::SourceWordPenalty(const string& param) : + fid_(FD::Convert("SourceWordPenalty")), + value_(-1.0 / log(10)) { + if (!param.empty()) { + cerr << "Warning SourceWordPenalty ignoring parameter: " << param << endl; + } +} + +void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_states, + SparseVector* features, + SparseVector* estimated_features, + void* state) const { + (void) smeta; + (void) ant_states; + (void) state; + (void) estimated_features; + features->set_value(fid_, edge.rule_->FWords() * value_); +} + + +ArityPenalty::ArityPenalty(const std::string& param) : + value_(-1.0 / log(10)) { + string fname = "Arity_"; + unsigned MAX=DEFAULT_MAX_ARITY; + using namespace boost; + if (!param.empty()) + MAX=lexical_cast(param); + for (unsigned i = 0; i <= MAX; ++i) { + WordID fid=FD::Convert(fname+lexical_cast(i)); + fids_.push_back(fid); + } + while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen. doesn't change anything +} + +void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_states, + SparseVector* features, + SparseVector* estimated_features, + void* state) const { + (void) smeta; + (void) ant_states; + (void) state; + (void) estimated_features; + unsigned a=edge.Arity(); + features->set_value(a& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const; + private: + const int fid_; + const double value_; +}; + +class SourceWordPenalty : public FeatureFunction { + public: + SourceWordPenalty(const std::string& param); + static std::string usage(bool p,bool d) { + return usage_helper("SourceWordPenalty","","number of source words (local feature, and meaningless except when input has non-constant number of source words, e.g. segmentation/morphology/speech recognition lattice)",p,d); + } + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const std::vector& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const; + private: + const int fid_; + const double value_; +}; + +#define DEFAULT_MAX_ARITY 9 +#define DEFAULT_MAX_ARITY_STRINGIZE(x) #x +#define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x) +#define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY) + +class ArityPenalty : public FeatureFunction { + public: + ArityPenalty(const std::string& param); + static std::string usage(bool p,bool d) { + return usage_helper("ArityPenalty","[MaxArity(default " DEFAULT_MAX_ARITY_STR ")]","Indicator feature Arity_N=1 for rule of arity N (local feature). 0<=N<=MaxArity(default " DEFAULT_MAX_ARITY_STR ")",p,d); + } + + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const HG::Edge& edge, + const std::vector& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const; + private: + std::vector fids_; + const double value_; +}; + +#endif diff --git a/decoder/ff_bleu.h b/decoder/ff_bleu.h index 5544920e..344dc788 100644 --- a/decoder/ff_bleu.h +++ b/decoder/ff_bleu.h @@ -20,7 +20,7 @@ class BLEUModel : public FeatureFunction { static std::string usage(bool param,bool verbose); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_charset.cc b/decoder/ff_charset.cc index 472de82b..6429088b 100644 --- a/decoder/ff_charset.cc +++ b/decoder/ff_charset.cc @@ -1,5 +1,7 @@ #include "ff_charset.h" +#include "tdict.h" +#include "hg.h" #include "fdict.h" #include "stringlib.h" @@ -20,8 +22,8 @@ bool ContainsNonLatin(const string& word) { void NonLatinCount::TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, const std::vector& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, + SparseVector* features, + SparseVector* estimated_features, void* context) const { const vector& e = edge.rule_->e(); int count = 0; diff --git a/decoder/ff_charset.h b/decoder/ff_charset.h index b1ad537e..267ef65d 100644 --- a/decoder/ff_charset.h +++ b/decoder/ff_charset.h @@ -13,10 +13,10 @@ class NonLatinCount : public FeatureFunction { NonLatinCount(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, + SparseVector* features, + SparseVector* estimated_features, void* context) const; private: mutable std::map is_non_latin_; diff --git a/decoder/ff_context.cc b/decoder/ff_context.cc index 9de4d737..f2b0e67c 100644 --- a/decoder/ff_context.cc +++ b/decoder/ff_context.cc @@ -5,12 +5,14 @@ #include #include +#include "hg.h" #include "filelib.h" #include "stringlib.h" #include "sentence_metadata.h" #include "lattice.h" #include "fdict.h" #include "verbose.h" +#include "tdict.h" RuleContextFeatures::RuleContextFeatures(const string& param) { // cerr << "initializing RuleContextFeatures with parameters: " << param; diff --git a/decoder/ff_context.h b/decoder/ff_context.h index 89bcb557..19198ec3 100644 --- a/decoder/ff_context.h +++ b/decoder/ff_context.h @@ -14,7 +14,7 @@ class RuleContextFeatures : public FeatureFunction { RuleContextFeatures(const string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc index 252dbf8c..e6f78f84 100644 --- a/decoder/ff_csplit.cc +++ b/decoder/ff_csplit.cc @@ -5,6 +5,7 @@ #include "klm/lm/model.hh" +#include "hg.h" #include "sentence_metadata.h" #include "lattice.h" #include "tdict.h" diff --git a/decoder/ff_csplit.h b/decoder/ff_csplit.h index 38c0c5b8..64d42526 100644 --- a/decoder/ff_csplit.h +++ b/decoder/ff_csplit.h @@ -12,7 +12,7 @@ class BasicCSplitFeatures : public FeatureFunction { BasicCSplitFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -27,7 +27,7 @@ class ReverseCharLMCSplitFeature : public FeatureFunction { ReverseCharLMCSplitFeature(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_dwarf.cc b/decoder/ff_dwarf.cc index 43528405..fe7a472e 100644 --- a/decoder/ff_dwarf.cc +++ b/decoder/ff_dwarf.cc @@ -4,6 +4,7 @@ #include #include #include +#include "hg.h" #include "ff_dwarf.h" #include "dwarf.h" #include "wordid.h" diff --git a/decoder/ff_dwarf.h b/decoder/ff_dwarf.h index 083fcc7c..3d6a7da6 100644 --- a/decoder/ff_dwarf.h +++ b/decoder/ff_dwarf.h @@ -56,7 +56,7 @@ class Dwarf : public FeatureFunction { function word alignments set by 3. */ void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_external.cc b/decoder/ff_external.cc index dbb903d0..dea0e20f 100644 --- a/decoder/ff_external.cc +++ b/decoder/ff_external.cc @@ -1,8 +1,10 @@ #include "ff_external.h" -#include "stringlib.h" #include +#include "stringlib.h" +#include "hg.h" + using namespace std; ExternalFeature::ExternalFeature(const string& param) { @@ -50,8 +52,8 @@ void ExternalFeature::FinalTraversalFeatures(const void* context, void ExternalFeature::TraversalFeaturesImpl(const SentenceMetadata& smeta, const Hypergraph::Edge& edge, const std::vector& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, + SparseVector* features, + SparseVector* estimated_features, void* context) const { ff_ext->TraversalFeaturesImpl(smeta, edge, ant_contexts, features, estimated_features, context); } diff --git a/decoder/ff_external.h b/decoder/ff_external.h index 283e58e8..3e2bee51 100644 --- a/decoder/ff_external.h +++ b/decoder/ff_external.h @@ -13,10 +13,10 @@ class ExternalFeature : public FeatureFunction { SparseVector* features) const; protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, - FeatureVector* features, - FeatureVector* estimated_features, + SparseVector* features, + SparseVector* estimated_features, void* context) const; private: void* lib_handle; diff --git a/decoder/ff_factory.h b/decoder/ff_factory.h index 5eb68c8b..bfdd3257 100644 --- a/decoder/ff_factory.h +++ b/decoder/ff_factory.h @@ -43,7 +43,6 @@ template struct FFFactory : public FactoryBase { FP Create(std::string param) const { FF *ret=new FF(param); - ret->Init(); return FP(ret); } virtual std::string usage(bool params,bool verbose) const { @@ -57,7 +56,6 @@ template struct FsaFactory : public FactoryBase { FP Create(std::string param) const { FF *ret=new FF(param); - ret->Init(); return FP(ret); } virtual std::string usage(bool params,bool verbose) const { @@ -98,8 +96,6 @@ struct FactoryRegistry : public UntypedFactoryRegistry { if (debug) cerr<<"debug enabled for "<(*it->second).Create(param); - res->init_name_debug(ffname,debug); - // could add a res->Init() here instead of in Create if we wanted feature id to potentially differ based on the registered name rather than static usage() - of course, specific feature ids can be computed on the basis of feature param as well; this only affects the default single feature id=name return res; } }; diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc index 09ef282c..fefa90bd 100644 --- a/decoder/ff_klm.cc +++ b/decoder/ff_klm.cc @@ -326,11 +326,6 @@ KLanguageModel::KLanguageModel(const string& param) { SetStateSize(pimpl_->ReserveStateSize()); } -template -Features KLanguageModel::features() const { - return single_feature(fid_); -} - template KLanguageModel::~KLanguageModel() { delete pimpl_; @@ -362,7 +357,6 @@ void KLanguageModel::FinalTraversalFeatures(const void* ant_state, template boost::shared_ptr CreateModel(const std::string ¶m) { KLanguageModel *ret = new KLanguageModel(param); - ret->Init(); return boost::shared_ptr(ret); } diff --git a/decoder/ff_klm.h b/decoder/ff_klm.h index 6efe50f6..b5ceffd0 100644 --- a/decoder/ff_klm.h +++ b/decoder/ff_klm.h @@ -20,10 +20,9 @@ class KLanguageModel : public FeatureFunction { virtual void FinalTraversalFeatures(const void* context, SparseVector* features) const; static std::string usage(bool param,bool verbose); - Features features() const; protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc index 5e16d4e3..6ec7b4f3 100644 --- a/decoder/ff_lm.cc +++ b/decoder/ff_lm.cc @@ -519,10 +519,6 @@ LanguageModel::LanguageModel(const string& param) { SetStateSize(LanguageModelImpl::OrderToStateSize(order)); } -Features LanguageModel::features() const { - return single_feature(fid_); -} - LanguageModel::~LanguageModel() { delete pimpl_; } diff --git a/decoder/ff_lm.h b/decoder/ff_lm.h index ccee4268..94e18f00 100644 --- a/decoder/ff_lm.h +++ b/decoder/ff_lm.h @@ -55,10 +55,9 @@ class LanguageModel : public FeatureFunction { SparseVector* features) const; std::string DebugStateToString(const void* state) const; static std::string usage(bool param,bool verbose); - Features features() const; protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -81,7 +80,7 @@ class LanguageModelRandLM : public FeatureFunction { std::string DebugStateToString(const void* state) const; protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_ngrams.h b/decoder/ff_ngrams.h index 064dbb49..4965d235 100644 --- a/decoder/ff_ngrams.h +++ b/decoder/ff_ngrams.h @@ -17,7 +17,7 @@ class NgramDetector : public FeatureFunction { SparseVector* features) const; protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc index bd4c4cc0..0aafb0ba 100644 --- a/decoder/ff_rules.cc +++ b/decoder/ff_rules.cc @@ -10,6 +10,8 @@ #include "lattice.h" #include "fdict.h" #include "verbose.h" +#include "tdict.h" +#include "hg.h" using namespace std; diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index 48d8bd05..7f5e1dfa 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -3,6 +3,7 @@ #include #include +#include "trule.h" #include "ff.h" #include "array2d.h" #include "wordid.h" @@ -12,7 +13,7 @@ class RuleIdentityFeatures : public FeatureFunction { RuleIdentityFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -27,7 +28,7 @@ class RuleNgramFeatures : public FeatureFunction { RuleNgramFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_ruleshape.cc b/decoder/ff_ruleshape.cc index f56ccfa9..7bb548c4 100644 --- a/decoder/ff_ruleshape.cc +++ b/decoder/ff_ruleshape.cc @@ -1,5 +1,7 @@ #include "ff_ruleshape.h" +#include "trule.h" +#include "hg.h" #include "fdict.h" #include diff --git a/decoder/ff_ruleshape.h b/decoder/ff_ruleshape.h index 23c9827e..9f20faf3 100644 --- a/decoder/ff_ruleshape.h +++ b/decoder/ff_ruleshape.h @@ -9,7 +9,7 @@ class RuleShapeFeatures : public FeatureFunction { RuleShapeFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc index 035132b4..a1997695 100644 --- a/decoder/ff_source_syntax.cc +++ b/decoder/ff_source_syntax.cc @@ -3,6 +3,7 @@ #include #include +#include "hg.h" #include "sentence_metadata.h" #include "array2d.h" #include "filelib.h" diff --git a/decoder/ff_source_syntax.h b/decoder/ff_source_syntax.h index 279563e1..a8c7150a 100644 --- a/decoder/ff_source_syntax.h +++ b/decoder/ff_source_syntax.h @@ -11,7 +11,7 @@ class SourceSyntaxFeatures : public FeatureFunction { ~SourceSyntaxFeatures(); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -28,7 +28,7 @@ class SourceSpanSizeFeatures : public FeatureFunction { ~SourceSpanSizeFeatures(); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_spans.cc b/decoder/ff_spans.cc index 0483517b..0ccac69b 100644 --- a/decoder/ff_spans.cc +++ b/decoder/ff_spans.cc @@ -4,6 +4,8 @@ #include #include +#include "hg.h" +#include "tdict.h" #include "filelib.h" #include "stringlib.h" #include "sentence_metadata.h" diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h index 24e0dede..d2f5e84c 100644 --- a/decoder/ff_spans.h +++ b/decoder/ff_spans.h @@ -12,7 +12,7 @@ class SpanFeatures : public FeatureFunction { SpanFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -49,7 +49,7 @@ class CMR2008ReorderingFeatures : public FeatureFunction { CMR2008ReorderingFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_tagger.cc b/decoder/ff_tagger.cc index fd9210fa..7f9af9cd 100644 --- a/decoder/ff_tagger.cc +++ b/decoder/ff_tagger.cc @@ -2,6 +2,7 @@ #include +#include "hg.h" #include "tdict.h" #include "sentence_metadata.h" #include "stringlib.h" diff --git a/decoder/ff_tagger.h b/decoder/ff_tagger.h index bd5b62c0..46418b0c 100644 --- a/decoder/ff_tagger.h +++ b/decoder/ff_tagger.h @@ -18,7 +18,7 @@ class Tagger_BigramIndicator : public FeatureFunction { Tagger_BigramIndicator(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -39,7 +39,7 @@ class LexicalPairIndicator : public FeatureFunction { virtual void PrepareForInput(const SentenceMetadata& smeta); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -59,7 +59,7 @@ class OutputIndicator : public FeatureFunction { OutputIndicator(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h index d7a2dda8..ba3d0b9b 100644 --- a/decoder/ff_wordalign.h +++ b/decoder/ff_wordalign.h @@ -13,7 +13,7 @@ class RelativeSentencePosition : public FeatureFunction { RelativeSentencePosition(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -36,7 +36,7 @@ class SourceBigram : public FeatureFunction { void PrepareForInput(const SentenceMetadata& smeta); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -55,7 +55,7 @@ class LexNullJump : public FeatureFunction { LexNullJump(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -72,7 +72,7 @@ class NewJump : public FeatureFunction { NewJump(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -109,7 +109,7 @@ class LexicalTranslationTrigger : public FeatureFunction { LexicalTranslationTrigger(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -132,14 +132,14 @@ class BlunsomSynchronousParseHack : public FeatureFunction { BlunsomSynchronousParseHack(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, void* out_context) const; private: inline bool DoesNotBelong(const void* state) const { - for (int i = 0; i < NumBytesContext(); ++i) { + for (int i = 0; i < StateSize(); ++i) { if (*(static_cast(state) + i)) return false; } return true; @@ -148,9 +148,9 @@ class BlunsomSynchronousParseHack : public FeatureFunction { inline void AppendAntecedentString(const void* state, std::vector* yield) const { int i = 0; int ind = 0; - while (i < NumBytesContext() && !(*(static_cast(state) + i))) { ++i; ind += 8; } - // std::cerr << i << " " << NumBytesContext() << std::endl; - assert(i != NumBytesContext()); + while (i < StateSize() && !(*(static_cast(state) + i))) { ++i; ind += 8; } + // std::cerr << i << " " << StateSize() << std::endl; + assert(i != StateSize()); assert(ind < cur_ref_->size()); int cur = *(static_cast(state) + i); int comp = 1; @@ -171,7 +171,7 @@ class BlunsomSynchronousParseHack : public FeatureFunction { } inline void SetStateMask(int start, int end, void* state) const { - assert((end / 8) < NumBytesContext()); + assert((end / 8) < StateSize()); int i = 0; int comp = 1; for (int j = 0; j < start; ++j) { @@ -209,7 +209,7 @@ class WordPairFeatures : public FeatureFunction { WordPairFeatures(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -226,7 +226,7 @@ class IdentityCycleDetector : public FeatureFunction { IdentityCycleDetector(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -242,7 +242,7 @@ class InputIndicator : public FeatureFunction { InputIndicator(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, @@ -258,7 +258,7 @@ class Fertility : public FeatureFunction { Fertility(const std::string& param); protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ff_wordset.cc b/decoder/ff_wordset.cc index 44468899..70cea7de 100644 --- a/decoder/ff_wordset.cc +++ b/decoder/ff_wordset.cc @@ -1,5 +1,6 @@ #include "ff_wordset.h" +#include "hg.h" #include "fdict.h" #include #include diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h index 7c9a3fb7..639e1514 100644 --- a/decoder/ff_wordset.h +++ b/decoder/ff_wordset.h @@ -2,6 +2,7 @@ #define _FF_WORDSET_H_ #include "ff.h" +#include "tdict.h" #include #include @@ -32,11 +33,9 @@ class WordSet : public FeatureFunction { ~WordSet() { } - Features features() const { return single_feature(fid_); } - protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, - const Hypergraph::Edge& edge, + const HG::Edge& edge, const std::vector& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/ffset.cc b/decoder/ffset.cc new file mode 100644 index 00000000..653a29f8 --- /dev/null +++ b/decoder/ffset.cc @@ -0,0 +1,72 @@ +#include "ffset.h" + +#include "ff.h" +#include "tdict.h" +#include "hg.h" + +using namespace std; + +ModelSet::ModelSet(const vector& w, const vector& models) : + models_(models), + weights_(w), + state_size_(0), + model_state_pos_(models.size()) { + for (int i = 0; i < models_.size(); ++i) { + model_state_pos_[i] = state_size_; + state_size_ += models_[i]->StateSize(); + } +} + +void ModelSet::PrepareForInput(const SentenceMetadata& smeta) { + for (int i = 0; i < models_.size(); ++i) + const_cast(models_[i])->PrepareForInput(smeta); +} + +void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta, + const Hypergraph& /* hg */, + const FFStates& node_states, + HG::Edge* edge, + FFState* context, + prob_t* combination_cost_estimate) const { + //edge->reset_info(); + context->resize(state_size_); + if (state_size_ > 0) { + memset(&(*context)[0], 0, state_size_); + } + SparseVector est_vals; // only computed if combination_cost_estimate is non-NULL + if (combination_cost_estimate) *combination_cost_estimate = prob_t::One(); + for (int i = 0; i < models_.size(); ++i) { + const FeatureFunction& ff = *models_[i]; + void* cur_ff_context = NULL; + vector ants(edge->tail_nodes_.size()); + bool has_context = ff.StateSize() > 0; + if (has_context) { + int spos = model_state_pos_[i]; + cur_ff_context = &(*context)[spos]; + for (int i = 0; i < ants.size(); ++i) { + ants[i] = &node_states[edge->tail_nodes_[i]][spos]; + } + } + ff.TraversalFeatures(smeta, *edge, ants, &edge->feature_values_, &est_vals, cur_ff_context); + } + if (combination_cost_estimate) + combination_cost_estimate->logeq(est_vals.dot(weights_)); + edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); +} + +void ModelSet::AddFinalFeatures(const FFState& state, HG::Edge* edge,SentenceMetadata const& smeta) const { + assert(1 == edge->rule_->Arity()); + //edge->reset_info(); + for (int i = 0; i < models_.size(); ++i) { + const FeatureFunction& ff = *models_[i]; + const void* ant_state = NULL; + bool has_context = ff.StateSize() > 0; + if (has_context) { + int spos = model_state_pos_[i]; + ant_state = &state[spos]; + } + ff.FinalTraversalFeatures(smeta, *edge, ant_state, &edge->feature_values_); + } + edge->edge_prob_.logeq(edge->feature_values_.dot(weights_)); +} + diff --git a/decoder/ffset.h b/decoder/ffset.h new file mode 100644 index 00000000..28aef667 --- /dev/null +++ b/decoder/ffset.h @@ -0,0 +1,57 @@ +#ifndef _FFSET_H_ +#define _FFSET_H_ + +#include +#include "value_array.h" +#include "prob.h" + +namespace HG { struct Edge; struct Node; } +class Hypergraph; +class FeatureFunction; +class SentenceMetadata; +class FeatureFunction; // see definition below + +// TODO let states be dynamically sized +typedef ValueArray FFState; // this is a fixed array, but about 10% faster than string + +//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size) +typedef std::vector FFStates; + +// this class is a set of FeatureFunctions that can be used to score, rescore, +// etc. a (translation?) forest +class ModelSet { + public: + ModelSet(const std::vector& weights, + const std::vector& models); + + // sets edge->feature_values_ and edge->edge_prob_ + // NOTE: edge must not necessarily be in hg.edges_ but its TAIL nodes + // must be. edge features are supposed to be overwritten, not added to (possibly because rule features aren't in ModelSet so need to be left alone + void AddFeaturesToEdge(const SentenceMetadata& smeta, + const Hypergraph& hg, + const FFStates& node_states, + HG::Edge* edge, + FFState* residual_context, + prob_t* combination_cost_estimate = NULL) const; + + //this is called INSTEAD of above when result of edge is goal (must be a unary rule - i.e. one variable, but typically it's assumed that there are no target terminals either (e.g. for LM)) + void AddFinalFeatures(const FFState& residual_context, + HG::Edge* edge, + SentenceMetadata const& smeta) const; + + // this is called once before any feature functions apply to a hypergraph + // it can be used to initialize sentence-specific data structures + void PrepareForInput(const SentenceMetadata& smeta); + + bool empty() const { return models_.empty(); } + + bool stateless() const { return !state_size_; } + + private: + std::vector models_; + const std::vector& weights_; + int state_size_; + std::vector model_state_pos_; +}; + +#endif diff --git a/decoder/grammar_test.cc b/decoder/grammar_test.cc index 4500490a..912f4f12 100644 --- a/decoder/grammar_test.cc +++ b/decoder/grammar_test.cc @@ -10,7 +10,9 @@ #include "tdict.h" #include "grammar.h" #include "bottom_up_parser.h" +#include "hg.h" #include "ff.h" +#include "ffset.h" #include "weights.h" using namespace std; diff --git a/decoder/hg.h b/decoder/hg.h index f53d2fd2..3d8cd9bc 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -490,14 +490,14 @@ private: // for generic Viterbi/Inside algorithms struct EdgeProb { typedef prob_t Weight; - inline const prob_t& operator()(const Hypergraph::Edge& e) const { return e.edge_prob_; } + inline const prob_t& operator()(const HG::Edge& e) const { return e.edge_prob_; } }; struct EdgeSelectEdgeWeightFunction { typedef prob_t Weight; typedef std::vector EdgeMask; EdgeSelectEdgeWeightFunction(const EdgeMask& v) : v_(v) {} - inline prob_t operator()(const Hypergraph::Edge& e) const { + inline prob_t operator()(const HG::Edge& e) const { if (v_[e.id_]) return prob_t::One(); else return prob_t::Zero(); } @@ -507,7 +507,7 @@ private: struct ScaledEdgeProb { ScaledEdgeProb(const double& alpha) : alpha_(alpha) {} - inline prob_t operator()(const Hypergraph::Edge& e) const { return e.edge_prob_.pow(alpha_); } + inline prob_t operator()(const HG::Edge& e) const { return e.edge_prob_.pow(alpha_); } const double alpha_; typedef prob_t Weight; }; @@ -516,7 +516,7 @@ struct ScaledEdgeProb { struct EdgeFeaturesAndProbWeightFunction { typedef SparseVector Weight; typedef Weight Result; //TODO: change Result->Weight everywhere? - inline const Weight operator()(const Hypergraph::Edge& e) const { + inline const Weight operator()(const HG::Edge& e) const { SparseVector res; for (SparseVector::const_iterator it = e.feature_values_.begin(); it != e.feature_values_.end(); ++it) @@ -527,7 +527,7 @@ struct EdgeFeaturesAndProbWeightFunction { struct TransitionCountWeightFunction { typedef double Weight; - inline double operator()(const Hypergraph::Edge& e) const { (void)e; return 1.0; } + inline double operator()(const HG::Edge& e) const { (void)e; return 1.0; } }; #endif diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index 8f604c89..64c6663e 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -28,7 +28,7 @@ struct HGReader : public JSONParser { hg.ConnectEdgeToHeadNode(&hg.edges_[in_edges[i]], node); } } - void CreateEdge(const TRulePtr& rule, FeatureVector* feats, const SmallVectorUnsigned& tail) { + void CreateEdge(const TRulePtr& rule, SparseVector* feats, const SmallVectorUnsigned& tail) { Hypergraph::Edge* edge = hg.AddEdge(rule, tail); feats->swap(edge->feature_values_); edge->i_ = spans[0]; diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h index f73a1d3f..c0377fe8 100644 --- a/decoder/inside_outside.h +++ b/decoder/inside_outside.h @@ -42,7 +42,7 @@ WeightType Inside(const Hypergraph& hg, Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_; const unsigned num_in_edges = in.size(); for (unsigned j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[in[j]]; + const HG::Edge& edge = hg.edges_[in[j]]; WeightType score = weight(edge); for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) { const int tail_node_index = edge.tail_nodes_[k]; @@ -74,7 +74,7 @@ void Outside(const Hypergraph& hg, Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_; const int num_in_edges = in.size(); for (int j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[in[j]]; + const HG::Edge& edge = hg.edges_[in[j]]; WeightType head_and_edge_weight = weight(edge); head_and_edge_weight *= head_node_outside_score; const int num_tail_nodes = edge.tail_nodes_.size(); @@ -138,7 +138,7 @@ struct InsideOutsides { Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_; const int num_in_edges = in.size(); for (int j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[in[j]]; + const HG::Edge& edge = hg.edges_[in[j]]; KType kbar_e = outside[i]; const int num_tail_nodes = edge.tail_nodes_.size(); for (int k = 0; k < num_tail_nodes; ++k) @@ -156,7 +156,7 @@ struct InsideOutsides { const int num_in_edges = in.size(); for (int j = 0; j < num_in_edges; ++j) { int edgei=in[j]; - const Hypergraph::Edge& edge = hg.edges_[edgei]; + const HG::Edge& edge = hg.edges_[edgei]; V x=weight(edge)*outside[i]; const int num_tail_nodes = edge.tail_nodes_.size(); for (int k = 0; k < num_tail_nodes; ++k) diff --git a/decoder/kbest.h b/decoder/kbest.h index 9af3a20e..9a55f653 100644 --- a/decoder/kbest.h +++ b/decoder/kbest.h @@ -48,7 +48,7 @@ namespace KBest { } struct Derivation { - Derivation(const Hypergraph::Edge& e, + Derivation(const HG::Edge& e, const SmallVectorInt& jv, const WeightType& w, const SparseVector& f) : @@ -58,11 +58,11 @@ namespace KBest { feature_values(f) {} // dummy constructor, just for query - Derivation(const Hypergraph::Edge& e, + Derivation(const HG::Edge& e, const SmallVectorInt& jv) : edge(&e), j(jv) {} T yield; - const Hypergraph::Edge* const edge; + const HG::Edge* const edge; const SmallVectorInt j; const WeightType score; const SparseVector feature_values; @@ -82,8 +82,8 @@ namespace KBest { Derivation const* d; explicit EdgeHandle(Derivation const* d) : d(d) { } // operator bool() const { return d->edge; } - operator Hypergraph::Edge const* () const { return d->edge; } -// Hypergraph::Edge const * operator ->() const { return d->edge; } + operator HG::Edge const* () const { return d->edge; } +// HG::Edge const * operator ->() const { return d->edge; } }; EdgeHandle operator()(unsigned t,unsigned taili,EdgeHandle const& parent) const { @@ -158,7 +158,7 @@ namespace KBest { // the yield is computed in LazyKthBest before the derivation is added to D // returns NULL if j refers to derivation numbers larger than the // antecedent structure define - Derivation* CreateDerivation(const Hypergraph::Edge& e, const SmallVectorInt& j) { + Derivation* CreateDerivation(const HG::Edge& e, const SmallVectorInt& j) { WeightType score = w(e); SparseVector feats = e.feature_values_; for (int i = 0; i < e.Arity(); ++i) { @@ -177,7 +177,7 @@ namespace KBest { const Hypergraph::Node& node = g.nodes_[v]; for (unsigned i = 0; i < node.in_edges_.size(); ++i) { - const Hypergraph::Edge& edge = g.edges_[node.in_edges_[i]]; + const HG::Edge& edge = g.edges_[node.in_edges_[i]]; SmallVectorInt jv(edge.Arity(), 0); Derivation* d = CreateDerivation(edge, jv); assert(d); diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index b603e27a..d2c4715c 100644 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -12,6 +12,7 @@ #include "scorer.h" #include "hg.h" #include "ff_factory.h" +#include "ffset.h" #include "ff_bleu.h" #include "sparse_vector.h" #include "viterbi.h" @@ -26,7 +27,7 @@ struct Translation { typedef std::vector Sentence; Sentence sentence; - FeatureVector features; + SparseVector features; Translation() { } Translation(Hypergraph const& hg,WeightVector *feature_weights=0) { @@ -57,14 +58,14 @@ struct Oracle { } // feature 0 will be the error rate in fear and hope // move toward hope - FeatureVector ModelHopeGradient() const { - FeatureVector r=hope.features-model.features; + SparseVector ModelHopeGradient() const { + SparseVector r=hope.features-model.features; r.set_value(0,0); return r; } // move toward hope from fear - FeatureVector FearHopeGradient() const { - FeatureVector r=hope.features-fear.features; + SparseVector FearHopeGradient() const { + SparseVector r=hope.features-fear.features; r.set_value(0,0); return r; } diff --git a/decoder/program_options.h b/decoder/program_options.h index 87afb320..3cd7649a 100644 --- a/decoder/program_options.h +++ b/decoder/program_options.h @@ -94,7 +94,7 @@ struct any_printer : public boost::function {} template - explicit any_printer(T const* tag) : F(typed_print()) { + explicit any_printer(T const*) : F(typed_print()) { } template diff --git a/decoder/tromble_loss.h b/decoder/tromble_loss.h index 599a2d54..fde33100 100644 --- a/decoder/tromble_loss.h +++ b/decoder/tromble_loss.h @@ -28,7 +28,7 @@ class TrombleLossComputer : private boost::base_from_member& ant_contexts, SparseVector* features, SparseVector* estimated_features, diff --git a/decoder/viterbi.cc b/decoder/viterbi.cc index 1b9c6665..9e381ac6 100644 --- a/decoder/viterbi.cc +++ b/decoder/viterbi.cc @@ -139,8 +139,8 @@ inline bool close_enough(double a,double b,double epsilon) return diff<=epsilon*fabs(a) || diff<=epsilon*fabs(b); } -FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) { - FeatureVector r; +SparseVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights,bool fatal_dotprod_disagreement) { + SparseVector r; const prob_t p = Viterbi(hg, &r); if (weights) { double logp=log(p); diff --git a/decoder/viterbi.h b/decoder/viterbi.h index 03e961a2..a8a0ea7f 100644 --- a/decoder/viterbi.h +++ b/decoder/viterbi.h @@ -14,10 +14,10 @@ std::string viterbi_stats(Hypergraph const& hg, std::string const& name="forest" //TODO: make T a typename inside Traversal and WeightType a typename inside WeightFunction? // Traversal must implement: // typedef T Result; -// void operator()(Hypergraph::Edge const& e,const vector& ants, Result* result) const; +// void operator()(HG::Edge const& e,const vector& ants, Result* result) const; // WeightFunction must implement: // typedef prob_t Weight; -// Weight operator()(Hypergraph::Edge const& e) const; +// Weight operator()(HG::Edge const& e) const; template typename WeightFunction::Weight Viterbi(const Hypergraph& hg, typename Traversal::Result* result, @@ -39,9 +39,9 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg, *cur_node_best_weight = WeightType(1); continue; } - Hypergraph::Edge const* edge_best=0; + HG::Edge const* edge_best=0; for (unsigned j = 0; j < num_in_edges; ++j) { - const Hypergraph::Edge& edge = hg.edges_[cur_node.in_edges_[j]]; + const HG::Edge& edge = hg.edges_[cur_node.in_edges_[j]]; WeightType score = weight(edge); for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) score *= vit_weight[edge.tail_nodes_[k]]; @@ -51,7 +51,7 @@ typename WeightFunction::Weight Viterbi(const Hypergraph& hg, } } assert(edge_best); - Hypergraph::Edge const& edgeb=*edge_best; + HG::Edge const& edgeb=*edge_best; std::vector antsb(edgeb.tail_nodes_.size()); for (unsigned k = 0; k < edgeb.tail_nodes_.size(); ++k) antsb[k] = &vit_result[edgeb.tail_nodes_[k]]; @@ -98,7 +98,7 @@ prob_t Viterbi(const Hypergraph& hg, struct PathLengthTraversal { typedef int Result; - void operator()(const Hypergraph::Edge& edge, + void operator()(const HG::Edge& edge, const std::vector& ants, int* result) const { (void) edge; @@ -109,7 +109,7 @@ struct PathLengthTraversal { struct ESentenceTraversal { typedef std::vector Result; - void operator()(const Hypergraph::Edge& edge, + void operator()(const HG::Edge& edge, const std::vector& ants, Result* result) const { edge.rule_->ESubstitute(ants, result); @@ -118,7 +118,7 @@ struct ESentenceTraversal { struct ELengthTraversal { typedef int Result; - void operator()(const Hypergraph::Edge& edge, + void operator()(const HG::Edge& edge, const std::vector& ants, int* result) const { *result = edge.rule_->ELength() - edge.rule_->Arity(); @@ -128,7 +128,7 @@ struct ELengthTraversal { struct FSentenceTraversal { typedef std::vector Result; - void operator()(const Hypergraph::Edge& edge, + void operator()(const HG::Edge& edge, const std::vector& ants, Result* result) const { edge.rule_->FSubstitute(ants, result); @@ -142,7 +142,7 @@ struct ETreeTraversal { const std::string space; const std::string right; typedef std::vector Result; - void operator()(const Hypergraph::Edge& edge, + void operator()(const HG::Edge& edge, const std::vector& ants, Result* result) const { Result tmp; @@ -162,7 +162,7 @@ struct FTreeTraversal { const std::string space; const std::string right; typedef std::vector Result; - void operator()(const Hypergraph::Edge& edge, + void operator()(const HG::Edge& edge, const std::vector& ants, Result* result) const { Result tmp; @@ -177,8 +177,8 @@ struct FTreeTraversal { }; struct ViterbiPathTraversal { - typedef std::vector Result; - void operator()(const Hypergraph::Edge& edge, + typedef std::vector Result; + void operator()(const HG::Edge& edge, std::vector const& ants, Result* result) const { for (unsigned i = 0; i < ants.size(); ++i) @@ -189,8 +189,8 @@ struct ViterbiPathTraversal { }; struct FeatureVectorTraversal { - typedef FeatureVector Result; - void operator()(Hypergraph::Edge const& edge, + typedef SparseVector Result; + void operator()(HG::Edge const& edge, std::vector const& ants, Result* result) const { for (unsigned i = 0; i < ants.size(); ++i) @@ -210,6 +210,6 @@ int ViterbiELength(const Hypergraph& hg); int ViterbiPathLength(const Hypergraph& hg); /// if weights supplied, assert viterbi prob = features.dot(*weights) (exception if fatal, cerr warn if not). return features (sum over all edges in viterbi derivation) -FeatureVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights=0,bool fatal_dotprod_disagreement=false); +SparseVector ViterbiFeatures(Hypergraph const& hg,WeightVector const* weights=0,bool fatal_dotprod_disagreement=false); #endif diff --git a/example_extff/ff_example.cc b/example_extff/ff_example.cc index 51ebf364..4e478ecd 100644 --- a/example_extff/ff_example.cc +++ b/example_extff/ff_example.cc @@ -2,6 +2,8 @@ #include #include +#include "hg.h" + using namespace std; // example of a "stateful" feature made available as an external library -- cgit v1.2.3 From 0c54220adfaada6ad1e2d54f31a9895da35127fd Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 5 Nov 2012 18:57:39 +0100 Subject: build fix, default learning rate --- decoder/ff_rules.h | 1 + dtrain/dtrain.cc | 4 +- dtrain/dtrain.h | 3 +- dtrain/test/example/dtrain.ini | 8 +-- dtrain/test/example/expected-output | 128 ++++++++++++++---------------------- 5 files changed, 59 insertions(+), 85 deletions(-) (limited to 'decoder/ff_rules.h') diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index dc9a15d5..b100ec34 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -5,6 +5,7 @@ #include #include "trule.h" #include "ff.h" +#include "hg.h" #include "array2d.h" #include "wordid.h" diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc index b7a4bb6f..18286668 100644 --- a/dtrain/dtrain.cc +++ b/dtrain/dtrain.cc @@ -24,13 +24,13 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg) ("pair_threshold", po::value()->default_value(0.), "bleu [0,1] threshold to filter pairs") ("N", po::value()->default_value(4), "N for Ngrams (BLEU)") ("scorer", po::value()->default_value("stupid_bleu"), "scoring: bleu, stupid_, smooth_, approx_, lc_") - ("learning_rate", po::value()->default_value(0.0001), "learning rate") + ("learning_rate", po::value()->default_value(1.0), "learning rate") ("gamma", po::value()->default_value(0.), "gamma for SVM (0 for perceptron)") ("select_weights", po::value()->default_value("last"), "output best, last, avg weights ('VOID' to throw away)") ("rescale", po::value()->zero_tokens(), "rescale weight vector after each input") ("l1_reg", po::value()->default_value("none"), "apply l1 regularization as in 'Tsuroka et al' (2010)") ("l1_reg_strength", po::value(), "l1 regularization strength") - ("fselect", po::value()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPL") // TODO + ("fselect", po::value()->default_value(-1), "select top x percent (or by threshold) of features after each epoch NOT IMPLEMENTED") // TODO ("approx_bleu_d", po::value()->default_value(0.9), "discount for approx. BLEU") ("scale_bleu_diff", po::value()->zero_tokens(), "learning rate <- bleu diff of a misranked pair") ("loss_margin", po::value()->default_value(0.), "update if no error in pref pair but model scores this near") diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h index 7e084a79..4b6f415c 100644 --- a/dtrain/dtrain.h +++ b/dtrain/dtrain.h @@ -3,7 +3,7 @@ #undef DTRAIN_FASTER_PERCEPTRON // only look at misranked pairs // DO NOT USE WITH SVM! -#define DTRAIN_LOCAL +//#define DTRAIN_LOCAL #define DTRAIN_DOTS 10 // after how many inputs to display a '.' #define DTRAIN_GRAMMAR_DELIM "########EOS########" #define DTRAIN_SCALE 100000 @@ -22,7 +22,6 @@ #include "filelib.h" - using namespace std; using namespace dtrain; namespace po = boost::program_options; diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini index 8338b2d3..72d50ca1 100644 --- a/dtrain/test/example/dtrain.ini +++ b/dtrain/test/example/dtrain.ini @@ -1,18 +1,18 @@ input=test/example/nc-wmt11.1k.gz # use '-' for STDIN output=- # a weights file (add .gz for gzip compression) or STDOUT '-' -select_weights=VOID # don't output weights +select_weights=VOID # don't output weights decoder_config=test/example/cdec.ini # config for cdec # weights for these features will be printed on each iteration print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough tmp=/tmp -stop_after=100 # stop epoch after 10 inputs +stop_after=10 # stop epoch after 10 inputs # interesting stuff -epochs=3 # run over input 3 times +epochs=2 # run over input 2 times k=100 # use 100best lists N=4 # optimize (approx) BLEU4 scorer=stupid_bleu # use 'stupid' BLEU+1 -learning_rate=0.0001 # learning rate +learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron) gamma=0 # use SVM reg sample_from=kbest # use kbest lists (as opposed to forest) filter=uniq # only unique entries in kbest (surface form) diff --git a/dtrain/test/example/expected-output b/dtrain/test/example/expected-output index 43798484..05326763 100644 --- a/dtrain/test/example/expected-output +++ b/dtrain/test/example/expected-output @@ -4,17 +4,17 @@ Reading test/example/nc-wmt11.en.srilm.gz ----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 **************************************************************************************************** Example feature: Shape_S00000_T00000 -Seeding random number sequence to 2108658507 +Seeding random number sequence to 2912000813 dtrain Parameters: k 100 N 4 - T 3 + T 2 scorer 'stupid_bleu' sample from 'kbest' filter 'uniq' - learning rate 0.0001 + learning rate 1 gamma 0 loss margin 0 pairs 'XYX' @@ -26,90 +26,64 @@ Parameters: cdec cfg 'test/example/cdec.ini' input 'test/example/nc-wmt11.1k.gz' output '-' - stop_after 100 + stop_after 10 (a dot represents 10 inputs) -Iteration #1 of 3. - .......... 100 -Stopping after 100 input sentences. +Iteration #1 of 2. + . 10 +Stopping after 10 input sentences. WEIGHTS - Glue = -0.236 - WordPenalty = +0.056111 - LanguageModel = +0.71011 - LanguageModel_OOV = -0.489 - PhraseModel_0 = -0.21332 - PhraseModel_1 = -0.13038 - PhraseModel_2 = +0.085148 - PhraseModel_3 = -0.16982 - PhraseModel_4 = -0.026332 - PhraseModel_5 = +0.2133 - PhraseModel_6 = +0.1002 - PassThrough = -0.5541 + Glue = -637 + WordPenalty = +1064 + LanguageModel = +1175.3 + LanguageModel_OOV = -1437 + PhraseModel_0 = +1935.6 + PhraseModel_1 = +2499.3 + PhraseModel_2 = +964.96 + PhraseModel_3 = +1410.8 + PhraseModel_4 = -5977.9 + PhraseModel_5 = +522 + PhraseModel_6 = +1089 + PassThrough = -1308 --- - 1best avg score: 0.16928 (+0.16928) - 1best avg model score: 2.4454 (+2.4454) - avg # pairs: 1616.2 - avg # rank err: 769.6 + 1best avg score: 0.16963 (+0.16963) + 1best avg model score: 64485 (+64485) + avg # pairs: 1494.4 + avg # rank err: 702.6 avg # margin viol: 0 - non0 feature count: 4068 - avg list sz: 96.65 - avg f count: 118.01 -(time 1.3 min, 0.79 s/S) + non0 feature count: 528 + avg list sz: 85.7 + avg f count: 102.75 +(time 0.083 min, 0.5 s/S) -Iteration #2 of 3. - .......... 100 +Iteration #2 of 2. + . 10 WEIGHTS - Glue = -0.1721 - WordPenalty = -0.14132 - LanguageModel = +0.56023 - LanguageModel_OOV = -0.6786 - PhraseModel_0 = +0.14155 - PhraseModel_1 = +0.34218 - PhraseModel_2 = +0.22954 - PhraseModel_3 = -0.24762 - PhraseModel_4 = -0.25848 - PhraseModel_5 = -0.0453 - PhraseModel_6 = -0.0264 - PassThrough = -0.7436 + Glue = -1196 + WordPenalty = +809.52 + LanguageModel = +3112.1 + LanguageModel_OOV = -1464 + PhraseModel_0 = +3895.5 + PhraseModel_1 = +4683.4 + PhraseModel_2 = +1092.8 + PhraseModel_3 = +1079.6 + PhraseModel_4 = -6827.7 + PhraseModel_5 = -888 + PhraseModel_6 = +142 + PassThrough = -1335 --- - 1best avg score: 0.19585 (+0.02657) - 1best avg model score: -16.311 (-18.757) - avg # pairs: 1475.8 - avg # rank err: 668.48 + 1best avg score: 0.277 (+0.10736) + 1best avg model score: -3110.5 (-67595) + avg # pairs: 1144.2 + avg # rank err: 529.1 avg # margin viol: 0 - non0 feature count: 6300 - avg list sz: 96.08 - avg f count: 114.92 -(time 1.3 min, 0.76 s/S) - -Iteration #3 of 3. - .......... 100 -WEIGHTS - Glue = -0.1577 - WordPenalty = -0.086902 - LanguageModel = +0.30136 - LanguageModel_OOV = -0.7848 - PhraseModel_0 = +0.11743 - PhraseModel_1 = +0.11142 - PhraseModel_2 = -0.0053865 - PhraseModel_3 = -0.18731 - PhraseModel_4 = -0.67144 - PhraseModel_5 = +0.1236 - PhraseModel_6 = -0.2665 - PassThrough = -0.8498 - --- - 1best avg score: 0.20034 (+0.0044978) - 1best avg model score: -7.2775 (+9.0336) - avg # pairs: 1578.6 - avg # rank err: 705.77 - avg # margin viol: 0 - non0 feature count: 7313 - avg list sz: 96.84 - avg f count: 124.48 -(time 1.5 min, 0.9 s/S) + non0 feature count: 859 + avg list sz: 74.9 + avg f count: 112.84 +(time 0.067 min, 0.4 s/S) Writing weights file to '-' ... done --- -Best iteration: 3 [SCORE 'stupid_bleu'=0.20034]. -This took 4.0833 min. +Best iteration: 2 [SCORE 'stupid_bleu'=0.277]. +This took 0.15 min. -- cgit v1.2.3 From 4201c2acfc03c5a0d8ae6a82628e18046020d873 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 23 Mar 2013 23:09:37 -0400 Subject: fix rules features --- decoder/ff_rules.cc | 20 ++++++++++++++++---- decoder/ff_rules.h | 1 + 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'decoder/ff_rules.h') diff --git a/decoder/ff_rules.cc b/decoder/ff_rules.cc index 6716d3da..410e083c 100644 --- a/decoder/ff_rules.cc +++ b/decoder/ff_rules.cc @@ -107,7 +107,12 @@ void RuleSourceBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& sme (*features) += it->second; } -RuleTargetBigramFeatures::RuleTargetBigramFeatures(const std::string& param) { +RuleTargetBigramFeatures::RuleTargetBigramFeatures(const std::string& param) : inds(1000) { + for (unsigned i = 0; i < inds.size(); ++i) { + ostringstream os; + os << (i + 1); + inds[i] = os.str(); + } } void RuleTargetBigramFeatures::PrepareForInput(const SentenceMetadata& smeta) { @@ -126,11 +131,18 @@ void RuleTargetBigramFeatures::TraversalFeaturesImpl(const SentenceMetadata& sme it = rule2_feats_.insert(make_pair(&rule, SparseVector())).first; SparseVector& f = it->second; string prev = ""; + vector nt_types(rule.Arity()); + unsigned ntc = 0; + for (int i = 0; i < rule.f_.size(); ++i) + if (rule.f_[i] < 0) nt_types[ntc++] = -rule.f_[i]; for (int i = 0; i < rule.e_.size(); ++i) { WordID w = rule.e_[i]; - if (w < 0) w = -w; - if (w == 0) return; - const string& cur = TD::Convert(w); + string cur; + if (w > 0) { + cur = TD::Convert(w); + } else { + cur = TD::Convert(nt_types[-w]) + inds[-w]; + } ostringstream os; os << "RBT:" << prev << '_' << cur; const int fid = FD::Convert(Escape(os.str())); diff --git a/decoder/ff_rules.h b/decoder/ff_rules.h index b100ec34..f210dc65 100644 --- a/decoder/ff_rules.h +++ b/decoder/ff_rules.h @@ -51,6 +51,7 @@ class RuleTargetBigramFeatures : public FeatureFunction { void* context) const; virtual void PrepareForInput(const SentenceMetadata& smeta); private: + std::vector inds; mutable std::map > rule2_feats_; }; -- cgit v1.2.3