From e10981b37ecc42cafca3e6d05e1eb44602b213b3 Mon Sep 17 00:00:00 2001 From: graehl Date: Thu, 19 Aug 2010 04:46:18 +0000 Subject: ValueArray instead of string for state is 10% faster git-svn-id: https://ws10smt.googlecode.com/svn/trunk@599 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/apply_fsa_models.cc | 157 +++++++++++++++++++++++++++++++++++++------- decoder/cdec.cc | 2 + decoder/cfg.cc | 4 +- decoder/decode.sh | 2 +- decoder/ff.h | 5 +- utils/hash.h | 28 ++++++++ utils/maybe_update_bound.h | 17 +++++ 7 files changed, 189 insertions(+), 26 deletions(-) create mode 100755 utils/maybe_update_bound.h diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc index b43c02a4..7cd5fc6d 100755 --- a/decoder/apply_fsa_models.cc +++ b/decoder/apply_fsa_models.cc @@ -1,3 +1,4 @@ +#include "maybe_update_bound.h" #include "apply_fsa_models.h" #include "hg.h" #include "ff_fsa_dynamic.h" @@ -20,6 +21,7 @@ using namespace std; //impl details (not exported). flat namespace for my ease. +typedef CFG::RHS RHS; typedef CFG::BinRhs BinRhs; typedef CFG::NTs NTs; typedef CFG::NT NT; @@ -50,7 +52,7 @@ with 3, we predict all sorts of useless items - that won't give us our goal A an */ #define TRIE_START_LHS 1 -// option 1) above. 0 would be option 3), which is dumb +// 1 is option 1) above. 0 would be option 3), which is dumb // if we don't greedy-binarize, we want to encode recognized prefixes p (X -> p . rest) efficiently. if we're doing this, we may as well also push costs so we can best-first select rules in a lazy fashion. this is effectively left-branching binarization, of course. template @@ -61,58 +63,169 @@ struct fsa_map_type { #define FSA_MAP(k,v) fsa_map_type >::type typedef WordID LHS; // so this will be -NTHandle. +struct get_second { + template + typename P::second_type const& operator()(P const& p) const { + return p.second; + } +}; + +struct PrefixTrieNode; +struct PrefixTrieEdge { + prob_t p;// viterbi additional prob, i.e. product over path incl. p_final = total rule prob + //DPFSA() + // we can probably just store deltas, but for debugging remember the full p + // prob_t delta; // + PrefixTrieNode *dest; + WordID w; // for lhs, this will be positive NTHandle instead + + // for sorting most probable first in adj; actually >(p) + inline bool operator <(PrefixTrieEdge const& o) const { + return o.p Adj; -// typedef vector Adj; + + typedef FSA_MAP(WordID,PrefixTrieEdge) PrefixTrieEdgeFor; +public: + PrefixTrieEdgeFor edge_for; //TODO: move builder elsewhere? then need 2nd hash or edge include pointer to builder. just clear this later + void index_adj() { + index_adj(edge_for); + } + + template + void index_adj(M &m) { + m.clear(); + for (int i=0;idone_building_r(); + } + + // for done_building; compute incremental (telescoped) edge p + PrefixTrieEdge const& operator()(PrefixTrieEdgeFor::value_type &pair) const { + PrefixTrieEdge &e=pair.second; + e.p=(e.dest->p)/p; + return e; + } + + // call only once. + void done_building() { + adj.reinit_map(edge_for.begin(),edge_for.end(),*this); + if (final) + p_final/=p; + std::sort(adj.begin(),adj.end()); + } + + typedef ValueArray Adj; +// typedef vector Adj; Adj adj; - //TODO: + + typedef WordID W; + typedef NTHandle N; // not negative + typedef W const* RI; + + // let's compute p_min so that every rule reachable from the created node has p at least this low. + PrefixTrieNode *improve_edge(PrefixTrieEdge const& e,prob_t rulep) { + PrefixTrieNode *d=e.dest; + maybe_increase_max(d->p,rulep); + return d; + } + + PrefixTrieNode *build(W w,prob_t rulep) { + PrefixTrieEdgeFor::iterator i=edge_for.find(w); + if (i!=edge_for.end()) + return improve_edge(i->second,rulep); + PrefixTrieEdge &e=edge_for[w]; + return e.dest=new PrefixTrieNode(rulep); + } + + void set_final(prob_t pf) { + final=true;p_final=pf; + } + +#ifdef HAVE_TAIL_RECURSE + // add string i...end + void build(RI i,RI end, prob_t rulep) { + if (i==end) { + set_final(rulep); + } else + // tail recursion: + build(*i)->build(i+1,end,rulep); + } +#endif }; #if TRIE_START_LHS -//Trie starts with lhs, then continues w/ rhs +//Trie starts with lhs (nonneg index), then continues w/ rhs (mixed >0 word, else NT) #else // just rhs. i think item names should exclude lhs if possible (most sharing). get prefix cost w/ forward = viterbi (global best-first admissable h only) and it should be ok? #endif // costs are pushed. struct PrefixTrie { - CFG const* cfgp; + CFG *cfgp; + Rules const* rulesp; + Rules const& rules() const { return *rulesp; } CFG const& cfg() const { return *cfgp; } - PrefixTrie(CFG const& cfg) : cfgp(&cfg) { + PrefixTrieNode root; + PrefixTrie(CFG &cfg) : cfgp(&cfg),rulesp(&cfg.rules) { +// cfg.SortLocalBestFirst(); // instead we'll sort in done_building_r + cfg.VisitRuleIds(*this); + root.done_building_r(); + root.index_adj(); // maybe the index we use for building trie should be replaced by a much larger/faster table since we look up by lhs many times in parsing? //TODO: } + void operator()(int ri) const { + Rule const& r=rules()[ri]; + prob_t p=r.p; + PrefixTrieNode *n=const_cast(root).build(r.lhs,p); + for (RHS::const_iterator i=r.rhs.begin(),e=r.rhs.end();;++i) { + if (i==e) { + n->set_final(p); + break; + } + n=n->build(*i,p); + } +#ifdef HAVE_TAIL_RECURSE + root.build(r.lhs,r.p)->build(r.rhs,r.p); +#endif + } }; // these should go in a global best-first queue struct Item { prob_t forward; + // NOTE: sum = viterbi (max) /* The forward probability alpha_i(X[k]->x.y) is the sum of the probabilities of all constrained paths of length that end in state X[k]->x.y*/ prob_t inner; /* The inner probability beta_i(X[k]->x.y) is the sum of the probabilities of all paths of length i-k that start in state X[k,k]->.xy and end in X[k,i]->x.y, and generate the input symbols x[k,...,i-1] */ - + PrefixTrieNode *dot; // dot is a function of the stuff already recognized, and gives a set of suffixes y to complete to finish a rhs for lhs() -> dot y + NTHandle lhs() const { return dot->lhs; } }; }//anon ns @@ -166,7 +279,7 @@ void ApplyFsa::ApplyBottomUp() void ApplyFsa::ApplyEarley() { hgcfg.GiveCFG(cfg); - cfg.SortLocalBestFirst(); + PrefixTrie rt(cfg); // don't need to uniq - option to do that already exists in cfg_options //TODO: } diff --git a/decoder/cdec.cc b/decoder/cdec.cc index a4c3613b..ca6284f6 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -881,5 +881,7 @@ int main(int argc, char** argv) { cout << "0\t**OBJ**=" << acc_obj << ';' << acc_vec << endl << flush; } } + exit(0); // maybe this will save some destruction overhead. or g++ without cxx_atexit needed? + return 0; } diff --git a/decoder/cfg.cc b/decoder/cfg.cc index c02f46ec..b2219193 100755 --- a/decoder/cfg.cc +++ b/decoder/cfg.cc @@ -308,7 +308,7 @@ struct add_virtual_rules { //TODO: don't actually build substrings of rhs; define key type that stores ref to rhs in new_nts by index (because it may grow), and also allows an int* [b,e) range to serve as key (i.e. don't insert the latter type of key). int n=rhs.size(); if (n<=2) return 0; - int longest1=1; + int longest1=1; // all this other stuff is not uninitialized when used, based on checking this and other things (it's complicated, learn to prove theorems, gcc) int mid=n/2; int best_k; enum {HAVE_L=-1,HAVE_NONE=0,HAVE_R=1}; @@ -445,7 +445,7 @@ void CFG::BinarizeSplit(CFGBinarize const& b) { Rule &r=newr[i];expr; } // NOTE: must use indices since we'll be adding rules as we iterate. int n_changed_total=0; - int n_changed; + int n_changed=0; // quiets a warning #define CFG_SPLIT_PASS(N,free,just1) \ for (int pass=0;pass const& models_,DenseWeightVector &weight return show_features(all_features(models_,weights_,&warn,warn_fid_0),weights_,out,warn,warn_zero_wt); } -typedef std::string FFState; //FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size) +typedef ValueArray FFState; // this is about 10% faster than string. +//typedef std::string FFState; + +//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size) typedef std::vector FFStates; // this class is a set of FeatureFunctions that can be used to score, rescore, diff --git a/utils/hash.h b/utils/hash.h index fbe10b4e..7e38bb2c 100755 --- a/utils/hash.h +++ b/utils/hash.h @@ -60,6 +60,34 @@ typename H::mapped_type & get_default(H &ht,K const& k,typename H::mapped_type c return const_cast(ht.insert(typename H::value_type(k,v)).first->second); } +// get_or_construct w/ no arg: just use ht[k] +template +typename H::mapped_type & get_or_construct(H &ht,K const& k,C0 const& c0) { + typedef typename H::mapped_type V; + typedef typename H::value_type KV; + typename H::iterator_type i=ht.find(k); + if (i==ht.end()) { + return const_cast(ht.insert(KV(k,V(c0))).first->second); + } else { + return i->second; + } +} + + +// get_or_call (0 arg) +template +typename H::mapped_type & get_or_call(H &ht,K const& k,F const& f) { + typedef typename H::mapped_type V; + typedef typename H::value_type KV; + typename H::iterator_type i=ht.find(k); + if (i==ht.end()) { + return const_cast(ht.insert(KV(k,f())).first->second); + } else { + return i->second; + } +} + + // the below could also return a ref to the mapped max/min. they have the advantage of not falsely claiming an improvement when an equal value already existed. otherwise you could just modify the get_default and if equal assume new. template bool improve_mapped_max(H &ht,K const& k,typename H::mapped_type const& v) { diff --git a/utils/maybe_update_bound.h b/utils/maybe_update_bound.h new file mode 100755 index 00000000..d57215d0 --- /dev/null +++ b/utils/maybe_update_bound.h @@ -0,0 +1,17 @@ +#ifndef MAYBE_UPDATE_BOUND_H +#define MAYBE_UPDATE_BOUND_H + +template +inline void maybe_increase_max(To &to,const From &from) { + if (to +inline void maybe_decrease_min(To &to,const From &from) { + if (from