author     graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>   2010-08-19 04:46:18 +0000
committer  graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>   2010-08-19 04:46:18 +0000
commit     e10981b37ecc42cafca3e6d05e1eb44602b213b3 (patch)
tree       3adb3e239c0fddc6dfa7850e21c7ae4ab3d64e6c
parent     1f196ed477d4ad445a00bb1836dd51d3507e063d (diff)
ValueArray instead of string for state is 10% faster
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@599 ec762483-ff6d-05da-a07a-a48fb63a330f
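A note on the headline claim: the change the commit message refers to is the FFState typedef in decoder/ff.h below, which swaps std::string for ValueArray<char>. The sketch that follows is not cdec's ValueArray; it is a minimal hypothetical stand-in (SmallState and kMaxStateBytes are invented names) illustrating why a fixed-capacity, allocation-free byte buffer can plausibly shave roughly 10% off feature-state copying compared with std::string.

```cpp
#include <cstring>
#include <iostream>
#include <string>

const unsigned kMaxStateBytes = 32; // hypothetical cap on per-edge state size

// Fixed-capacity state buffer: copying is a flat member copy, data() stays contiguous
// and is never invalidated by later operations (unlike pre-C++11 std::string, per the
// FIXME comment in ff.h below).
struct SmallState {
  unsigned char sz;
  char buf[kMaxStateBytes];
  SmallState() : sz(0) {}
  SmallState(const char* p, unsigned n) : sz(static_cast<unsigned char>(n)) {
    std::memcpy(buf, p, n);
  }
  const char* data() const { return buf; }
  unsigned size() const { return sz; }
};

int main() {
  SmallState a("ctx", 3);
  SmallState b = a;        // plain copy, no allocator round trip
  std::string s("ctx");    // may allocate; data() can move on the next mutation
  std::cout << b.size() << ' ' << s.size() << '\n';
  return 0;
}
```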
-rwxr-xr-x  decoder/apply_fsa_models.cc  157
-rw-r--r--  decoder/cdec.cc                2
-rwxr-xr-x  decoder/cfg.cc                 4
-rwxr-xr-x  decoder/decode.sh              2
-rw-r--r--  decoder/ff.h                   5
-rwxr-xr-x  utils/hash.h                  28
-rwxr-xr-x  utils/maybe_update_bound.h    17
7 files changed, 189 insertions, 26 deletions
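The largest part of this commit is the cost-pushed prefix trie in decoder/apply_fsa_models.cc (shown in the diff below): while building, each PrefixTrieNode records the best (Viterbi) probability of any rule passing through it, and done_building() then telescopes that onto the outgoing edges as dest->p / p and sorts them most-probable-first, so rules can later be expanded best-first while paying cost incrementally. The sketch below illustrates only that idea; it uses std::map, std::vector and double in place of cdec's FSA_MAP, ValueArray and prob_t, and the names (Node, Edge, build, done_building) are simplified stand-ins rather than the actual cdec API.

```cpp
#include <algorithm>
#include <iostream>
#include <map>
#include <vector>

struct Node;
struct Edge {
  double p;     // after done_building(): incremental (telescoped) prob dest->p / parent->p
  Node* dest;
  int w;        // symbol labeling the edge (word or nonterminal id)
  bool operator<(Edge const& o) const { return o.p < p; }  // sort most probable first
};

struct Node {
  double p;                      // best rule prob reachable through this node (while building)
  std::map<int, Edge> edge_for;  // builder index: symbol -> edge
  std::vector<Edge> adj;         // filled and sorted by done_building()
  explicit Node(double p = 1) : p(p) {}

  // add/extend a child for symbol w on behalf of a rule with prob rulep,
  // raising the child's best-prob bound if this rule improves it
  Node* build(int w, double rulep) {
    std::map<int, Edge>::iterator i = edge_for.find(w);
    if (i != edge_for.end()) {
      if (i->second.dest->p < rulep) i->second.dest->p = rulep;  // cf. maybe_increase_max
      return i->second.dest;
    }
    Edge& e = edge_for[w];
    e.w = w;
    e.dest = new Node(rulep);
    return e.dest;
  }

  // push costs: each edge keeps only the increment over what the parent already "paid"
  void done_building() {
    adj.clear();
    for (std::map<int, Edge>::iterator i = edge_for.begin(); i != edge_for.end(); ++i) {
      Edge e = i->second;
      e.p = e.dest->p / p;
      adj.push_back(e);
      e.dest->done_building();
    }
    std::sort(adj.begin(), adj.end());
  }
};

int main() {
  Node root;
  root.build(1, 0.5)->build(2, 0.5);  // rule whose rhs is "1 2", prob 0.5
  root.build(1, 0.2)->build(3, 0.2);  // rule whose rhs is "1 3", prob 0.2
  root.done_building();
  // edge into symbol 1 carries 0.5/1.0; below it, the edge into 3 carries 0.2/0.5 = 0.4
  std::cout << root.adj[0].p << ' ' << root.adj[0].dest->adj[1].p << '\n';  // 0.5 0.4
  return 0;  // nodes are intentionally leaked; a real implementation would own them
}
```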
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc
index b43c02a4..7cd5fc6d 100755
--- a/decoder/apply_fsa_models.cc
+++ b/decoder/apply_fsa_models.cc
@@ -1,3 +1,4 @@
+#include "maybe_update_bound.h"
 #include "apply_fsa_models.h"
 #include "hg.h"
 #include "ff_fsa_dynamic.h"
@@ -20,6 +21,7 @@ using namespace std;
 //impl details (not exported). flat namespace for my ease.
+typedef CFG::RHS RHS;
 typedef CFG::BinRhs BinRhs;
 typedef CFG::NTs NTs;
 typedef CFG::NT NT;
@@ -50,7 +52,7 @@ with 3, we predict all sorts of useless items - that won't give us our goal A an
 */
 #define TRIE_START_LHS 1
-// option 1) above. 0 would be option 3), which is dumb
+// 1 is option 1) above. 0 would be option 3), which is dumb
 // if we don't greedy-binarize, we want to encode recognized prefixes p (X -> p . rest) efficiently. if we're doing this, we may as well also push costs so we can best-first select rules in a lazy fashion. this is effectively left-branching binarization, of course.
 template <class K,class V,class Hash>
@@ -61,58 +63,169 @@ struct fsa_map_type {
 #define FSA_MAP(k,v) fsa_map_type<k,v,boost::hash<k> >::type
 typedef WordID LHS; // so this will be -NTHandle.
+struct get_second {
+  template <class P>
+  typename P::second_type const& operator()(P const& p) const {
+    return p.second;
+  }
+};
+
+struct PrefixTrieNode;
+struct PrefixTrieEdge {
+  prob_t p;// viterbi additional prob, i.e. product over path incl. p_final = total rule prob
+  //DPFSA()
+  // we can probably just store deltas, but for debugging remember the full p
+  // prob_t delta; //
+  PrefixTrieNode *dest;
+  WordID w; // for lhs, this will be positive NTHandle instead
+
+  // for sorting most probable first in adj; actually >(p)
+  inline bool operator <(PrefixTrieEdge const& o) const {
+    return o.p<p;
+  }
+};
 struct PrefixTrieNode {
-  prob_t backward; // (viterbi) backward prob (for cost pushing)
-#
+  prob_t p; // viterbi (max prob) of rule this node leads to - when building. telescope later onto edges for best-first.
 #if TRIE_START_LHS
+  bool final; // may also have successors, of course
+  prob_t p_final; // additional prob beyond what we already paid. while building, this is the total prob
+  // instead of completed map, we have trie start w/ lhs.
+  NTHandle lhs; // instead of storing this in Item.
+#else
   typedef FSA_MAP(LHS,RuleHandle) Completed; // can only have one rule w/ a given signature (duplicates should be collapsed when making CFG). but there may be multiple rules, with different LHS
   Completed completed;
-#else
-  bool complete; // may also have successors, of course
 #endif
-  // instead of completed map, we have trie start w/ lhs.?
+  explicit PrefixTrieNode(prob_t p=1) : p(p),final(false) { }
   // outgoing edges will be ordered highest p to worst p
-  struct Edge {
-    DPFSA(prob_t p;) // we can probably just store deltas, but for debugging remember the full p
-    prob_t delta; // p[i]=delta*p[i-1], with p(-1)=1
-    PrefixTrieNode *dest;
-    WordID w;
-  };
-  typedef FSA_MAP(WordID,Edge) BuildAdj;
-  BuildAdj build_adj; //TODO: move builder elsewhere?
-  typedef ValueArray<Edge> Adj;
-//  typedef vector<Edge> Adj;
+
+  typedef FSA_MAP(WordID,PrefixTrieEdge) PrefixTrieEdgeFor;
+public:
+  PrefixTrieEdgeFor edge_for; //TODO: move builder elsewhere? then need 2nd hash or edge include pointer to builder. just clear this later
+  void index_adj() {
+    index_adj(edge_for);
+  }
+
+  template <class M>
+  void index_adj(M &m) {
+    m.clear();
+    for (int i=0;i<adj.size();++i) {
+      PrefixTrieEdge const& e=adj[i];
+      m[e.w]=e;
+    }
+  }
+
+  // call only once.
+  void done_building_r() {
+    done_building();
+    for (int i=0;i<adj.size();++i)
+      adj[i].dest->done_building_r();
+  }
+
+  // for done_building; compute incremental (telescoped) edge p
+  PrefixTrieEdge const& operator()(PrefixTrieEdgeFor::value_type &pair) const {
+    PrefixTrieEdge &e=pair.second;
+    e.p=(e.dest->p)/p;
+    return e;
+  }
+
+  // call only once.
+  void done_building() {
+    adj.reinit_map(edge_for.begin(),edge_for.end(),*this);
+    if (final)
+      p_final/=p;
+    std::sort(adj.begin(),adj.end());
+  }
+
+  typedef ValueArray<PrefixTrieEdge> Adj;
+//  typedef vector<PrefixTrieEdge> Adj;
   Adj adj;
-  //TODO:
+
+  typedef WordID W;
+  typedef NTHandle N; // not negative
+  typedef W const* RI;
+
+  // let's compute p_min so that every rule reachable from the created node has p at least this low.
+  PrefixTrieNode *improve_edge(PrefixTrieEdge const& e,prob_t rulep) {
+    PrefixTrieNode *d=e.dest;
+    maybe_increase_max(d->p,rulep);
+    return d;
+  }
+
+  PrefixTrieNode *build(W w,prob_t rulep) {
+    PrefixTrieEdgeFor::iterator i=edge_for.find(w);
+    if (i!=edge_for.end())
+      return improve_edge(i->second,rulep);
+    PrefixTrieEdge &e=edge_for[w];
+    return e.dest=new PrefixTrieNode(rulep);
+  }
+
+  void set_final(prob_t pf) {
+    final=true;p_final=pf;
+  }
+
+#ifdef HAVE_TAIL_RECURSE
+  // add string i...end
+  void build(RI i,RI end, prob_t rulep) {
+    if (i==end) {
+      set_final(rulep);
+    } else
+      // tail recursion:
+      build(*i)->build(i+1,end,rulep);
+  }
+#endif
 };
 #if TRIE_START_LHS
-//Trie starts with lhs, then continues w/ rhs
+//Trie starts with lhs (nonneg index), then continues w/ rhs (mixed >0 word, else NT)
 #else
 // just rhs. i think item names should exclude lhs if possible (most sharing). get prefix cost w/ forward = viterbi (global best-first admissable h only) and it should be ok?
 #endif
 // costs are pushed.
 struct PrefixTrie {
-  CFG const* cfgp;
+  CFG *cfgp;
+  Rules const* rulesp;
+  Rules const& rules() const { return *rulesp; }
   CFG const& cfg() const { return *cfgp; }
-  PrefixTrie(CFG const& cfg) : cfgp(&cfg) {
+  PrefixTrieNode root;
+  PrefixTrie(CFG &cfg) : cfgp(&cfg),rulesp(&cfg.rules) {
+//    cfg.SortLocalBestFirst(); // instead we'll sort in done_building_r
+    cfg.VisitRuleIds(*this);
+    root.done_building_r();
+    root.index_adj(); // maybe the index we use for building trie should be replaced by a much larger/faster table since we look up by lhs many times in parsing?
     //TODO:
   }
+  void operator()(int ri) const {
+    Rule const& r=rules()[ri];
+    prob_t p=r.p;
+    PrefixTrieNode *n=const_cast<PrefixTrieNode&>(root).build(r.lhs,p);
+    for (RHS::const_iterator i=r.rhs.begin(),e=r.rhs.end();;++i) {
+      if (i==e) {
+        n->set_final(p);
+        break;
+      }
+      n=n->build(*i,p);
+    }
+#ifdef HAVE_TAIL_RECURSE
+    root.build(r.lhs,r.p)->build(r.rhs,r.p);
+#endif
+  }
 };
 // these should go in a global best-first queue
 struct Item {
   prob_t forward;
+  // NOTE: sum = viterbi (max)
   /* The forward probability alpha_i(X[k]->x.y) is the sum of the probabilities of all
      constrained paths of length that end in state X[k]->x.y*/
   prob_t inner;
   /* The inner probability beta_i(X[k]->x.y) is the sum of the probabilities of all
      paths of length i-k that start in state X[k,k]->.xy and end in X[k,i]->x.y, and generate the input symbols x[k,...,i-1] */
-
+  PrefixTrieNode *dot; // dot is a function of the stuff already recognized, and gives a set of suffixes y to complete to finish a rhs for lhs() -> dot y
+  NTHandle lhs() const { return dot->lhs; }
 };
 }//anon ns
@@ -166,7 +279,7 @@ void ApplyFsa::ApplyBottomUp()
 void ApplyFsa::ApplyEarley()
 {
   hgcfg.GiveCFG(cfg);
-  cfg.SortLocalBestFirst();
+  PrefixTrie rt(cfg);
   // don't need to uniq - option to do that already exists in cfg_options
   //TODO:
 }
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index a4c3613b..ca6284f6 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -881,5 +881,7 @@ int main(int argc, char** argv) {
       cout << "0\t**OBJ**=" << acc_obj << ';' << acc_vec << endl << flush;
     }
   }
+  exit(0); // maybe this will save some destruction overhead. or g++ without cxx_atexit needed?
+  return 0;
 }
diff --git a/decoder/cfg.cc b/decoder/cfg.cc
index c02f46ec..b2219193 100755
--- a/decoder/cfg.cc
+++ b/decoder/cfg.cc
@@ -308,7 +308,7 @@ struct add_virtual_rules {
     //TODO: don't actually build substrings of rhs; define key type that stores ref to rhs in new_nts by index (because it may grow), and also allows an int* [b,e) range to serve as key (i.e. don't insert the latter type of key).
     int n=rhs.size();
     if (n<=2) return 0;
-    int longest1=1;
+    int longest1=1; // all this other stuff is not uninitialized when used, based on checking this and other things (it's complicated, learn to prove theorems, gcc)
     int mid=n/2;
     int best_k;
     enum {HAVE_L=-1,HAVE_NONE=0,HAVE_R=1};
@@ -445,7 +445,7 @@ void CFG::BinarizeSplit(CFGBinarize const& b) {
     Rule &r=newr[i];expr; }
   // NOTE: must use indices since we'll be adding rules as we iterate.
   int n_changed_total=0;
-  int n_changed;
+  int n_changed=0; // quiets a warning
 #define CFG_SPLIT_PASS(N,free,just1) \
   for (int pass=0;pass<b.N;++pass) { \
     n_changed=0; \
diff --git a/decoder/decode.sh b/decoder/decode.sh
index 13cc6620..677e64ad 100755
--- a/decoder/decode.sh
+++ b/decoder/decode.sh
@@ -5,6 +5,6 @@ if [ "$lm" ] ; then
  lmargs1="LanguageModel lm.gz -n LM"
 fi
 set -x
-$gdb $d/cdec -c $d/${cfg:=cdec-fsa}.ini -i $d/${in:=1dev.ur} $lmargs0 "$lmargs1" --show_features --show_config --show_weights "$@"
+$gdb ${cdec:=$d/cdec} -c $d/${cfg:=cdec-fsa}.ini -i $d/${in:=1dev.ur} $lmargs0 "$lmargs1" --show_features --show_config --show_weights "$@"
 set +x
 }
diff --git a/decoder/ff.h b/decoder/ff.h
index 726845c4..80a880d8 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -243,7 +243,10 @@ void show_all_features(std::vector<FFp> const& models_,DenseWeightVector &weight
   return show_features(all_features(models_,weights_,&warn,warn_fid_0),weights_,out,warn,warn_zero_wt);
 }
-typedef std::string FFState; //FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size)
+typedef ValueArray<char> FFState; // this is about 10% faster than string.
+//typedef std::string FFState;
+
+//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size)
 typedef std::vector<FFState> FFStates;
 // this class is a set of FeatureFunctions that can be used to score, rescore,
diff --git a/utils/hash.h b/utils/hash.h
index fbe10b4e..7e38bb2c 100755
--- a/utils/hash.h
+++ b/utils/hash.h
@@ -60,6 +60,34 @@ typename H::mapped_type & get_default(H &ht,K const& k,typename H::mapped_type c
   return const_cast<typename H::mapped_type &>(ht.insert(typename H::value_type(k,v)).first->second);
 }
+// get_or_construct w/ no arg: just use ht[k]
+template <class H,class K,class C0>
+typename H::mapped_type & get_or_construct(H &ht,K const& k,C0 const& c0) {
+  typedef typename H::mapped_type V;
+  typedef typename H::value_type KV;
+  typename H::iterator_type i=ht.find(k);
+  if (i==ht.end()) {
+    return const_cast<V &>(ht.insert(KV(k,V(c0))).first->second);
+  } else {
+    return i->second;
+  }
+}
+
+
+// get_or_call (0 arg)
+template <class H,class K,class F>
+typename H::mapped_type & get_or_call(H &ht,K const& k,F const& f) {
+  typedef typename H::mapped_type V;
+  typedef typename H::value_type KV;
+  typename H::iterator_type i=ht.find(k);
+  if (i==ht.end()) {
+    return const_cast<V &>(ht.insert(KV(k,f())).first->second);
+  } else {
+    return i->second;
+  }
+}
+
+
 // the below could also return a ref to the mapped max/min. they have the advantage of not falsely claiming an improvement when an equal value already existed. otherwise you could just modify the get_default and if equal assume new.
 template <class H,class K>
 bool improve_mapped_max(H &ht,K const& k,typename H::mapped_type const& v) {
diff --git a/utils/maybe_update_bound.h b/utils/maybe_update_bound.h
new file mode 100755
index 00000000..d57215d0
--- /dev/null
+++ b/utils/maybe_update_bound.h
@@ -0,0 +1,17 @@
+#ifndef MAYBE_UPDATE_BOUND_H
+#define MAYBE_UPDATE_BOUND_H
+
+template <class To,class From>
+inline void maybe_increase_max(To &to,const From &from) {
+  if (to<from)
+    to=from;
+}
+
+template <class To,class From>
+inline void maybe_decrease_min(To &to,const From &from) {
+  if (from<to)
+    to=from;
+}
+
+
+#endif
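For completeness, here is a small usage sketch for the two helpers introduced in utils/maybe_update_bound.h above. The template definitions are copied verbatim from the diff; only the surrounding main() is an invented illustration. In the diff itself, maybe_increase_max is what PrefixTrieNode::improve_edge uses to raise the Viterbi bound on an existing trie child when a better rule reaches it.

```cpp
#include <iostream>

// definitions as added in utils/maybe_update_bound.h
template <class To,class From>
inline void maybe_increase_max(To &to,const From &from) {
  if (to<from)
    to=from;
}

template <class To,class From>
inline void maybe_decrease_min(To &to,const From &from) {
  if (from<to)
    to=from;
}

int main() {
  double best = 0.25, worst = 0.25;
  maybe_increase_max(best, 0.7);   // best -> 0.7 (a better bound, as in improve_edge)
  maybe_increase_max(best, 0.1);   // no change: 0.1 is not an improvement
  maybe_decrease_min(worst, 0.1);  // worst -> 0.1
  std::cout << best << ' ' << worst << '\n';  // prints: 0.7 0.1
  return 0;
}
```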