author     graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-08-19 04:46:18 +0000
committer  graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-08-19 04:46:18 +0000
commit     1b57d4a77c5d32068d29bf3772d756c2c844e361 (patch)
tree       2e671f8141ef345022b7196ec81b6abbacce66f3 /decoder
parent     6ba469b231c727fbfea5c831852d231247e24526 (diff)
ValueArray instead of string for state is 10% faster
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@599 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder')
-rwxr-xr-x  decoder/apply_fsa_models.cc  157
-rw-r--r--  decoder/cdec.cc                2
-rwxr-xr-x  decoder/cfg.cc                 4
-rwxr-xr-x  decoder/decode.sh              2
-rw-r--r--  decoder/ff.h                   5
5 files changed, 144 insertions, 26 deletions
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc
index b43c02a4..7cd5fc6d 100755
--- a/decoder/apply_fsa_models.cc
+++ b/decoder/apply_fsa_models.cc
@@ -1,3 +1,4 @@
+#include "maybe_update_bound.h"
#include "apply_fsa_models.h"
#include "hg.h"
#include "ff_fsa_dynamic.h"
@@ -20,6 +21,7 @@ using namespace std;
//impl details (not exported). flat namespace for my ease.
+typedef CFG::RHS RHS;
typedef CFG::BinRhs BinRhs;
typedef CFG::NTs NTs;
typedef CFG::NT NT;
@@ -50,7 +52,7 @@ with 3, we predict all sorts of useless items - that won't give us our goal A an
*/
#define TRIE_START_LHS 1
-// option 1) above. 0 would be option 3), which is dumb
+// 1 is option 1) above. 0 would be option 3), which is dumb
// if we don't greedy-binarize, we want to encode recognized prefixes p (X -> p . rest) efficiently. if we're doing this, we may as well also push costs so we can best-first select rules in a lazy fashion. this is effectively left-branching binarization, of course.
template <class K,class V,class Hash>
@@ -61,58 +63,169 @@ struct fsa_map_type {
#define FSA_MAP(k,v) fsa_map_type<k,v,boost::hash<k> >::type
typedef WordID LHS; // so this will be -NTHandle.
+struct get_second {
+ template <class P>
+ typename P::second_type const& operator()(P const& p) const {
+ return p.second;
+ }
+};
+
+struct PrefixTrieNode;
+struct PrefixTrieEdge {
+ prob_t p;// viterbi additional prob, i.e. product over path incl. p_final = total rule prob
+ //DPFSA()
+ // we can probably just store deltas, but for debugging remember the full p
+ // prob_t delta; //
+ PrefixTrieNode *dest;
+ WordID w; // for lhs, this will be positive NTHandle instead
+
+ // for sorting most probable first in adj; actually >(p)
+ inline bool operator <(PrefixTrieEdge const& o) const {
+ return o.p<p;
+ }
+};
struct PrefixTrieNode {
- prob_t backward; // (viterbi) backward prob (for cost pushing)
-#
+ prob_t p; // viterbi (max prob) of rule this node leads to - when building. telescope later onto edges for best-first.
#if TRIE_START_LHS
+ bool final; // may also have successors, of course
+ prob_t p_final; // additional prob beyond what we already paid. while building, this is the total prob
+ // instead of completed map, we have trie start w/ lhs.
+ NTHandle lhs; // instead of storing this in Item.
+#else
typedef FSA_MAP(LHS,RuleHandle) Completed; // can only have one rule w/ a given signature (duplicates should be collapsed when making CFG). but there may be multiple rules, with different LHS
Completed completed;
-#else
- bool complete; // may also have successors, of course
#endif
- // instead of completed map, we have trie start w/ lhs.?
+ explicit PrefixTrieNode(prob_t p=1) : p(p),final(false) { }
// outgoing edges will be ordered highest p to worst p
- struct Edge {
- DPFSA(prob_t p;) // we can probably just store deltas, but for debugging remember the full p
- prob_t delta; // p[i]=delta*p[i-1], with p(-1)=1
- PrefixTrieNode *dest;
- WordID w;
- };
- typedef FSA_MAP(WordID,Edge) BuildAdj;
- BuildAdj build_adj; //TODO: move builder elsewhere?
- typedef ValueArray<Edge> Adj;
-// typedef vector<Edge> Adj;
+
+ typedef FSA_MAP(WordID,PrefixTrieEdge) PrefixTrieEdgeFor;
+public:
+ PrefixTrieEdgeFor edge_for; //TODO: move builder elsewhere? then need 2nd hash or edge include pointer to builder. just clear this later
+ void index_adj() {
+ index_adj(edge_for);
+ }
+
+ template <class M>
+ void index_adj(M &m) {
+ m.clear();
+ for (int i=0;i<adj.size();++i) {
+ PrefixTrieEdge const& e=adj[i];
+ m[e.w]=e;
+ }
+ }
+
+ // call only once.
+ void done_building_r() {
+ done_building();
+ for (int i=0;i<adj.size();++i)
+ adj[i].dest->done_building_r();
+ }
+
+ // for done_building; compute incremental (telescoped) edge p
+ PrefixTrieEdge const& operator()(PrefixTrieEdgeFor::value_type &pair) const {
+ PrefixTrieEdge &e=pair.second;
+ e.p=(e.dest->p)/p;
+ return e;
+ }
+
+ // call only once.
+ void done_building() {
+ adj.reinit_map(edge_for.begin(),edge_for.end(),*this);
+ if (final)
+ p_final/=p;
+ std::sort(adj.begin(),adj.end());
+ }
+
+ typedef ValueArray<PrefixTrieEdge> Adj;
+// typedef vector<PrefixTrieEdge> Adj;
Adj adj;
- //TODO:
+
+ typedef WordID W;
+ typedef NTHandle N; // not negative
+ typedef W const* RI;
+
+ // let's compute p_min so that every rule reachable from the created node has p at least this low.
+ PrefixTrieNode *improve_edge(PrefixTrieEdge const& e,prob_t rulep) {
+ PrefixTrieNode *d=e.dest;
+ maybe_increase_max(d->p,rulep);
+ return d;
+ }
+
+ PrefixTrieNode *build(W w,prob_t rulep) {
+ PrefixTrieEdgeFor::iterator i=edge_for.find(w);
+ if (i!=edge_for.end())
+ return improve_edge(i->second,rulep);
+ PrefixTrieEdge &e=edge_for[w];
+ return e.dest=new PrefixTrieNode(rulep);
+ }
+
+ void set_final(prob_t pf) {
+ final=true;p_final=pf;
+ }
+
+#ifdef HAVE_TAIL_RECURSE
+ // add string i...end
+ void build(RI i,RI end, prob_t rulep) {
+ if (i==end) {
+ set_final(rulep);
+ } else
+ // tail recursion:
+ build(*i)->build(i+1,end,rulep);
+ }
+#endif
};
#if TRIE_START_LHS
-//Trie starts with lhs, then continues w/ rhs
+//Trie starts with lhs (nonneg index), then continues w/ rhs (mixed >0 word, else NT)
#else
// just rhs. i think item names should exclude lhs if possible (most sharing). get prefix cost w/ forward = viterbi (global best-first admissable h only) and it should be ok?
#endif
// costs are pushed.
struct PrefixTrie {
- CFG const* cfgp;
+ CFG *cfgp;
+ Rules const* rulesp;
+ Rules const& rules() const { return *rulesp; }
CFG const& cfg() const { return *cfgp; }
- PrefixTrie(CFG const& cfg) : cfgp(&cfg) {
+ PrefixTrieNode root;
+ PrefixTrie(CFG &cfg) : cfgp(&cfg),rulesp(&cfg.rules) {
+// cfg.SortLocalBestFirst(); // instead we'll sort in done_building_r
+ cfg.VisitRuleIds(*this);
+ root.done_building_r();
+ root.index_adj(); // maybe the index we use for building trie should be replaced by a much larger/faster table since we look up by lhs many times in parsing?
//TODO:
}
+ void operator()(int ri) const {
+ Rule const& r=rules()[ri];
+ prob_t p=r.p;
+ PrefixTrieNode *n=const_cast<PrefixTrieNode&>(root).build(r.lhs,p);
+ for (RHS::const_iterator i=r.rhs.begin(),e=r.rhs.end();;++i) {
+ if (i==e) {
+ n->set_final(p);
+ break;
+ }
+ n=n->build(*i,p);
+ }
+#ifdef HAVE_TAIL_RECURSE
+ root.build(r.lhs,r.p)->build(r.rhs,r.p);
+#endif
+ }
};
// these should go in a global best-first queue
struct Item {
prob_t forward;
+ // NOTE: sum = viterbi (max)
/* The forward probability alpha_i(X[k]->x.y) is the sum of the probabilities of all
constrained paths of length that end in state X[k]->x.y*/
prob_t inner;
/* The inner probability beta_i(X[k]->x.y) is the sum of the probabilities of all
paths of length i-k that start in state X[k,k]->.xy and end in X[k,i]->x.y, and generate the input symbols x[k,...,i-1] */
-
+ PrefixTrieNode *dot; // dot is a function of the stuff already recognized, and gives a set of suffixes y to complete to finish a rhs for lhs() -> dot y
+ NTHandle lhs() const { return dot->lhs; }
};
}//anon ns
@@ -166,7 +279,7 @@ void ApplyFsa::ApplyBottomUp()
void ApplyFsa::ApplyEarley()
{
hgcfg.GiveCFG(cfg);
- cfg.SortLocalBestFirst();
+ PrefixTrie rt(cfg);
// don't need to uniq - option to do that already exists in cfg_options
//TODO:
}
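
The PrefixTrie added above pushes costs: while building, each node records the best (Viterbi) probability of any rule reachable through it, and done_building_r() then telescopes those node probabilities onto the outgoing edges (e.p = dest->p / p, p_final /= p), so the product of edge probabilities along a path, times p_final at a final node, recovers the full rule probability, and sorting edges by descending p gives the best-first order the lazy Earley expansion wants. Below is a minimal standalone sketch of that idea only, with hypothetical simplified names: double instead of prob_t, std::map instead of the FSA_MAP/ValueArray machinery, and the TRIE_START_LHS handling of the lhs symbol ignored.

#include <algorithm>
#include <cstdio>
#include <map>
#include <utility>
#include <vector>

struct SketchNode {
  double p;                        // while building: best (Viterbi) prob of any rule through here
  bool final;                      // some rule's rhs ends at this node
  double p_final;                  // that rule's prob; after telescoping, the residual
  std::map<int, SketchNode*> kids; // keyed by the next rhs symbol (WordID in the diff)

  explicit SketchNode(double prob = 1.0) : p(prob), final(false), p_final(0) {}

  // extend the recognized prefix by one symbol, keeping the max rule prob seen
  SketchNode* build(int w, double rulep) {
    SketchNode*& d = kids[w];
    if (!d) d = new SketchNode(rulep);        // (leaks nodes; fine for a sketch)
    else if (rulep > d->p) d->p = rulep;      // cf. maybe_increase_max in the diff
    return d;
  }

  void add_rule(const std::vector<int>& rhs, double rulep) {
    if (rulep > p) p = rulep;
    SketchNode* n = this;
    for (size_t i = 0; i < rhs.size(); ++i) n = n->build(rhs[i], rulep);
    n->final = true;
    n->p_final = rulep;
  }

  // turn absolute node probs into incremental (telescoped) edge costs, as
  // done_building_r() does: afterwards the product of p along a path, times
  // p_final where a rule ends, reconstructs that rule's original probability.
  void telescope(double parent_abs = 1.0) {
    const double abs_p = p;
    p = abs_p / parent_abs;
    if (final) p_final /= abs_p;
    for (std::map<int, SketchNode*>::iterator i = kids.begin(); i != kids.end(); ++i)
      i->second->telescope(abs_p);
  }

  // children ordered most-probable-first, the order a lazy best-first expansion
  // wants (the diff stores a ValueArray<PrefixTrieEdge> sorted once instead).
  std::vector<std::pair<double, int> > best_first() const {
    std::vector<std::pair<double, int> > v;
    for (std::map<int, SketchNode*>::const_iterator i = kids.begin(); i != kids.end(); ++i)
      v.push_back(std::make_pair(i->second->p, i->first));
    std::sort(v.rbegin(), v.rend());  // descending by incremental p
    return v;
  }
};

int main() {
  SketchNode root;
  const int r1[] = {7, 8, 9}, r2[] = {7, 8};
  root.add_rule(std::vector<int>(r1, r1 + 3), 0.2);
  root.add_rule(std::vector<int>(r2, r2 + 2), 0.5);
  root.telescope();
  // prefix 7 8:   1.0 * 0.5 * 1.0 = 0.5, p_final there is 1.0  -> rule prob 0.5
  // prefix 7 8 9: multiply in 0.2/0.5 = 0.4, p_final 1.0       -> rule prob 0.2
  std::printf("most probable first symbol under root: %d\n", root.best_first()[0].second);
  return 0;
}
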
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index a4c3613b..ca6284f6 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -881,5 +881,7 @@ int main(int argc, char** argv) {
cout << "0\t**OBJ**=" << acc_obj << ';' << acc_vec << endl << flush;
}
}
+ exit(0); // maybe this will save some destruction overhead. or g++ without cxx_atexit needed?
+ return 0;
}
diff --git a/decoder/cfg.cc b/decoder/cfg.cc
index c02f46ec..b2219193 100755
--- a/decoder/cfg.cc
+++ b/decoder/cfg.cc
@@ -308,7 +308,7 @@ struct add_virtual_rules {
//TODO: don't actually build substrings of rhs; define key type that stores ref to rhs in new_nts by index (because it may grow), and also allows an int* [b,e) range to serve as key (i.e. don't insert the latter type of key).
int n=rhs.size();
if (n<=2) return 0;
- int longest1=1;
+ int longest1=1; // all this other stuff is not uninitialized when used, based on checking this and other things (it's complicated, learn to prove theorems, gcc)
int mid=n/2;
int best_k;
enum {HAVE_L=-1,HAVE_NONE=0,HAVE_R=1};
@@ -445,7 +445,7 @@ void CFG::BinarizeSplit(CFGBinarize const& b) {
Rule &r=newr[i];expr; } // NOTE: must use indices since we'll be adding rules as we iterate.
int n_changed_total=0;
- int n_changed;
+ int n_changed=0; // quiets a warning
#define CFG_SPLIT_PASS(N,free,just1) \
for (int pass=0;pass<b.N;++pass) { \
n_changed=0; \
diff --git a/decoder/decode.sh b/decoder/decode.sh
index 13cc6620..677e64ad 100755
--- a/decoder/decode.sh
+++ b/decoder/decode.sh
@@ -5,6 +5,6 @@ if [ "$lm" ] ; then
lmargs1="LanguageModel lm.gz -n LM"
fi
set -x
-$gdb $d/cdec -c $d/${cfg:=cdec-fsa}.ini -i $d/${in:=1dev.ur} $lmargs0 "$lmargs1" --show_features --show_config --show_weights "$@"
+$gdb ${cdec:=$d/cdec} -c $d/${cfg:=cdec-fsa}.ini -i $d/${in:=1dev.ur} $lmargs0 "$lmargs1" --show_features --show_config --show_weights "$@"
set +x
}
diff --git a/decoder/ff.h b/decoder/ff.h
index 726845c4..80a880d8 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -243,7 +243,10 @@ void show_all_features(std::vector<FFp> const& models_,DenseWeightVector &weight
return show_features(all_features(models_,weights_,&warn,warn_fid_0),weights_,out,warn,warn_zero_wt);
}
-typedef std::string FFState; //FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size)
+typedef ValueArray<char> FFState; // this is about 10% faster than string.
+//typedef std::string FFState;
+
+//FIXME: only context.data() is required to be contiguous, and it becomes invalid after next string operation. use ValueArray instead? (higher performance perhaps, save a word due to fixed size)
typedef std::vector<FFState> FFStates;
// this class is a set of FeatureFunctions that can be used to score, rescore,
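
The ff.h hunk above is what the commit subject ("ValueArray instead of string for state is 10% faster") refers to: feature-function state is a small fixed-size blob of bytes that gets copied, compared, and hashed on essentially every hypergraph edge during rescoring, so std::string's capacity bookkeeping and growth machinery are pure overhead there, which is plausibly where the reported ~10% comes from. A hypothetical minimal stand-in for the kind of type FFState becomes (the real ValueArray in cdec is more general; its interface isn't shown in this diff):

#include <algorithm>
#include <cstring>

// Hypothetical sketch, not cdec's ValueArray. Versus std::string: just a
// pointer and a size, no capacity word, no growth machinery, and the buffer
// never reallocates, so a pointer to the state bytes stays valid for the
// object's lifetime - the guarantee the old FIXME wanted from string::data().
struct ByteState {
  char*    d;
  unsigned n;

  ByteState() : d(0), n(0) {}
  ByteState(const void* src, unsigned len) : d(new char[len]), n(len) {
    if (len) std::memcpy(d, src, len);
  }
  ByteState(const ByteState& o) : d(new char[o.n]), n(o.n) {
    if (n) std::memcpy(d, o.d, n);
  }
  ByteState& operator=(ByteState o) {          // copy-and-swap assignment
    std::swap(d, o.d); std::swap(n, o.n); return *this;
  }
  ~ByteState() { delete[] d; }

  char*       begin()       { return d; }
  const char* begin() const { return d; }
  unsigned    size()  const { return n; }

  // states are compared (and hashed) constantly during state merging and
  // cube pruning, so a flat memcmp over a fixed-size blob is the cheap case
  bool operator==(const ByteState& o) const {
    return n == o.n && (n == 0 || std::memcmp(d, o.d, n) == 0);
  }
  bool operator<(const ByteState& o) const {
    if (n != o.n) return n < o.n;
    return n != 0 && std::memcmp(d, o.d, n) < 0;
  }
};
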