summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-09-23 00:10:52 +0000
committergraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-09-23 00:10:52 +0000
commit3a802d691a34fe4a75af110fc8c4a771f18b7641 (patch)
tree1d1494a1a1c1a1def5b05893f0d4dac9ae904c13
parent6409c2c91b7fd3d3b42c6bb0d3376b83eb4d5611 (diff)
pausing earley best-first fsa work
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@656 ec762483-ff6d-05da-a07a-a48fb63a330f
-rwxr-xr-xdecoder/apply_fsa_models.cc141
-rwxr-xr-xdecoder/oracle_bleu.h1
-rwxr-xr-xgraehl/NOTES.earley4
-rwxr-xr-xutils/agenda.h17
-rw-r--r--utils/d_ary_heap.h16
5 files changed, 137 insertions, 42 deletions
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc
index 6031dbab..3e93cadd 100755
--- a/decoder/apply_fsa_models.cc
+++ b/decoder/apply_fsa_models.cc
@@ -165,6 +165,16 @@ struct PrefixTrieEdge {
struct PrefixTrieNode {
best_t p; // viterbi (max prob) of rule this node leads to - when building. telescope later onto edges for best-first.
// bool final; // may also have successors, of course. we don't really need to track this; a null dest edge in the adj list lets us encounter the fact in best first order.
+ void p_delta(int next,best_t &p) const {
+ p*=adj[next].p;
+ }
+ void inc_adj(int &next,best_t &p) const {
+ p/=adj[next].p; //TODO: cache deltas
+ ++next;
+ p*=adj[next].p;
+ }
+
+
typedef TrieBackP BP;
typedef std::vector<BP> BPs;
void back_vec(BPs &ns) const {
@@ -249,7 +259,7 @@ public:
// assert(edge.p.is_1()); // actually, after done_building, e will have telescoped dest->p/p.
NTHandle n=-edge.w;
assert(n>=0);
- SHOWM3(DPFSA,"index_lhs",i,edge,n);
+// SHOWM3(DPFSA,"index_lhs",i,edge,n);
v[n]=edge.dest;
}
}
@@ -428,24 +438,6 @@ private:
};
-// these should go in a global best-first queue
-struct ItemPrio {
- // NOTE: sum = viterbi (max)
- ItemPrio() : priority(init_0()),inner(init_0()) { }
- best_t priority; // includes inner prob. (forward)
- /* The forward probability alpha_i(X[k]->x.y) is the sum of the probabilities of all
- constrained paths of length i that end in state X[k]->x.y*/
- best_t inner;
- /* The inner probability beta_i(X[k]->x.y) is the sum of the probabilities of all
- paths of length i-k that start in state X[k,k]->.xy and end in X[k,i]->x.y, and generate the input symbols x[k,...,i-1] */
- template<class O>
- void print(O &o) const {
- o<<priority; // TODO: show/use inner?
- }
- typedef ItemPrio self_type;
- SELF_TYPE_PRINT
-
-};
typedef std::size_t ItemHash;
@@ -499,15 +491,52 @@ typedef Item *ItemP;
# define INIT_LOCATION
#endif
+// these should go in a global best-first queue
+struct ItemPrio {
+ // NOTE: sum = viterbi (max)
+ ItemPrio() : priority(init_0()),inner(init_0()) { }
+ explicit ItemPrio(best_t priority) : priority(priority),inner(init_0()) { }
+ best_t priority; // includes inner prob. (forward)
+ /* The forward probability alpha_i(X[k]->x.y) is the sum of the probabilities of all
+ constrained paths of length i that end in state X[k]->x.y*/
+ best_t inner;
+ /* The inner probability beta_i(X[k]->x.y) is the sum of the probabilities of all
+ paths of length i-k that start in state X[k,k]->.xy and end in X[k,i]->x.y, and generate the input symbols x[k,...,i-1] */
+ template<class O>
+ void print(O &o) const {
+ o<<priority; // TODO: show/use inner?
+ }
+ typedef ItemPrio self_type;
+ SELF_TYPE_PRINT
+};
+
+#define ITEM_TYPE(X,t) \
+ X(t,ADJ,=-1) \
+
+#define ITEM_TYPE_TYPE ItemType
+
+DECLARE_NAMED_ENUM(ITEM_TYPE)
+DEFINE_NAMED_ENUM(ITEM_TYPE)
+
struct Item : ItemPrio,ItemKey {
- explicit Item(NodeP dot,int next=0) : ItemKey(dot),trienext(next),from(0)
+/* explicit Item(NodeP dot,best_t prio,int next) : ItemPrio(prio),ItemKey(dot),trienext(next),from(0)
INIT_LOCATION
- { }
- explicit Item(NodeP dot,FFState const& state,int next=0) : ItemKey(dot,state),trienext(next),from(0)
+ { }*/
+// ItemType t;
+ // lazy queueing of succesors item:
+ bool is_trie_adj() const {
+ return trienext>=0;
+ }
+ explicit Item(FFState const& state,NodeP dot,best_t prio,int next=0) : ItemPrio(prio),ItemKey(dot,state),trienext(next),from(0)
INIT_LOCATION
- { }
+ {
+// t=ADJ;
+// if (dot->adj.size())
+ dot->p_delta(next,priority);
+// SHOWM1(DFSA,"Item(state,dot,prio)",prio);
+ }
typedef std::queue<ItemP> Predicted;
- Predicted predicted; // this is empty, unless this is a predicted L -> .asdf item, or a to-complete L -> asdf .
+// Predicted predicted; // this is empty, unless this is a predicted L -> .asdf item, or a to-complete L -> asdf .
int trienext; // index of dot->adj to complete (if dest==0), or predict (if NT), or scan (if word). note: we could store pointer inside adj since it and trie are @ fixed addrs. less pointer arith, more space.
ItemP from; //backpointer - 0 for L -> . asdf for the rest; L -> a .sdf, it's the L -> .asdf item.
ItemP predicted_from() const {
@@ -518,6 +547,7 @@ struct Item : ItemPrio,ItemKey {
template<class O>
void print(O &o) const {
o<< '[';
+ o<<this<<": ";
ItemKey::print(o);
o<<' ';
ItemPrio::print(o);
@@ -561,25 +591,53 @@ struct Chart {
PrefixTrie trie;
typedef Agenda<Item,BetterP,GetItemKey> A;
A a;
- void best_first(unsigned kbest=1) {
+
+ /* had to stop working on this for now - it's garbage/useless in this form - see NOTES.earley */
+
+ // p_partial is priority*p(rule) - excluding the FSA model score, and predicted
+ void succ(Item const& from,int adji,best_t p_partial) {
+ PrefixTrieEdge const& te=from.dot->adj[adji];
+ NodeP dest=te.dest;
+ if (te.is_final()) {
+ // complete
+ return;
+ }
+ WordID w=te.w;
+ if (w<0) {
+ NTHandle lhs=-w;
+ } else {
+
+ }
+ }
+
+ void extend1() {
BetterP better;
- assert(kbest==1); //TODO: k-best via best-first requires revisiting best things again and adjusting desc. tricky.
- while(!a.empty()) {
- ItemP top=a.pop();
- best_t b=a.best(); // remember; best_t a<b means a better than (higher prob) than b
- best_t topb=top->priority;
- best_t trie_stop_p=topb/b;
- NodeP d=top->dot;
- PrefixTrieNode::Adj const& adj=d->adj;
- int n=top->trienext;
- for (int m=adj.size();n<m;++n) { // cube corner
- PrefixTrieEdge const& te=adj[m];
- if (better(te.p,trie_stop_p)) {
- SHOWM2(DFSA,"trying adj ",m,te)
+ Item &t=*a.top();
+ best_t tp=t.priority;
+ if (t.is_trie_adj()) {
+ best_t pstop=a.second_best(); // remember; best_t a<b means a better than (higher prob) than b
+// NodeP d=t.dot;
+ PrefixTrieNode::Adj const& adj=t.dot->adj;
+ int n=t.trienext,m=adj.size();
+ SHOWM3(DFSA,"popped",t,tp,pstop);
+ for (;n<m;++n) { // cube corner
+ PrefixTrieEdge const& te=adj[n];
+ SHOWM3(DFSA,"maybe try trie next",n,te.p,pstop);
+ if (better(te.p,pstop)) { // can get some improvement
+ SHOWM2(DFSA,"trying adj ",m,te);
} else {
- break;
+ goto done;
}
}
+ a.pop();
+ done:;
+ }
+ }
+
+ void best_first(unsigned kbest=1) {
+ assert(kbest==1); //TODO: k-best via best-first requires revisiting best things again and adjusting desc. tricky.
+ while(!a.empty()) {
+ extend1();
}
}
@@ -588,7 +646,9 @@ struct Chart {
assert(fsa.state_bytes());
print_fsa=&fsa;
goal_nt=cfg.goal_nt;
- a.add(a.construct(trie.lhs2_ex(goal_nt),fsa.start));
+ best_t prio=init_1();
+ SHOW1(DFSA,prio);
+ a.add(a.construct(fsa.start,trie.lhs2_ex(goal_nt),prio));
}
};
@@ -654,6 +714,7 @@ void ApplyFsa<F>::ApplyEarley()
Chart<F> chart(cfg,smeta,fsa);
// don't need to uniq - option to do that already exists in cfg_options
//TODO:
+ chart.best_first();
*oh=hgcfg.ih;
}
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h
index 145c84d1..b3b1fd17 100755
--- a/decoder/oracle_bleu.h
+++ b/decoder/oracle_bleu.h
@@ -114,6 +114,7 @@ struct OracleBleu {
}
OracleBleu(int doc_size=10) {
set_oracle_doc_size(doc_size);
+ show_derivation=false;
}
ScoreP doc_score,sentscore; // made from factory, so we delete them
diff --git a/graehl/NOTES.earley b/graehl/NOTES.earley
index 4156063a..0953708c 100755
--- a/graehl/NOTES.earley
+++ b/graehl/NOTES.earley
@@ -105,3 +105,7 @@ vs.
3) * -> x . *
with 3, we predict all sorts of useless items - that won't give us our goal A and may not partcipate in any parse. this is not a good option at all.
+
+======
+
+-LM forest may have many in-edges per V. (many rules per NT lhs). so instead of generating all successors for scan/predict, i wanted to have them in sorted (admissable) -LM cost order and postpone once the prefix+rule part is more expensive than something else in the agenda. question: how many such postponed successor things
diff --git a/utils/agenda.h b/utils/agenda.h
index 1dbcd7bb..d4f13696 100755
--- a/utils/agenda.h
+++ b/utils/agenda.h
@@ -107,14 +107,27 @@ struct Agenda : intern_pool<Item,KeyF,HashKey,EqKey,Pool> {
}
// no need to destroy the canon. item because we want to remember the best cost and reject more expensive ways of using it).
ItemC pop() {
- DBG_AGENDA(assert(!empty()));
ItemC r=q.top();
q.pop();
return r;
}
+ void pop_discard() {
+ q.pop();
+ }
+
+ ItemC top() {
+ DBG_AGENDA(assert(!empty()));
+ return q.top();
+ }
+
agenda_best_t best() const {
- return priomap[q.top()]; //TODO: cache/track the global best?
+ return q.best(); //TODO: cache/track the global best?
}
+
+ agenda_best_t second_best() const {
+ return q.second_best();
+ }
+
// add only if worse than queue current best, otherwise evaluate immediately (e.g. for early stopping w/ expensive to compute additional cost). return true if postponed (added)
bool postpone(ItemP i) {
if (better(priomap[i],best())) return false;
diff --git a/utils/d_ary_heap.h b/utils/d_ary_heap.h
index 3a071772..1270638a 100644
--- a/utils/d_ary_heap.h
+++ b/utils/d_ary_heap.h
@@ -283,6 +283,22 @@
return false;
}
+ distance_type best(distance_type null=0) const {
+ return empty() ? null : get(distance,data[0]);
+ }
+ distance_type second_best(distance_type null=0) const {
+ if (data.size()<2) return null;
+ int m=std::min(data.size(),Arity+1);
+// if (m>=Arity) m=Arity+1;
+ distance_type b=get(distance,data[1]);
+ for (int i=2;i<m;++i) {
+ distance_type d=get(distance,data[i]);
+ if (better(d,b))
+ b=d;
+ }
+ return b;
+ }
+
#include "warning_push.h"
#pragma GCC diagnostic ignored "-Wtype-limits"