author     graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>    2010-07-12 03:42:47 +0000
committer  graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>    2010-07-12 03:42:47 +0000
commit     7b9c1f91e594c4b7783c72e4516d59d60a04dc91 (patch)
tree       2f5ba51f612ff0c1e424e53d44a9c71474182a8f /decoder
parent     11980f43455a85f31f2941f570f9a3a1ff925408 (diff)
DEBUG_PROMISE - looks ok
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@220 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder')
-rw-r--r--  decoder/apply_models.cc |  5
-rw-r--r--  decoder/ff_lm.cc        |  3
-rw-r--r--  decoder/hg.cc           | 15
-rw-r--r--  decoder/hg.h            |  2
4 files changed, 18 insertions, 7 deletions
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 4093f667..240bd12b 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -414,8 +414,9 @@ void ApplyModelSet(const Hypergraph& in,
     ma.Apply();
   } else if (config.algorithm == 1) {
     int pl = config.pop_limit;
-    if (pl > 100 && in.nodes_.size() > 80000) {
-      pl = 30;
+    const int max_pl_for_large=50;
+    if (pl > max_pl_for_large && in.nodes_.size() > 80000) {
+      pl = max_pl_for_large;
       cerr << " Note: reducing pop_limit to " << pl << " for very large forest\n";
     }
     CubePruningRescorer ma(models, smeta, in, pl, out);
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index 0590fa7e..5de9c321 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -465,7 +465,8 @@ LanguageModelImpl *make_lm_impl(int order, string const& f, int load_order)
     return new ReuseLMI(order,ngs.get(f));
   } else {
     LanguageModelImpl *r=new LanguageModelImpl(order,f,load_order);
-    ngs.add(f,r->get_lm());
+    if (!load_order || !ngs.have(f))
+      ngs.add(f,r->get_lm());
     return r;
   }
 }
diff --git a/decoder/hg.cc b/decoder/hg.cc
index b017b183..0a257092 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -192,16 +192,23 @@ void Hypergraph::SetPromise(NodeProbs const& inside,NodeProbs const& outside,dou
   if (!nn) return;
   assert(inside.size()==nn);
   assert(outside.size()==nn);
-  double sum; //TODO: prevent underflow by using prob_t?
+  double sum=0; //TODO: prevent underflow by using prob_t?
   if (normalize)
     for (int i=0;i<nn;++i) {
       sum+=(nodes_[i].promise=pow(inside[i]*outside[i],power));
     }
+  double by=nn/sum; // so avg promise is 1
   if (normalize) {
-    double by=nn/sum; // so avg promise is 1
     for (int i=0;i<nn;++i)
       nodes_[i].promise*=by;
   }
+//#define DEBUG_PROMISE
+#ifdef DEBUG_PROMISE
+  cerr << "\n\nPer-node promises:\n";
+  cerr << "promise\tinside\toutside\t(power="<<power<<" normalize="<<normalize<<" sum="<<sum<<" by="<<by<<")"<<endl;
+  for (int i=0;i<nn;++i)
+    cerr <<nodes_[i].promise<<'\t'<<inside[i]<<'\t'<<outside[i]<<endl;
+#endif
 }
 
 
@@ -247,11 +254,11 @@ bool Hypergraph::PruneInsideOutside(double alpha,double density,const EdgeMask*
   assert(!use_beam||alpha>0);
   assert(!use_density||density>=1);
   assert(!use_sum_prod_semiring||scale>0);
-  int rnum;
+  int rnum=edges_.size();
   if (use_density) {
     const int plen = ViterbiPathLength(*this);
     vector<WordID> bp;
-    rnum = min(static_cast<int>(edges_.size()), static_cast<int>(density * static_cast<double>(plen)));
+    rnum = min(rnum, static_cast<int>(density * static_cast<double>(plen)));
     cerr << "Density pruning: keep "<<rnum<<" of "<<edges_.size()<<" edges (viterbi = "<<plen<<" edges)"<<endl;
     if (rnum == edges_.size()) {
       cerr << "No pruning required: denisty already sufficient\n";
diff --git a/decoder/hg.h b/decoder/hg.h
index 4b6a6357..c7fa0fc1 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -181,6 +181,8 @@ class Hypergraph {
 
   typedef EdgeProbs NodeProbs;
   void SetPromise(NodeProbs const& inside,NodeProbs const& outside, double power=1, bool normalize=true);
+  //TODO: in my opinion, looking at the ratio of logprobs (features \dot weights) rather than the absolute difference generalizes more nicely across sentence lengths and weight vectors that are constant multiples of one another. at least make that an option. i worked around this a little in cdec by making "beam alpha per source word" but that's not helping with different tuning runs. this would also make me more comfortable about allocating promise
+  // beam_alpha=0 means don't beam prune, otherwise drop things that are e^beam_alpha times worse than best - // prunes any edge whose prob_t on the best path taking that edge is more than e^alpha times
   //density=0 means don't density prune:
   // for density>=1.0, keep this many times the edges needed for the 1best derivation
   // worse than the score of the global best past (or the highest edge posterior)
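
For readers skimming the hg.cc hunk: the SetPromise() change initializes sum to 0 (it was previously read uninitialized) and hoists by=nn/sum out of the normalize block so the optional DEBUG_PROMISE dump can report it. A minimal standalone sketch of that normalization, with made-up inside/outside values and names that are not from the repository, looks roughly like this:

// Sketch only (not cdec code): promise[i] = (inside[i]*outside[i])^power,
// then rescaled by nn/sum so the average promise is 1, as in SetPromise above.
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const double power = 1.0;
  // Toy inside/outside scores for three hypothetical nodes.
  std::vector<double> inside  = {0.5, 0.2, 0.1};
  std::vector<double> outside = {0.4, 0.3, 0.9};
  std::vector<double> promise(inside.size());
  double sum = 0;  // must start at 0 -- the bug this commit fixes
  for (std::size_t i = 0; i < promise.size(); ++i)
    sum += (promise[i] = std::pow(inside[i] * outside[i], power));
  const double by = promise.size() / sum;  // rescale so the average promise is 1
  for (std::size_t i = 0; i < promise.size(); ++i) {
    promise[i] *= by;
    // Same tab-separated layout as the DEBUG_PROMISE dump: promise, inside, outside.
    std::printf("%g\t%g\t%g\n", promise[i], inside[i], outside[i]);
  }
  return 0;
}

With those toy numbers the dump would show promises of roughly 1.71, 0.51 and 0.77, which average to 1 by construction; only the relative values matter when the promises are later used to bias pruning.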