From 64f43ca5010758b58326d727e359b5908de4fcb0 Mon Sep 17 00:00:00 2001 From: graehl Date: Wed, 28 Jul 2010 07:10:09 +0000 Subject: debugging print - still no idea on .05% difference scoring 3gram using phrases git-svn-id: https://ws10smt.googlecode.com/svn/trunk@446 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/ff_from_fsa.h | 68 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 26 deletions(-) (limited to 'decoder/ff_from_fsa.h') diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h index c517ec64..10ccfe6d 100755 --- a/decoder/ff_from_fsa.h +++ b/decoder/ff_from_fsa.h @@ -3,7 +3,9 @@ #include "ff_fsa.h" -#define FSA_FF_DEBUG 0 +#ifndef FSA_FF_DEBUG +# define FSA_FF_DEBUG 0 +#endif #if FSA_FF_DEBUG # define FSAFFDBG(e,x) FSADBGif(debug(),e,x) # define FSAFFDBGnl(e) FSADBGif_nl(debug(),e) @@ -49,35 +51,36 @@ public: void* out_state) const { TRule const& rule=*edge.rule_; - Sentence const& e = rule.e(); + Sentence const& e = rule.e(); // items in target side of rule typename Impl::Accum accum,h_accum; if (!ssz) { // special case for no state - but still build up longer phrases to score in case FSA overrides ScanPhraseAccum if (Impl::simple_phrase_score) { - // save the effort of building up the contiguous rule phrases - for (int j=0,je=e.size();j=1) // token ff.ScanAccum(smeta,edge,(WordID)e[j],NULL,NULL,&accum); - FSAFFDBG(edge," "<=1) + for (int j=0,ee=e.size();;++j) { // items in target side of rule + for(;;++j) { + if (j>=ee) goto rhs_done; // j may go 1 past ee due to k possibly getting to end + if (RHS_WORD(j)) break; } + // word @j + int k=j; + while(k fsa(ff,smeta,edge); /* fsa holds our current state once we've seen our first M rule or child left-context words. that state scores up the rest of the words at the time, and is replaced by the right state of any full child. at the end, if we've got at least M left words in all, it becomes our right state (otherwise, we don't bother storing the partial state, which might seem useful any time we're built on by a rule that has our variable in the initial position - but without also storing the heuristic for that case, we just end up rescanning from scratch anyway to produce the heuristic. so we just store all 0 bytes if we have less than M left words at the end. */ - for (int j = 0; j < e.size(); ++j) { // items in target side of rule - if (e[j] < 1) { // variable + for (int j = 0,ee=e.size(); j < ee; ++j) { // items in target side of rule + s_rhs_next: + if (!RHS_WORD(j)) { // variable // variables a* are referring to this child derivation state. SP a = ant_contexts[-e[j]]; WP al=(WP)a,ale=left_end(a); // the child left words @@ -121,7 +125,6 @@ public: assert(anw<=M); // of course, we never store more than M left words in an item. } else { // single word WordID ew=e[j]; - FSAFFDBG(edge,' '<