author    graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-07-26 17:25:57 +0000
committer graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>  2010-07-26 17:25:57 +0000
commit    faea049a3c2adc9cb373414e084976e032886dfb (patch)
tree      d3b64224f0a17acf00349dab7befb12eda9dd0a2 /decoder/ff_from_fsa.h
parent    5f7390dfff25569611485c668ee00c216f02143b (diff)
comment cleanup
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@421 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/ff_from_fsa.h')
-rwxr-xr-x  decoder/ff_from_fsa.h  40
1 file changed, 17 insertions(+), 23 deletions(-)
diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h
index 237e5d0d..f50e0fdc 100755
--- a/decoder/ff_from_fsa.h
+++ b/decoder/ff_from_fsa.h
@@ -40,8 +40,7 @@ public:
Features features() const { return ff.features(); }
- //TODO: add source span to Fsa FF interface, pass along
- //TODO: read/debug VERY CAREFULLY
+ // Log because it
void TraversalFeaturesLog(const SentenceMetadata& smeta,
Hypergraph::Edge& edge,
const std::vector<const void*>& ant_contexts,
@@ -64,25 +63,27 @@ public:
FSAFFDBGnl(edge);
return;
}
-
+//why compute the heuristic in so many places? because that's how we know which state to score words in once our left context is full (by the Markov order bound, the score will be the same no matter what came before that left context)
SP h_start=ff.heuristic_start_state();
+ // these left_* refer to our output (out_state):
W left_begin=(W)out_state;
W left_out=left_begin; // [left,fsa_state) = left ctx words. if left words aren't full, then null wordid
WP left_full=left_end_full(out_state);
- FsaScanner<Impl> fsa(ff,smeta,edge);
+ FsaScanner<Impl> fsa(ff,smeta,edge); // this holds our current state and eventually becomes our right state if we saw enough words
TRule const& rule=*edge.rule_;
Sentence const& e = rule.e();
for (int j = 0; j < e.size(); ++j) { // items in target side of rule
if (e[j] < 1) { // variable
- SP a = ant_contexts[-e[j]];
+ SP a = ant_contexts[-e[j]]; // variables a* are referring to this child derivation state.
FSAFFDBG(edge,' '<<describe_state(a));
WP al=(WP)a;
WP ale=left_end(a);
// scan(al,le) these - the same as below else. macro for now; pull into closure object later?
- int nw=ale-al; // this many new words
- if (left_out+nw<left_full) { // nothing to score after adding
- wordcpy(left_out,al,nw);
- left_out+=nw;
+ int anw=ale-al;
+// anw = number of left words in the child; full if == M. we use them to fill our left words, then score the rest fully, knowing what state we're in: h_start -> our left words -> any number of interior words, which are scored then hidden
+ if (left_out+anw<left_full) { // nothing to score after adding
+ wordcpy(left_out,al,anw);
+ left_out+=anw;
} else if (left_out<left_full) { // something to score AND newly full left context to fill
int ntofill=left_full-left_out;
assert(ntofill==M-(left_out-left_begin));
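(Aside, for readers of the hunk above: with Markov order bound M, an item's state is its first M target words plus the scanner's right state once M words have been seen. A minimal, self-contained sketch of just the left-word bookkeeping, using invented toy types rather than cdec's, might look like this:)

// left_context_fill_sketch.cc -- illustrative only; ToyState, M, and kNone are assumptions, not cdec types
#include <cassert>
#include <cstdio>

typedef int WordID;
static const WordID kNone = -1;   // plays the role of TD::none
static const int M = 2;           // Markov order bound

struct ToyState {
  WordID left[M];                 // left words; kNone-terminated if not yet full
  int nleft;                      // how many left words are filled so far
  ToyState() : nleft(0) { for (int i = 0; i < M; ++i) left[i] = kNone; }
  bool full() const { return nleft == M; }
  // copy child words into our left context until it is full;
  // returns how many of the child's words were consumed this way
  int fill(const WordID* w, int n) {
    int used = 0;
    while (!full() && used < n) left[nleft++] = w[used++];
    return used;
  }
};

int main() {
  ToyState out;
  WordID child_left[1] = {7};              // a child that exposed a single left word
  int used = out.fill(child_left, 1);
  assert(used == 1 && !out.full());        // still unfilled: nothing can be scored yet
  WordID more[2] = {8, 9};
  used = out.fill(more, 2);
  assert(used == 1 && out.full());         // 8 completes the context; 9 would be scored for real
  std::printf("left context: %d %d\n", out.left[0], out.left[1]);
  return 0;
}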
@@ -91,14 +92,14 @@ public:
// heuristic known now
fsa.reset(h_start);
fsa.scan(left_begin,left_full,estimated_features); // save heuristic (happens once only)
- fsa.scan(al+ntofill,ale,features);
+ fsa.scan(al+ntofill,ale,features); // because of Markov order, fully filled left words scored starting at h_start put us in the right state to score the extra words (which are forgotten)
al+=ntofill; // we used up the first ntofill words of al to end up in some known state via exactly M words total (M-ntofill were there beforehand). now we can scan the remaining al words of this child
} else { // more to score / state to update (left already full)
fsa.scan(al,ale,features);
}
- if (nw==M) // child had full state already
+ if (anw==M) // child had full state already
fsa.reset(fsa_state(a));
- assert(nw<=M);
+ assert(anw<=M);
} else { // single word
WordID ew=e[j];
FSAFFDBG(edge,' '<<TD::Convert(ew));
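The hunk above is the delicate step: the moment the left context becomes full, the M left words are scanned once from h_start (that scan becomes the heuristic estimate), and the scanner is then in exactly the state needed to score the child's remaining, soon-forgotten words for real. A standalone toy of that split, with an invented ToyScanner standing in for FsaScanner:

// heuristic_split_sketch.cc -- a stand-in for the fsa.reset/fsa.scan calls above; ToyScanner is invented
#include <cstdio>

typedef int WordID;
static const int M = 2;           // Markov order bound (assumption)

// toy scanner: "state" is just the last word seen, and every scanned word adds 1.0 to the score
struct ToyScanner {
  int state;
  void reset(int s) { state = s; }
  void scan(const WordID* begin, const WordID* end, double* accum) {
    for (const WordID* p = begin; p != end; ++p) { state = *p; *accum += 1.0; }
  }
};

int main() {
  WordID left_begin[M] = {5, 8};  // our now-full left words; 8 was taken from the child
  WordID child[3] = {8, 9, 10};   // child's left words: 8 filled us, 9 and 10 are interior
  int ntofill = 1;                // how many child words were consumed filling left_begin
  double features = 0, estimated_features = 0;
  ToyScanner fsa;
  fsa.reset(/*h_start=*/0);
  fsa.scan(left_begin, left_begin + M, &estimated_features); // heuristic: scored exactly once
  fsa.scan(child + ntofill, child + 3, &features);           // interior words: scored, then hidden
  std::printf("h=%g real=%g right-state=%d\n", estimated_features, features, fsa.state);
  return 0;
}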
@@ -114,13 +115,14 @@ public:
}
}
- if (left_out<left_full) { // finally: partial heuristic foru nfilled items
+ void *out_fsa_state=fsa_state(out_state);
+ if (left_out<left_full) { // finally: partial heuristic for unfilled items
fsa.reset(h_start);
fsa.scan(left_begin,left_out,estimated_features);
- clear_fsa_state(out_state); // 0 bytes so we compare / hash correctly. don't know state yet
do { *left_out++=TD::none; } while(left_out<left_full); // none-terminate so left_end(out_state) will know how many words
+ ff.state_zero(out_fsa_state); // so we compare / hash correctly. don't know state yet because left context isn't full
} else // or else store final right-state. heuristic was already assigned
- fstatecpy(out_state,fsa.cs);
+ ff.state_copy(out_fsa_state,fsa.cs);
FSAFFDBG(edge," = " << describe_state(out_state)<<" "<<name<<"="<<ff.describe_features(*features)<<" h="<<ff.describe_features(*estimated_features)<<")");
FSAFFDBGnl(edge);
}
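The tail of this hunk decides what ends up in out_state. A sketch of that choice, assuming (as fsa_state() and left_end() suggest) a state blob laid out as M left words followed by the fsa right state:

// out_state_finalize_sketch.cc -- the [left words | fsa state] layout and sizes are assumptions
#include <cstring>
#include <cstdio>

typedef int WordID;
static const WordID kNone = -1;            // TD::none stand-in
static const int M = 2;
static const int ssz = sizeof(int);        // pretend the fsa right state is one int

// state blob: M left words followed by the embedded fsa right state
struct Blob { WordID left[M]; char fsa[ssz]; };

void finalize(Blob* out, int nleft, const void* right_state) {
  if (nleft < M) {                         // left context never filled up
    for (int i = nleft; i < M; ++i) out->left[i] = kNone;   // none-terminate
    std::memset(out->fsa, 0, ssz);         // zero bytes so equal partial states hash/compare equal
  } else {
    std::memcpy(out->fsa, right_state, ssz);                // keep the scanner's right state
  }
}

int main() {
  Blob a, b;
  a.left[0] = 7; b.left[0] = 7; b.left[1] = 8;
  int right = 42;
  finalize(&a, 1, 0);                      // short item: only one left word was ever seen
  finalize(&b, M, &right);                 // full context: heuristic already charged, store state
  int b_right; std::memcpy(&b_right, b.fsa, ssz);
  std::printf("a none-terminated=%d a.fsa zeroed=%d b right-state=%d\n",
              a.left[1] == kNone, a.fsa[0] == 0, b_right);
  return 0;
}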
@@ -229,14 +231,6 @@ private:
inline void *fsa_state(void * ant) const {
return ((char *)ant+state_offset);
}
-
- void clear_fsa_state(void *ant) const { // when state is unknown
- std::memset(fsa_state(ant),0,ssz);
- }
-
- inline void fstatecpy(void *ant,void const* src) const {
- std::memcpy(fsa_state(ant),src,ssz);
- }
};
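The last hunk deletes the wrapper's own memset/memcpy helpers in favor of ff.state_zero and ff.state_copy. This diff doesn't show how the Fsa side implements them; a plausible minimal form, assuming a fixed per-state byte size like the removed ssz, would be:

// fsa_state_helpers_sketch.h -- hypothetical; the real Fsa impl's state_zero/state_copy may differ
#include <cstring>

struct ToyFsaImpl {
  unsigned ssz;                                   // state size in bytes, like the wrapper's ssz
  explicit ToyFsaImpl(unsigned state_bytes) : ssz(state_bytes) {}
  unsigned state_bytes() const { return ssz; }
  // the same memset/memcpy the deleted helpers did, but owned by the Fsa impl,
  // so the wrapper no longer needs to know how to zero or copy a state
  void state_zero(void* st) const { std::memset(st, 0, ssz); }
  void state_copy(void* to, const void* from) const { std::memcpy(to, from, ssz); }
};

Keeping these behind the impl presumably lets a feature with an unusual state representation override the zero/copy behavior without the wrapper changing.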