summaryrefslogtreecommitdiff
path: root/decoder/ff_fsa.h
diff options
context:
space:
mode:
authorgraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-23 19:17:22 +0000
committergraehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-07-23 19:17:22 +0000
commita2d9d0f96502c7d3c04303f3db36a8602d992287 (patch)
treef627e30f0005f1cec4d234d814cd7c6a3f060acf /decoder/ff_fsa.h
parentc57c05d19fb306f7f50cc02516a8a2901c920cca (diff)
sparse_vector use google::dense_hash_map, fsa scan logging
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@383 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/ff_fsa.h')
-rwxr-xr-xdecoder/ff_fsa.h122
1 files changed, 80 insertions, 42 deletions
diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h
index 3a9478e2..e359cfd9 100755
--- a/decoder/ff_fsa.h
+++ b/decoder/ff_fsa.h
@@ -3,7 +3,13 @@
//SEE ALSO: ff_fsa_dynamic.h, ff_from_fsa.h
-//TODO: actually compile this; probably full of syntax errors.
+#define FSA_DEBUG
+#ifdef FSA_DEBUG
+# include <iostream>
+# define FSADBG(x) do { std::cerr << x; } while(0)
+#else
+# define FSADBG(x)
+#endif
#include <stdint.h> //C99
#include <string>
@@ -34,7 +40,6 @@ protected:
if (h_start.size()!=sb) h_start.resize(sb);
state_bytes_=sb;
}
-
int fid_; // you can have more than 1 feature of course.
void init_fid(std::string const& name) { // call this, though, if you have a single feature
fid_=FD::Convert(name);
@@ -54,6 +59,11 @@ protected:
to_state(state,(char const*)begin,(char const*)end);
}
public:
+ //edges may have old features on them. override if you have more than 1 fid. we need to call this explicitly because edges may have old feature values already, and I chose to use add_value (+=) to simplify scanning a phrase, rather than set_value (=) for fsa ffs. could revisit this and use set_value and therefore sum
+ void init_features(FeatureVector *fv) const {
+ fv->set_value(fid_,0);
+ //erase(fid_)
+ }
// return m: all strings x with the same final m+1 letters must end in this state
/* markov chain of order m: P(xn|xn-1...x1)=P(xn|xn-1...xn-m) */
int markov_order() const { return 0; } // override if you use state. order 0 implies state_bytes()==0 as well, as far as scoring/splitting is concerned (you can still track state, though)
@@ -87,6 +97,69 @@ public:
};
+// init state is in cs; overwrite cs, ns repeatedly (alternatively). return resulting state
+template <class FsaFF>
+void *FsaScan(FsaFF const& ff,SentenceMetadata const& smeta,WordID const* i, WordID const* end,FeatureVector *features, void *cs,void *ns) {
+ // extra code - IT'S FOR EFFICIENCY, MAN! IT'S OK! definitely no bugs here.
+ void *os,*es;
+ WordID const* e=end-1; // boundcheck 1 earlier because in loop below we use i+1 before rechecking
+ if ((end-i)&1) { // odd # of words
+ os=cs;
+ es=ns;
+ i-=1;
+ goto odd;
+ } else {
+ es=cs;
+ os=ns;
+ }
+ for (;i<e;i+=2) {
+ ff.Scan(smeta,*i,es,os,features); // e->o
+ odd:
+ ff.Scan(smeta,*(i+1),os,es,features); // o->e
+ }
+ return es;
+}
+
+// if you have a more efficient implementation for scanning a phrase than one word at a time (e.g. LM context using sliding window buffer rather than rotating through a fixed state size), you can override this
+template <class FsaFF>
+void Scan(FsaFF const& ff,SentenceMetadata const& smeta,WordID const* i,WordID const* e,void const* state,void *next_state,FeatureVector *features) {
+ int ssz=ff.state_bytes();
+ if (!ssz) {
+ for (;i<e;++i)
+ ff.Scan(smeta,*i,0,0,features);
+ return;
+ }
+ Bytes tstate(ssz);
+ void *tst=tstate.begin();
+ bool odd=(e-i)&1;
+ void *cs,*ns;
+ if (odd) {
+ cs=tst;
+ ns=next_state;
+ } else {
+ cs=next_state;
+ ns=tst;
+ }
+ std::memcpy(cs,state,ssz);
+ void *est=FsaScan(ff,smeta,i,end,features,cs,ns);
+ assert(est==next_state);
+}
+
+
+// like above Scan, but don't bother storing final state (for FinalTraversalFeatures)
+template <class FF>
+void AccumFeatures(FF const& ff,SentenceMetadata const& smeta,WordID const* i, WordID const* end,FeatureVector *h_features,void const* start_state) {
+ int ssz=ff.state_bytes();
+ if (ssz) {
+ Bytes state(ssz),state2(ssz);
+ void *cs=state.begin(),*ns=state2.begin();
+ memcpy(cs,start_state,ff.state_bytes());
+ FsaScan(ff,smeta,i,end,h_features,cs,ns);
+ } else
+ for (;i<end;++i)
+ ff.Scan(smeta,*i,0,0,h_features);
+}
+
// if State is pod. sets state size and allocs start, h_start
template <class St>
struct FsaTypedBase : public FsaFeatureFunctionBase {
@@ -114,41 +187,20 @@ public:
}
};
+
// usage (if you're lazy):
// struct ShorterThanPrev : public FsaTypedBase<int>,FsaTypedScan<ShorterThanPrev>
-template <class Impl>
-struct FsaTypedScan {
+template <class St,class Impl>
+struct FsaTypedScan : public FsaTypedBase<St> {
void Scan(SentenceMetadata const& smeta,WordID w,void const* st,void *next_state,FeatureVector *features) const {
Impl const* impl=static_cast<Impl const*>(this);
- impl->Scan(smeta,w,impl->state(st),impl->state(next_state),features);
+ impl->ScanTyped(smeta,w,impl->state(st),impl->state(next_state),features);
+ FSADBG("Scan "<<impl->state(st)<<" ->"<<TD::Convert(w)<<" "<<impl->state(next_state)<<" = "<<(*features)[impl->fid_]<<std::endl);
}
};
-// init state is in cs; overwrite cs, ns repeatedly (alternatively). return resulting state
-template <class FsaFF>
-void *FsaScan(FsaFF const& ff,SentenceMetadata const& smeta,WordID const* i, WordID const* end,FeatureVector *h_features, void *cs,void *ns) {
- // extra code - IT'S FOR EFFICIENCY, MAN! IT'S OK! definitely no bugs here.
- void *os,*es;
- WordID const* e=end-1; // boundcheck 1 earlier because in loop below we use i+1 before rechecking
- if ((end-i)&1) { // odd # of words
- os=cs;
- es=ns;
- i-=1;
- goto odd;
- } else {
- es=cs;
- os=ns;
- }
- for (;i<e;i+=2) {
- ff.Scan(smeta,*i,es,os,h_features); // e->o
- odd:
- ff.Scan(smeta,*(i+1),os,es,h_features); // o->e
- }
- return es;
-}
-
// do not use if state size is 0, please.
const bool optimize_FsaScanner_zerostate=false;
@@ -205,20 +257,6 @@ struct FsaScanner {
};
-template <class FF>
-void AccumFeatures(FF const& ff,SentenceMetadata const& smeta,WordID const* i, WordID const* end,FeatureVector *h_features,void const* start_state) {
- int ssz=ff.state_bytes();
- if (ssz) {
- Bytes state(ssz),state2(ssz);
- void *cs=state.begin(),*ns=state2.begin();
- memcpy(cs,start_state,ff.state_bytes());
- FsaScan(ff,smeta,i,end,h_features,cs,ns);
- } else
- for (;i<end;++i)
- ff.Scan(smeta,*i,0,0,h_features);
-}
-
-
//TODO: combine 2 FsaFeatures typelist style (can recurse for more)