summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- decoder/cdec.cc 11
-rw-r--r-- decoder/cdec_ff.cc 5
-rwxr-xr-x decoder/ff_fsa.h 40
-rwxr-xr-x decoder/ff_lm_fsa.h 6
-rwxr-xr-x decoder/ff_sample_fsa.h 5
5 files changed, 43 insertions, 24 deletions
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index 76551cfa..460e9f15 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -388,6 +388,12 @@ int main(int argc, char** argv) {
exit(1);
}
+ const string input = str("input",conf);
+ cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl;
+ ReadFile in_read(input);
+ istream *in = in_read.stream();
+ assert(*in);
+
// load feature weights (and possibly freeze feature set)
vector<double> feature_weights,prelm_feature_weights;
Weights w,prelm_w;
@@ -503,11 +509,6 @@ int main(int argc, char** argv) {
int combine_size = conf["combine_size"].as<int>();
if (combine_size < 1) combine_size = 1;
- const string input = str("input",conf);
- cerr << "Reading input from " << ((input == "-") ? "STDIN" : input.c_str()) << endl;
- ReadFile in_read(input);
- istream *in = in_read.stream();
- assert(*in);
SparseVector<prob_t> acc_vec; // accumulate gradient
double acc_obj = 0; // accumulate objective
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index c8cd8a66..fc64e5c4 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -8,19 +8,18 @@
#include "ff_factory.h"
#include "ff_ruleshape.h"
#include "ff_bleu.h"
-//#include "ff_sample_fsa.h"
#include "ff_lm_fsa.h"
+#include "ff_sample_fsa.h"
+
boost::shared_ptr<FFRegistry> global_ff_registry;
void register_feature_functions() {
global_ff_registry->Register(new FFFactory<LanguageModel>);
global_ff_registry->Register(new FFFactory<FeatureFunctionFromFsa<LanguageModelFsa> >); // same as LM but using fsa wrapper
- /* // sample_fsa:
global_ff_registry->Register(new FFFactory<WordPenaltyFromFsa>); // same as WordPenalty, but implemented using ff_fsa
global_ff_registry->Register(new FFFactory<FeatureFunctionFromFsa<LongerThanPrev> >);
global_ff_registry->Register(new FFFactory<FeatureFunctionFromFsa<ShorterThanPrev> >);
- */
//TODO: use for all features the new Register which requires usage(...)
#ifdef HAVE_RANDLM
diff --git a/decoder/ff_fsa.h b/decoder/ff_fsa.h
index 85d184ee..45837f2c 100755
--- a/decoder/ff_fsa.h
+++ b/decoder/ff_fsa.h
@@ -201,7 +201,7 @@ public:
template <class Accum>
inline void ScanAccum(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,
WordID w,void const* state,void *next_state,Accum *a) const {
- Add(d().Scan1Meta(smeta,edge,smeta,edge,w,state,next_state),a);
+ Add(d().Scan1Meta(smeta,edge,w,state,next_state),a);
}
// bounce back and forth between two state vars starting at cs, returning end state location. if we required src=dest addr safe state updating, this concept wouldn't need to exist.
@@ -233,7 +233,7 @@ public:
}
-
+ // note you'll still have to override ScanAccum
// override this (and SCAN_PHRASE_ACCUM_OVERRIDE ) if you want e.g. maximum possible order ngram scores with markov_order < n-1. in the future SparseFeatureAccumulator will probably be the only option for type-erased FSA ffs. you will be adding to accum, not setting
template <class Accum>
inline void ScanPhraseAccum(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,
@@ -284,10 +284,14 @@ public:
}
template <class Accum>
- inline void Add(Featval v,Accum *a) const { // for single-feat only
+ inline void Add(Featval v,Accum *a) const { // for single-feat only. but will work for different accums
+ a->Add(fid_,v);
+ }
+ inline void Add(Featval v,SingleFeatureAccumulator *a) const {
a->Add(v);
}
+
inline void set_feat(FeatureVector *features,Featval v) const {
features->set_value(fid_,v);
}
@@ -343,15 +347,31 @@ public:
o<<state(st);
}
int markov_order() const { return 1; }
- Featval ScanT1(WordID w,St const&,St &) const { return 0; }
- inline void ScanT(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,St const& prev_st,St &new_st,FeatureVector *features) const {
- features->maybe_add(d().fid_,d().ScanT1(w,prev_st,new_st));
+
+ // override this
+ Featval ScanT1S(WordID w,St const& /* from */ ,St & /* to */) const {
+ return 0;
+ }
+
+ // or this
+ Featval ScanT1(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,St const& from ,St & to) const {
+ return d().ScanT1S(w,from,to);
}
- inline void Scan(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,void const* st,void *next_state,FeatureVector *features) const {
+
+ // or this (most general)
+ template <class Accum>
+ inline void ScanT(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,St const& prev_st,St &new_st,Accum *a) const {
+ Add(d().ScanT1(smeta,edge,w,prev_st,new_st),a);
+ }
+
+ // note: you're on your own when it comes to Phrase overrides. see FsaFeatureFunctionBase. sorry.
+
+ template <class Accum>
+ inline void ScanAccum(SentenceMetadata const& smeta,const Hypergraph::Edge& edge,WordID w,void const* st,void *next_state,Accum *a) const {
Impl const& im=d();
- FSADBG(edge,"Scan "<<FD::Convert(im.fid_)<<" = "<<im.describe_features(*features)<<" "<<im.state(st)<<"->"<<TD::Convert(w)<<" ");
- im.ScanT(smeta,edge,w,state(st),state(next_state),features);
- FSADBG(edge,state(next_state)<<" = "<<im.describe_features(*features));
+ FSADBG(edge,"Scan "<<FD::Convert(im.fid_)<<" = "<<a->describe(im)<<" "<<im.state(st)<<"->"<<TD::Convert(w)<<" ");
+ im.ScanT(smeta,edge,w,state(st),state(next_state),a);
+ FSADBG(edge,state(next_state)<<" = "<<a->describe(im));
FSADBGnl(edge);
}
diff --git a/decoder/ff_lm_fsa.h b/decoder/ff_lm_fsa.h
index 3547d75b..c2c0972e 100755
--- a/decoder/ff_lm_fsa.h
+++ b/decoder/ff_lm_fsa.h
@@ -1,7 +1,7 @@
#ifndef FF_LM_FSA_H
#define FF_LM_FSA_H
-//FIXME: 3gram has differences in 4th decimal digit, compared to regular ff_lm. this is USUALLY a bug (there's way more actual precision in there). this was with #define LM_FSA_SHORTEN_CONTEXT 1
+//FIXME: 3gram has differences in 4th decimal digit, compared to regular ff_lm. this is USUALLY a bug (there's way more actual precision in there). this was with #define LM_FSA_SHORTEN_CONTEXT 1 and 0 (so it's not that)
#define FSA_LM_DEBUG 0
@@ -84,7 +84,7 @@ struct LanguageModelFsa : public FsaFeatureFunctionBase<LanguageModelFsa> {
WordID ctx[ngram_order_];
state_copy(ctx,old_st);
ctx[ctxlen_]=TD::none; // make this part of state? wastes space but saves copies.
- Featval p=pimpl_->WordProb(w,ctx);
+ Featval p=floored(pimpl_->WordProb(w,ctx));
// states are sri contexts so are in reverse order (most recent word is first, then 1-back comes next, etc.).
WordID *nst=(WordID *)new_st;
nst[0]=w; // new most recent word
@@ -92,7 +92,7 @@ struct LanguageModelFsa : public FsaFeatureFunctionBase<LanguageModelFsa> {
#if LM_FSA_SHORTEN_CONTEXT
p+=pimpl_->ShortenContext(nst,ctxlen_);
#endif
- Add(floored(p),a);
+ Add(p,a);
}
}
diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h
index fb44dade..a129806d 100755
--- a/decoder/ff_sample_fsa.h
+++ b/decoder/ff_sample_fsa.h
@@ -132,10 +132,9 @@ struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> {
*/
// evil anti-google int & len out-param:
- void ScanT(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,int prevlen,int &len,FeatureVector *features) const {
+ Featval ScanT1(SentenceMetadata const& /* smeta */,const Hypergraph::Edge& /* edge */,WordID w,int prevlen,int &len) const {
len=wordlen(w);
- if (len<prevlen)
- features->add_value(fid_,1);
+ return (len<prevlen) ? 1 : 0;
}
};