summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- decoder/ff.cc 4
-rw-r--r-- decoder/ff_factory.cc 2
-rwxr-xr-x decoder/ff_from_fsa.h 8
-rwxr-xr-x decoder/ff_sample_fsa.h 22
-rw-r--r-- decoder/sparse_vector.h 2
5 files changed, 21 insertions, 17 deletions
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 28d6f732..4f1a3d32 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -1,6 +1,6 @@
-//TODO: actually score rule_feature()==true features once only, hash keyed on rule or modify TRule directly? need to keep clear in forest which features come from models vs. rules; then rescoring could drop all the old models features at once
+//TODO: non-sparse vector for all feature functions? modelset applymodels keeps track of who has what features? it's nice having FF that could generate a handful out of 10000 possible feats, though.
-//TODO: 0 size state != rule-local feature, i.e. still may depend on source span loc/context. identify truly rule-local features so if we want they can be added to grammar rules (minor speedup)
+//TODO: actually score rule_feature()==true features once only, hash keyed on rule or modify TRule directly? need to keep clear in forest which features come from models vs. rules; then rescoring could drop all the old models features at once
#include <boost/lexical_cast.hpp>
#include "ff.h"
diff --git a/decoder/ff_factory.cc b/decoder/ff_factory.cc
index a6d834e0..cc07b2f2 100644
--- a/decoder/ff_factory.cc
+++ b/decoder/ff_factory.cc
@@ -35,7 +35,7 @@ shared_ptr<FeatureFunction> FFRegistry::Create(const string& ffname, const strin
int pl=debug_pre.size();
bool space=false;
std::string p=param;
- bool debug=match_begin(p,debug_pre)&&(p.size()==pl||(space=p[pl]==' '));
+ bool debug=match_begin(p,debug_pre)&&(p.size()==pl||(space=(p[pl]==' '));
if (debug) {
p.erase(0,debug_pre.size()+space);
cerr<<"debug enabled for "<<ffname<< " - rest of param='"<<p<<"'\n";
diff --git a/decoder/ff_from_fsa.h b/decoder/ff_from_fsa.h
index 04a30578..51d89376 100755
--- a/decoder/ff_from_fsa.h
+++ b/decoder/ff_from_fsa.h
@@ -28,6 +28,7 @@ class FeatureFunctionFromFsa : public FeatureFunction {
typedef WordID const* WP;
public:
FeatureFunctionFromFsa(std::string const& param) : ff(param) {
+ FSAFFDBG(ff.name()<<" params="<<param<<" calling Init: ");
Init();
}
@@ -137,6 +138,7 @@ public:
return o.str();
}
+ //FIXME: it's assumed that the final rule is just a unary no-target-terminal rewrite (same as ff_lm)
virtual void FinalTraversalFeatures(const SentenceMetadata& smeta,
const void* residual_state,
FeatureVector* final_features) const
@@ -189,12 +191,14 @@ private:
M=ff.markov_order();
ssz=ff.state_bytes();
state_offset=sizeof(WordID)*M;
- SetStateSize(ff.state_bytes()+state_offset);
+ SetStateSize(ssz+state_offset);
+ assert(!ssz == !M); // no fsa state <=> markov order 0
+ FSAFFDBG("order="<<M<<" fsa_state_offset="<<state_offset<<" fsa_state_bytes="<<ssz<<" ff_state_bytes="<<StateSize()<<'\n');
}
int M; // markov order (ctx len)
FeatureFunctionFromFsa(); // not allowed.
- int state_offset; // store left-words first, then fsa state
+ int state_offset; // NOTE: in bytes (add to char* only). store left-words first, then fsa state
int ssz; // bytes in fsa state
/*
state layout: left WordIds, followed by fsa state
diff --git a/decoder/ff_sample_fsa.h b/decoder/ff_sample_fsa.h
index d8aa7830..6e42b83b 100755
--- a/decoder/ff_sample_fsa.h
+++ b/decoder/ff_sample_fsa.h
@@ -29,7 +29,7 @@ struct WordPenaltyFsa : public FsaFeatureFunctionBase<WordPenaltyFsa> {
typedef FeatureFunctionFromFsa<WordPenaltyFsa> WordPenaltyFromFsa;
-//
+// appears to be buggy right now: give it a bonus weight (-) and it overstates how many
struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
typedef FsaFeatureFunctionBase<LongerThanPrev> Base;
static std::string usage(bool param,bool verbose) {
@@ -40,11 +40,11 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
param,verbose);
}
- static inline int &wordlen(void *state) {
- return *(int*)state;
+ static inline int &state(void *st) {
+ return *(int*)st;
}
- static inline int wordlen(void const* state) {
- return *(int const*)state;
+ static inline int state(void const* st) {
+ return *(int const*)st;
}
static inline int wordlen(WordID w) {
return std::strlen(TD::Convert(w));
@@ -62,23 +62,23 @@ struct LongerThanPrev : public FsaFeatureFunctionBase<LongerThanPrev> {
to_state(h_start.begin(),&ss,1);
}
- wordlen(start.begin())=3;
- wordlen(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous
+ state(start.begin())=3;
+ state(h_start.begin())=4; // estimate: anything >4 chars is usually longer than previous
}
static const float val_per_target_word=-1;
- void Scan(SentenceMetadata const& smeta,WordID w,void const* state,void *next_state,FeatureVector *features) const {
- int prevlen=wordlen(state);
+ void Scan(SentenceMetadata const& smeta,WordID w,void const* from,void *next_state,FeatureVector *features) const {
+ int prevlen=state(from);
int len=wordlen(w);
- wordlen(next_state)=len;
if (len>prevlen)
features->add_value(fid_,val_per_target_word);
+ state(next_state)=len;
}
-
};
// similar example feature; base type exposes stateful type, defines markov_order 1, state size = sizeof(State)
+// also buggy right now: give it a bonus weight (-) and it overstates how many
struct ShorterThanPrev : FsaTypedBase<int,ShorterThanPrev> {
typedef FsaTypedBase<int,ShorterThanPrev> Base;
static std::string usage(bool param,bool verbose) {
diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h
index f8310fc1..1733a4bd 100644
--- a/decoder/sparse_vector.h
+++ b/decoder/sparse_vector.h
@@ -1,7 +1,7 @@
#ifndef _SPARSE_VECTOR_H_
#define _SPARSE_VECTOR_H_
-#define SPARSE_VECTOR_HASH
+//#define SPARSE_VECTOR_HASH
#ifdef SPARSE_VECTOR_HASH
#include "hash.h"