summary | refs | log | tree | commit | diff
path: root/decoder/ff.cc
diff options
context:
space:
mode:
author graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-09 15:12:50 +0000
committer graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-09 15:12:50 +0000
commit 15a4bf8c8014b7df4847c6e4d0d6863f531179f6 (patch)
tree b7b5f9db002142aa03a625dc2d780b4f6ad3b635 /decoder/ff.cc
parent 31d21ec50cb9689a73ee58617d282741396ca510 (diff)
optional max arity param for Arity_N features (no hardcoded max of N=9); FF::rule_feature() should be true for non-source-span dependent 0-state features.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@196 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/ff.cc')
-rw-r--r-- decoder/ff.cc 42
1 files changed, 29 insertions, 13 deletions
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 3f433dfb..b323ab27 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -1,5 +1,6 @@
//TODO: 0 size state != rule-local feature, i.e. still may depend on source span loc/context. identify truly rule-local features so if we want they can be added to grammar rules (minor speedup)
+#include <boost/lexical_cast.hpp>
#include "ff.h"
#include "tdict.h"
@@ -33,7 +34,7 @@ FeatureFunction::Features FeatureFunction::single_feature(WordID feat) {
return Features(1,feat);
}
-FeatureFunction::Features ModelSet::all_features(std::ostream *warn) {
+FeatureFunction::Features ModelSet::all_features(std::ostream *warn,bool warn0) {
typedef FeatureFunction::Features FFS;
FFS ffs;
#define WARNFF(x) do { if (warn) { *warn << "WARNING: "<< x ; *warn<<endl; } } while(0)
@@ -46,17 +47,26 @@ FeatureFunction::Features ModelSet::all_features(std::ostream *warn) {
if (si.empty()) {
WARNFF(ffname<<" doesn't yet report any feature IDs - implement features() method?");
}
+ unsigned n0=0;
for (unsigned j=0;j<si.size();++j) {
WordID fid=si[j];
+ if (!fid) ++n0;
if (fid >= weights_.size())
weights_.resize(fid+1);
- pair<FFM::iterator,bool> i_new=ff_from.insert(FFM::value_type(fid,ffname));
- if (i_new.second)
- ffs.push_back(fid);
- else {
- WARNFF(ffname<<" models["<<i<<"] tried to define feature "<<FD::Convert(fid)<<" already defined earlier by "<<i_new.first->second);
+ if (warn0 || fid) {
+ pair<FFM::iterator,bool> i_new=ff_from.insert(FFM::value_type(fid,ffname));
+ if (i_new.second) {
+ if (fid)
+ ffs.push_back(fid);
+ else
+ WARNFF("Feature id 0 for "<<ffname<<" (models["<<i<<"]) - probably no weight provided. Don't freeze feature ids to see the name");
+ } else {
+ WARNFF(ffname<<" (models["<<i<<"]) tried to define feature "<<FD::Convert(fid)<<" already defined earlier by "<<i_new.first->second);
+ }
}
}
+ if (n0)
+ WARNFF(ffname<<" (models["<<i<<"]) had "<<n0<<" unused features (--no_freeze_feature_set to see them)");
}
return ffs;
#undef WARNFF
@@ -130,17 +140,22 @@ void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
features->set_value(fid_, edge.rule_->FWords() * value_);
}
-ArityPenalty::ArityPenalty(const std::string& /* param */) :
+ArityPenalty::ArityPenalty(const std::string& param) :
value_(-1.0 / log(10)) {
- string fname = "Arity_X";
- for (int i = 0; i < N_ARITIES; ++i) {
- fname[6]=i + '0';
- fids_[i] = FD::Convert(fname);
+ string fname = "Arity_";
+ unsigned MAX=DEFAULT_MAX_ARITY;
+ using namespace boost;
+ if (!param.empty())
+ MAX=lexical_cast<unsigned>(param);
+ for (unsigned i = 0; i <= MAX; ++i) {
+ WordID fid=FD::Convert(fname+lexical_cast<string>(i));
+ fids_.push_back(fid);
}
+ while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen. doesn't change anything
}
FeatureFunction::Features ArityPenalty::features() const {
- return Features(&fids_[0],&fids_[N_ARITIES]);
+ return Features(fids_.begin(),fids_.end());
}
void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
@@ -153,7 +168,8 @@ void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
(void) ant_states;
(void) state;
(void) estimated_features;
- features->set_value(fids_[edge.Arity()], value_);
+ unsigned a=edge.Arity();
+ features->set_value(a<fids_.size()?fids_[a]:0, value_);
}
ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) :