-rw-r--r--  decoder/cdec.cc  12
-rw-r--r--  decoder/ff.cc    42
-rw-r--r--  decoder/ff.h     22
3 files changed, 50 insertions, 26 deletions
diff --git a/decoder/cdec.cc b/decoder/cdec.cc
index 079b270b..79d51939 100644
--- a/decoder/cdec.cc
+++ b/decoder/cdec.cc
@@ -390,6 +390,10 @@ int main(int argc, char** argv) {
}
// cerr << "+LM weights: " << FeatureVector(feature_weights)<<endl;
}
+ if (!conf.count("no_freeze_feature_set")) {
+ cerr << "Freezing feature set (use --no_freeze_feature_set to change)." << endl;
+ FD::Freeze(); // this means we can't see the names of features that have no weight
+ }
// set up translation back end
if (formalism == "scfg")
@@ -443,10 +447,6 @@ int main(int argc, char** argv) {
ModelSet prelm_models(prelm_feature_weights, prelm_ffs);
if (has_prelm_models)
show_models(conf,prelm_models,"prelm ");
- if (!conf.count("no_freeze_feature_set")) { // this used to happen immediately after loading weights, but now show_models will extend weight vector nicely.
- cerr << "Freezing feature set (use --no_freeze_feature_set to change)." << endl;
- FD::Freeze();
- }
int palg = 1;
if (LowercaseString(str("intersection_strategy",conf)) == "full") {
@@ -518,6 +518,7 @@ int main(int argc, char** argv) {
Timer t("Translation");
const bool translation_successful =
translator->Translate(to_translate, &smeta, feature_weights, &forest);
+ //TODO: modify translator to incorporate all 0-state model scores immediately?
translator->SentenceComplete();
if (!translation_successful) {
cerr << " NO PARSE FOUND.\n";
@@ -550,8 +551,7 @@ int main(int argc, char** argv) {
ApplyModelSet(forest,
smeta,
prelm_models,
- IntersectionConfiguration(exhaustive_t()),
-// avoid overhead of best-first
+ inter_conf, // this is now reduced to exhaustive if all are stateless
&prelm_forest);
forest.swap(prelm_forest);
forest.Reweight(prelm_feature_weights);
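Moving FD::Freeze() up to just after the weights are loaded means any feature name first encountered later (i.e. one that was given no weight) maps to the null id 0 instead of being allocated a fresh id. A minimal sketch of that assumed dictionary behavior (the second feature name below is hypothetical):

  #include "fdict.h"
  #include <cassert>
  void freeze_demo() {
    WordID known = FD::Convert("WordPenalty");           // id assigned while weights were loaded
    FD::Freeze();                                         // no new ids from here on
    WordID unseen = FD::Convert("SomeBrandNewFeature");   // hypothetical unweighted feature
    assert(known != 0 && unseen == 0);                    // frozen dictionary maps unknown names to 0
  }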
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 3f433dfb..b323ab27 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -1,5 +1,6 @@
//TODO: 0 size state != rule-local feature, i.e. still may depend on source span loc/context. identify truly rule-local features so if we want they can be added to grammar rules (minor speedup)
+#include <boost/lexical_cast.hpp>
#include "ff.h"
#include "tdict.h"
@@ -33,7 +34,7 @@ FeatureFunction::Features FeatureFunction::single_feature(WordID feat) {
return Features(1,feat);
}
-FeatureFunction::Features ModelSet::all_features(std::ostream *warn) {
+FeatureFunction::Features ModelSet::all_features(std::ostream *warn,bool warn0) {
typedef FeatureFunction::Features FFS;
FFS ffs;
#define WARNFF(x) do { if (warn) { *warn << "WARNING: "<< x ; *warn<<endl; } } while(0)
@@ -46,17 +47,26 @@ FeatureFunction::Features ModelSet::all_features(std::ostream *warn) {
if (si.empty()) {
WARNFF(ffname<<" doesn't yet report any feature IDs - implement features() method?");
}
+ unsigned n0=0;
for (unsigned j=0;j<si.size();++j) {
WordID fid=si[j];
+ if (!fid) ++n0;
if (fid >= weights_.size())
weights_.resize(fid+1);
- pair<FFM::iterator,bool> i_new=ff_from.insert(FFM::value_type(fid,ffname));
- if (i_new.second)
- ffs.push_back(fid);
- else {
- WARNFF(ffname<<" models["<<i<<"] tried to define feature "<<FD::Convert(fid)<<" already defined earlier by "<<i_new.first->second);
+ if (warn0 || fid) {
+ pair<FFM::iterator,bool> i_new=ff_from.insert(FFM::value_type(fid,ffname));
+ if (i_new.second) {
+ if (fid)
+ ffs.push_back(fid);
+ else
+ WARNFF("Feature id 0 for "<<ffname<<" (models["<<i<<"]) - probably no weight was provided. Don't freeze feature ids if you want to see the name");
+ } else {
+ WARNFF(ffname<<" (models["<<i<<"]) tried to define feature "<<FD::Convert(fid)<<" already defined earlier by "<<i_new.first->second);
+ }
}
}
+ if (n0)
+ WARNFF(ffname<<" (models["<<i<<"]) had "<<n0<<" unused features (--no_freeze_feature_set to see them)");
}
return ffs;
#undef WARNFF
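With the dictionary frozen, every feature that never received a weight collapses to id 0, so all_features() now just counts those per model (n0) and registers only nonzero ids unless the new flag is set. A small usage sketch (model_set stands in for any existing ModelSet):

  // report duplicate features to stderr; warn_fid_zero=true also surfaces each id-0 feature
  FeatureFunction::Features ids = model_set.all_features(&std::cerr, /*warn_fid_zero=*/true);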
@@ -130,17 +140,22 @@ void SourceWordPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
features->set_value(fid_, edge.rule_->FWords() * value_);
}
-ArityPenalty::ArityPenalty(const std::string& /* param */) :
+ArityPenalty::ArityPenalty(const std::string& param) :
value_(-1.0 / log(10)) {
- string fname = "Arity_X";
- for (int i = 0; i < N_ARITIES; ++i) {
- fname[6]=i + '0';
- fids_[i] = FD::Convert(fname);
+ string fname = "Arity_";
+ unsigned MAX=DEFAULT_MAX_ARITY;
+ using namespace boost;
+ if (!param.empty())
+ MAX=lexical_cast<unsigned>(param);
+ for (unsigned i = 0; i <= MAX; ++i) {
+ WordID fid=FD::Convert(fname+lexical_cast<string>(i));
+ fids_.push_back(fid);
}
+ while (!fids_.empty() && fids_.back()==0) fids_.pop_back(); // pretty up features vector in case FD was frozen. doesn't change anything
}
FeatureFunction::Features ArityPenalty::features() const {
- return Features(&fids_[0],&fids_[N_ARITIES]);
+ return Features(fids_.begin(),fids_.end());
}
void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
@@ -153,7 +168,8 @@ void ArityPenalty::TraversalFeaturesImpl(const SentenceMetadata& smeta,
(void) ant_states;
(void) state;
(void) estimated_features;
- features->set_value(fids_[edge.Arity()], value_);
+ unsigned a=edge.Arity();
+ features->set_value(a<fids_.size()?fids_[a]:0, value_);
}
ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>& models) :
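ArityPenalty's previously ignored parameter now bounds how many Arity_N indicators are created (0 <= N <= MaxArity, default DEFAULT_MAX_ARITY); an edge whose arity exceeds the bound falls through to the null feature id 0. A hypothetical configuration:

  // sketch: track Arity_0 .. Arity_4 only; rules of arity 5 and up map to the null feature id
  ArityPenalty ap("4");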
diff --git a/decoder/ff.h b/decoder/ff.h
index 6f8b8626..2cf96d39 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -2,7 +2,6 @@
#define _FF_H_
#include <vector>
-
#include "fdict.h"
#include "hg.h"
@@ -31,7 +30,10 @@ protected:
static std::string usage_helper(std::string const& name,std::string const& params,std::string const& details,bool show_params,bool show_details);
static Features single_feature(WordID feat);
public:
+ // stateless feature that doesn't depend on the source span: override this and return true; your feature can then be precomputed over grammar rules.
+ virtual bool rule_feature() const { return false; }
+ //OVERRIDE THIS:
virtual Features features() const { return Features(); }
// returns the number of bytes of context that this feature function will
// (maximally) use. By default, 0 ("stateless" models in Hiero/Joshua).
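rule_feature() is only a hint for now: a stateless feature with no source-span dependence announces that its contribution could be folded into the grammar rules themselves (the TODO at the top of ff.cc). A sketch of how code holding the feature functions (e.g. inside ModelSet, whose models_ vector appears at the end of this diff) might consume the hint; the loop is illustrative and not part of this patch:

  // count the models that are candidates for per-rule precomputation
  unsigned n_rule_local = 0;
  for (unsigned i = 0; i < models_.size(); ++i)
    if (models_[i]->rule_feature()) ++n_rule_local;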
@@ -95,6 +97,7 @@ class WordPenalty : public FeatureFunction {
static std::string usage(bool p,bool d) {
return usage_helper("WordPenalty","","number of target words (local feature)",p,d);
}
+ bool rule_feature() const { return true; }
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
const Hypergraph::Edge& edge,
@@ -109,6 +112,7 @@ class WordPenalty : public FeatureFunction {
class SourceWordPenalty : public FeatureFunction {
public:
+ bool rule_feature() const { return true; }
Features features() const;
SourceWordPenalty(const std::string& param);
static std::string usage(bool p,bool d) {
@@ -126,12 +130,18 @@ class SourceWordPenalty : public FeatureFunction {
const double value_;
};
+#define DEFAULT_MAX_ARITY 9
+#define DEFAULT_MAX_ARITY_STRINGIZE(x) #x
+#define DEFAULT_MAX_ARITY_STRINGIZE_EVAL(x) DEFAULT_MAX_ARITY_STRINGIZE(x)
+#define DEFAULT_MAX_ARITY_STR DEFAULT_MAX_ARITY_STRINGIZE_EVAL(DEFAULT_MAX_ARITY)
+
class ArityPenalty : public FeatureFunction {
public:
+ bool rule_feature() const { return true; }
Features features() const;
ArityPenalty(const std::string& param);
static std::string usage(bool p,bool d) {
- return usage_helper("ArityPenalty","","Indicator feature Arity_N=1 for rule of arity N (local feature)",p,d);
+ return usage_helper("ArityPenalty","[MaxArity(default " DEFAULT_MAX_ARITY_STR ")]","Indicator feature Arity_N=1 for rule of arity N (local feature). 0<=N<=MaxArity(default " DEFAULT_MAX_ARITY_STR ")",p,d);
}
protected:
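DEFAULT_MAX_ARITY_STR relies on the standard two-level stringification idiom: the _STRINGIZE_EVAL layer forces DEFAULT_MAX_ARITY to expand to 9 before # turns it into a string literal, so the usage() text above can splice the default in at compile time. A minimal stand-alone illustration (these macro names are invented for the example):

  #define VALUE 9
  #define STR_(x) #x         // without the extra level, STR_(VALUE) yields "VALUE"
  #define STR(x)  STR_(x)    // VALUE expands to 9 first, so STR(VALUE) yields "9"
  static const char demo[] = "default " STR(VALUE);   // the literal "default 9"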
@@ -142,10 +152,7 @@ class ArityPenalty : public FeatureFunction {
SparseVector<double>* estimated_features,
void* context) const;
private:
- enum {N_ARITIES=10};
-
-
- int fids_[N_ARITIES];
+ std::vector<WordID> fids_;
const double value_;
};
@@ -173,7 +180,8 @@ class ModelSet {
bool empty() const { return models_.empty(); }
- FeatureFunction::Features all_features(std::ostream *warnings=0); // this will warn about duplicate features as well (one function overwrites the feature of another). also resizes weights_ so it is large enough to hold the (0) weight for the largest reported feature id
+ bool stateless() const { return !state_size_; }
+ FeatureFunction::Features all_features(std::ostream *warnings=0,bool warn_fid_zero=false); // this will warn about duplicate features as well (one function overwrites the feature of another). also resizes weights_ so it is large enough to hold the (0) weight for the largest reported feature id. since 0 is a NULL feature id, it's never included. if warn_fid_zero, a warning is printed even for the first feature with id 0.
void show_features(std::ostream &out,std::ostream &warn,bool warn_zero_wt=true); //show features and weights
private:
std::vector<const FeatureFunction*> models_;
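stateless() is the query behind the cdec.cc comment above ("this is now reduced to exhaustive if all are stateless"): when no model contributes state bytes there is no node splitting, so best-first intersection has no overhead worth paying and plain exhaustive application is equivalent. An illustrative branch a caller could write -- not the mechanism the patch itself uses for the reduction:

  if (prelm_models.stateless())   // no state, so exhaustive intersection is sufficient
    ApplyModelSet(forest, smeta, prelm_models, IntersectionConfiguration(exhaustive_t()), &prelm_forest);
  else
    ApplyModelSet(forest, smeta, prelm_models, inter_conf, &prelm_forest);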