Diffstat (limited to 'decoder')
-rw-r--r--  decoder/apply_models.cc     | 53
-rw-r--r--  decoder/cdec_ff.cc          |  2
-rw-r--r--  decoder/ff.h                | 19
-rw-r--r--  decoder/ff_const_reorder.cc | 54
-rw-r--r--  decoder/ff_const_reorder.h  | 14
-rw-r--r--  decoder/ffset.cc            | 58
-rw-r--r--  decoder/ffset.h             | 17
7 files changed, 96 insertions(+), 121 deletions(-)
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 3f3f6a79..18c83fd4 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -233,24 +233,19 @@ public:
   void IncorporateIntoPlusLMForest(size_t head_node_hash, Candidate* item, State2Node* s2n, CandidateList* freelist) {
     Hypergraph::Edge* new_edge = out.AddEdge(item->out_edge_);
     new_edge->edge_prob_ = item->out_edge_.edge_prob_;
-    //start: new code by lijunhui
-    FFState real_state;
-    FFState* real_state_ref;
-    if (models.HaveEraseState()) {
-      models.GetRealFFState(item->state_, real_state);
-      real_state_ref = &real_state;
+
+    Candidate** o_item_ptr = nullptr;
+    if (item->state_.size() && models.NeedsStateErasure()) {
+      // When erasure of certain state bytes is needed, we must make a copy of
+      // the state instead of doing the erasure in-place because future
+      // candidates may require the information in the bytes to be erased.
+      FFState state(item->state_);
+      models.EraseIgnoredBytes(&state);
+      o_item_ptr = &(*s2n)[state];
+    } else {
+      o_item_ptr = &(*s2n)[item->state_];
     }
-    else
-      real_state_ref = &(item->state_);
-    Candidate*& o_item = (*s2n)[(*real_state_ref)];
-    /*FFState real_state;
-    models.GetRealFFState(item->state_, real_state);
-    Candidate*& o_item = (*s2n)[real_state];*/
-    //end: new code by lijunhui
-
-    //start: original code
-    //Candidate*& o_item = (*s2n)[item->state_];
-    //end: original code
+    Candidate*& o_item = *o_item_ptr;
 
     if (!o_item) o_item = item;
 
@@ -272,18 +267,17 @@ public:
       // score is the same for all items with a common residual DP
       // state
       if (item->vit_prob_ > o_item->vit_prob_) {
-        //start: new code by lijunhui
-        if (models.HaveEraseState()) {
-          assert(models.GetRealFFState(o_item->state_) == models.GetRealFFState(item->state_)); // sanity check!
+        if (item->state_.size() && models.NeedsStateErasure()) {
+          // node_states_ should still point to the unerased state.
           node_states_[o_item->node_index_] = item->state_;
+          // sanity check!
+          FFState item_state(item->state_), o_item_state(o_item->state_);
+          models.EraseIgnoredBytes(&item_state);
+          models.EraseIgnoredBytes(&o_item_state);
+          assert(item_state == o_item_state);
         } else {
-          assert(o_item->state_ == item->state_); // sanity check!
+          assert(o_item->state_ == item->state_);  // sanity check!
         }
-        //end: new code by lijunhui
-
-        //start: original code
-        //assert(o_item->state_ == item->state_); // sanity check!
-        //end: original code
         o_item->est_prob_ = item->est_prob_;
         o_item->vit_prob_ = item->vit_prob_;
@@ -629,9 +623,10 @@ void ApplyModelSet(const Hypergraph& in,
   if (models.stateless() || config.algorithm == IntersectionConfiguration::FULL) {
     NoPruningRescorer ma(models, smeta, in, out); // avoid overhead of best-first when no state
     ma.Apply();
-  } else if (config.algorithm == IntersectionConfiguration::CUBE
-      || config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING
-      || config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING_2) {
+  } else if (config.algorithm == IntersectionConfiguration::CUBE ||
+             config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING ||
+             config.algorithm ==
+                 IntersectionConfiguration::FAST_CUBE_PRUNING_2) {
     int pl = config.pop_limit;
     const int max_pl_for_large=50;
     if (pl > max_pl_for_large && in.nodes_.size() > 80000) {
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index e7a8bbc1..6f7227aa 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -78,6 +78,6 @@ void register_feature_functions() {
   ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);
   ff_registry.Register("SourcePathFeatures", new FFFactory<SourcePathFeatures>);
   ff_registry.Register("WordSet", new FFFactory<WordSet>);
-  ff_registry.Register("ConstReorderFeature", new ConstReorderFeatureFactory());
+  ff_registry.Register("ConstReorderFeature", new FFFactory<ConstReorderFeature>);
   ff_registry.Register("External", new FFFactory<ExternalFeature>);
 }
diff --git a/decoder/ff.h b/decoder/ff.h
index 3280592e..afa3dbca 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -17,11 +17,17 @@ class FeatureFunction {
   friend class ExternalFeature;
  public:
   std::string name_; // set by FF factory using usage()
-  FeatureFunction() : state_size_() {}
-  explicit FeatureFunction(int state_size) : state_size_(state_size) {}
+  FeatureFunction() : state_size_(), ignored_state_size_() {}
+  explicit FeatureFunction(int state_size, int ignored_state_size = 0)
+      : state_size_(state_size), ignored_state_size_(ignored_state_size) {}
   virtual ~FeatureFunction();
   bool IsStateful() const { return state_size_ > 0; }
   int StateSize() const { return state_size_; }
+  // Returns the number of bytes in the state that should be ignored during
+  // search. When non-zero, the last N bytes in the state should be ignored when
+  // splitting a hypernode by the state. This allows the feature function to
+  // store some side data and later retrieve it via the state bytes.
+  int IgnoredStateSize() const { return ignored_state_size_; }
 
   // override this. not virtual because we want to expose this to factory template for help before creating a FF
   static std::string usage(bool show_params,bool show_details) {
@@ -71,12 +77,17 @@ class FeatureFunction {
                                      SparseVector<double>* estimated_features,
                                      void* context) const;
 
-  // !!! ONLY call this from subclass *CONSTRUCTORS* !!!
+  // !!! ONLY call these from subclass *CONSTRUCTORS* !!!
   void SetStateSize(size_t state_size) { state_size_ = state_size; }
+
+  void SetIgnoredStateSize(size_t ignored_state_size) {
+    ignored_state_size_ = ignored_state_size;
+  }
+
  private:
-  int state_size_;
+  int state_size_, ignored_state_size_;
 };
 
 #endif
diff --git a/decoder/ff_const_reorder.cc b/decoder/ff_const_reorder.cc
index 8bd3f4e2..c0269125 100644
--- a/decoder/ff_const_reorder.cc
+++ b/decoder/ff_const_reorder.cc
@@ -9,6 +9,7 @@
 #include "hash.h"
 #include "argument_reorder_model.h"
 
+#include <sstream>
 #include <string>
 #include <vector>
 #include <stdio.h>
@@ -187,14 +188,6 @@ struct TargetTranslation {
         vec_f_align_bit_array_(end_pos - begin_pos + 1, NULL),
         vec_e_align_bit_array_(e_num_word, NULL) {
     int len = end_pos - begin_pos + 1;
-
-    /*vec_f_align_bit_array_.reserve(len);
-    for (int i = 0; i < len; i++)
-      vec_f_align_bit_array_.push_back(NULL);
-
-    vec_e_align_bit_array_.reserve(e_num_word);
-    for (int i = 0; i < e_num_word; i++)
-      vec_e_align_bit_array_.push_back(NULL);*/
     align_.reserve(1.5 * len);
   }
   ~TargetTranslation() {
@@ -304,17 +297,16 @@ struct TargetTranslation {
     if (target_end == -1) target_begin = -1;
   }
 
-  const uint16_t begin_pos_, end_pos_;  // the position in parse
+  const uint16_t begin_pos_, end_pos_;              // the position in parse
   const uint16_t input_begin_pos_, input_end_pos_;  // the position in input
   const uint16_t e_num_words_;
   vector<AlignmentPoint*> align_;
 
  private:
-  vector<SBitArray*> vec_f_align_bit_array_;
-  vector<SBitArray*> vec_e_align_bit_array_;
-
   vector<short> vec_left_most_;
   vector<short> vec_right_most_;
+  vector<SBitArray*> vec_f_align_bit_array_;
+  vector<SBitArray*> vec_e_align_bit_array_;
 };
 
 struct FocusedConstituent {
@@ -710,7 +702,7 @@ struct ConstReorderFeatureImpl {
       SparseVector<double>* features,
       const TargetTranslation* target_translation,
       const vector<const TargetTranslation*>& vec_node,
-      std::vector<int>& findex) {
+      std::vector<int>& /*findex*/) {
     if (b_srl_block_feature_ || b_srl_order_feature_) {
       double logprob_srl_reorder_left = 0.0, logprob_srl_reorder_right = 0.0;
       for (size_t i = 0; i < focused_srl_->focus_predicates_.size(); i++) {
@@ -1327,6 +1319,7 @@ struct ConstReorderFeatureImpl {
 
 ConstReorderFeature::ConstReorderFeature(const std::string& param) {
   pimpl_ = new ConstReorderFeatureImpl(param);
   SetStateSize(ConstReorderFeatureImpl::ReserveStateSize());
+  SetIgnoredStateSize(ConstReorderFeatureImpl::ReserveStateSize());
   name_ = "ConstReorderFeature";
 }
@@ -1346,12 +1339,32 @@ void ConstReorderFeature::PrepareForInput(const SentenceMetadata& smeta) {
 void ConstReorderFeature::TraversalFeaturesImpl(
     const SentenceMetadata& /* smeta */, const Hypergraph::Edge& edge,
     const vector<const void*>& ant_states, SparseVector<double>* features,
-    SparseVector<double>* estimated_features, void* state) const {
+    SparseVector<double>* /*estimated_features*/, void* state) const {
   pimpl_->SetConstReorderFeature(edge, features, ant_states, state);
 }
 
-string ConstReorderFeature::usage(bool /*param*/, bool /*verbose*/) {
-  return "ConstReorderFeature";
+string ConstReorderFeature::usage(bool show_params, bool show_details) {
+  ostringstream out;
+  out << "ConstReorderFeature";
+  if (show_params) {
+    out << " model_file_prefix [const_block=1 const_order=1] [srl_block=0 "
+           "srl_order=0]"
+        << "\nParameters:\n"
+        << " const_{block,order}: enable/disable constituency constraints.\n"
+        << " src_{block,order}: enable/disable semantic role labeling "
+           "constraints.\n";
+  }
+  if (show_details) {
+    out << "\n"
+        << "Soft reordering constraint features from "
+           "http://www.aclweb.org/anthology/P14-1106. To train the classifers, "
+           "use utils/const_reorder_model_trainer for constituency reordering "
+           "constraints and utils/argument_reorder_model_trainer for semantic "
+           "role labeling reordering constraints.\n"
+        << "Input segments should provide path to parse tree (resp. SRL parse) "
+           "as \"parse\" (resp. \"srl\") properties.\n";
+  }
+  return out.str();
 }
 
 boost::shared_ptr<FeatureFunction> CreateConstReorderModel(
@@ -1359,12 +1372,3 @@ boost::shared_ptr<FeatureFunction> CreateConstReorderModel(
   ConstReorderFeature* ret = new ConstReorderFeature(param);
   return boost::shared_ptr<FeatureFunction>(ret);
 }
-
-boost::shared_ptr<FeatureFunction> ConstReorderFeatureFactory::Create(
-    std::string param) const {
-  return CreateConstReorderModel(param);
-}
-
-std::string ConstReorderFeatureFactory::usage(bool params, bool verbose) const {
-  return ConstReorderFeature::usage(params, verbose);
-}
diff --git a/decoder/ff_const_reorder.h b/decoder/ff_const_reorder.h
index 6ed35768..a5be02d0 100644
--- a/decoder/ff_const_reorder.h
+++ b/decoder/ff_const_reorder.h
@@ -13,9 +13,16 @@
 
 struct ConstReorderFeatureImpl;
 
+// Soft reordering constraint features from
+// http://www.aclweb.org/anthology/P14-1106. To train the classifers,
+// use utils/const_reorder_model_trainer for constituency reordering
+// constraints and utils/argument_reorder_model_trainer for SRL
+// reordering constraints.
+//
+// Input segments should provide path to parse tree (resp. SRL parse)
+// as "parse" (resp. "srl") properties.
 class ConstReorderFeature : public FeatureFunction {
  public:
-  // param = "filename n"
   ConstReorderFeature(const std::string& param);
   ~ConstReorderFeature();
   static std::string usage(bool param, bool verbose);
@@ -33,9 +40,4 @@ class ConstReorderFeature : public FeatureFunction {
   ConstReorderFeatureImpl* pimpl_;
 };
 
-struct ConstReorderFeatureFactory : public FactoryBase<FeatureFunction> {
-  FP Create(std::string param) const;
-  std::string usage(bool params, bool verbose) const;
-};
-
 #endif /* FF_CONST_REORDER_H_ */
diff --git a/decoder/ffset.cc b/decoder/ffset.cc
index 488346bc..8ba70389 100644
--- a/decoder/ffset.cc
+++ b/decoder/ffset.cc
@@ -14,25 +14,10 @@ ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>
   for (int i = 0; i < models_.size(); ++i) {
     model_state_pos_[i] = state_size_;
     state_size_ += models_[i]->StateSize();
-  }
-
-  //added by lijunhui
-  //erase the states for SRLReorderFeature and DepPrnFeatures
-  for (int i = 0; i < models_.size(); i++) {
-    if (models_[i]->name_ == string("SRLReorderFeature")
-        || models_[i]->name_ == string("DepPrnFeatures")
-        || models_[i]->name_ == string("SyntacticContextFeature")
-        || models_[i]->name_ == string("ArgtReorderFeature")
-        || models_[i]->name_ == string("ConstReorderSparseFeature")
-        || models_[i]->name_ == string("ConstReorderFeature")) {
-      int start_pos = model_state_pos_[i];
-      int end_pos;
-      if (i == models_.size() - 1)
-        end_pos = state_size_;
-      else
-        end_pos = model_state_pos_[i + 1];
-      erase_state_start_pos_.push_back(start_pos);
-      erase_state_end_pos_.push_back(end_pos);
+    int num_ignored_bytes = models_[i]->IgnoredStateSize();
+    if (num_ignored_bytes > 0) {
+      ranges_to_erase_.push_back(
+          {state_size_ - num_ignored_bytes, state_size_});
     }
   }
 }
@@ -90,32 +75,13 @@ void ModelSet::AddFinalFeatures(const FFState& state, HG::Edge* edge,SentenceMet
   edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
 }
 
-bool ModelSet::HaveEraseState() const {
-  if (erase_state_start_pos_.size() == 0) return false;
-  return true;
-}
-
-void ModelSet::GetRealFFState(const FFState& state, FFState& real_state) const {
-  real_state.resize(state.size());
-  for (int i = 0; i < state.size(); i++) {
-    real_state[i] = state[i];
-  }
-
-  if (state.size() == 0)
-    return;
-  assert(state.size() == state_size_);
+bool ModelSet::NeedsStateErasure() const { return !ranges_to_erase_.empty(); }
 
-  //erase the states for SRLReorderFeature and DepPrnFeatures and SyntacticContextFeature
-  for (int i = 0; i < erase_state_start_pos_.size(); i++){
-    int start_pos = erase_state_start_pos_[i];
-    int end_pos = erase_state_end_pos_[i];
-    for (int j = start_pos; j < end_pos; j++)
-      real_state[j] = 0;
-  }
-}
-
-FFState ModelSet::GetRealFFState(const FFState& state) const {
-  FFState real_state;
-  GetRealFFState(state, real_state);
-  return real_state;
+void ModelSet::EraseIgnoredBytes(FFState* state) const {
+  // TODO: can we memset?
+  for (const auto& range : ranges_to_erase_) {
+    for (int i = range.first; i < range.second; ++i) {
+      (*state)[i] = 0;
+    }
+  }
 }
diff --git a/decoder/ffset.h b/decoder/ffset.h
index 0467b01d..a69a75fa 100644
--- a/decoder/ffset.h
+++ b/decoder/ffset.h
@@ -1,6 +1,7 @@
 #ifndef _FFSET_H_
 #define _FFSET_H_
 
+#include <utility>
 #include <vector>
 #include "value_array.h"
 #include "prob.h"
@@ -47,22 +48,18 @@ class ModelSet {
 
   bool stateless() const { return !state_size_; }
 
-  //added by ljh
-  //some states of features are not contextual, but used for storing some useful information for calculate feature val
-  //it needs to erase these states
-  //this function is only called by IncorporateIntoPlusLMForest(...) in apply_models.cc
-  void GetRealFFState(const FFState& state, FFState& real_state) const;
-  FFState GetRealFFState(const FFState& state) const;
-  bool HaveEraseState() const;
- private:
-  std::vector<int> erase_state_start_pos_;
-  std::vector<int> erase_state_end_pos_;
+  // Part of a feature state may be used for storing some side data for
+  // calculating feature values but not necessary for splitting hypernodes. Such
+  // bytes needs to be erased for hypernode splitting.
+  bool NeedsStateErasure() const;
+  void EraseIgnoredBytes(FFState* state) const;
 
  private:
   std::vector<const FeatureFunction*> models_;
   const std::vector<double>& weights_;
   int state_size_;
   std::vector<int> model_state_pos_;
+  std::vector<std::pair<int, int> > ranges_to_erase_;
 };
 
 #endif
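Note (not part of the commit): the sketch below illustrates how a stateful feature might use the ignored-state API added to decoder/ff.h above. Only SetStateSize(), SetIgnoredStateSize(), the TraversalFeaturesImpl signature, and the "last N bytes are ignored when splitting a hypernode by the state" contract come from the diff; the class name, the 12/4-byte sizes, and the byte layout are assumptions made for illustration.

// Hypothetical feature (assumed name SideDataDemoFeature): reserves 12 state
// bytes, of which the trailing 4 are side data invisible to recombination.
#include <cstring>
#include <vector>

#include "ff.h"  // FeatureFunction and the types used in its interface

class SideDataDemoFeature : public FeatureFunction {
 public:
  SideDataDemoFeature() {
    SetStateSize(12);        // total bytes written per item
    SetIgnoredStateSize(4);  // trailing bytes ignored when states are compared
  }

 protected:
  virtual void TraversalFeaturesImpl(const SentenceMetadata& /*smeta*/,
                                     const Hypergraph::Edge& /*edge*/,
                                     const std::vector<const void*>& ant_states,
                                     SparseVector<double>* /*features*/,
                                     SparseVector<double>* /*estimated_features*/,
                                     void* context) const {
    unsigned char* bytes = static_cast<unsigned char*>(context);
    // Bytes [0, 8): genuine DP state; two items recombine only if these match.
    std::memset(bytes, 0, 8);
    // Bytes [8, 12): side data retrievable later via ant_states; ModelSet
    // records this range in ranges_to_erase_ and zeroes it before comparison.
    std::memset(bytes + 8, 0, 4);
    (void)ant_states;
  }
};

ConstReorderFeature above is the extreme case of this pattern: it passes its full ReserveStateSize() to both setters, so its entire state is treated as side data.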
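For the decoder side of the change, the following standalone sketch shows the copy-then-erase pattern used in IncorporateIntoPlusLMForest: ignored tail bytes are zeroed on a copy of the state before the state is used as a map key, so the original state, and the side data it carries, stays available to later candidates. The State, Ranges, and s2n names are stand-ins (std::vector and std::map), not the decoder's real FFState or State2Node types.

// Stand-in types so the sketch compiles on its own.
#include <cstdint>
#include <map>
#include <utility>
#include <vector>

typedef std::vector<uint8_t> State;                // stand-in for FFState
typedef std::vector<std::pair<int, int> > Ranges;  // [begin, end) byte ranges

// Mirrors ModelSet::EraseIgnoredBytes: zero every ignored range in-place.
void EraseIgnoredBytes(State* state, const Ranges& ranges_to_erase) {
  for (const auto& range : ranges_to_erase)
    for (int i = range.first; i < range.second; ++i) (*state)[i] = 0;
}

int main() {
  // One model whose IgnoredStateSize() is 2: its last two state bytes
  // (positions 4 and 5 here) are side data, as recorded in ranges_to_erase_.
  Ranges ranges_to_erase = {{4, 6}};

  std::map<State, int> s2n;  // stand-in for the State2Node map
  State item_state = {1, 2, 3, 4, 9, 9};

  // Copy, erase the ignored bytes on the copy, and key the map on the copy.
  State key(item_state);
  EraseIgnoredBytes(&key, ranges_to_erase);
  int& node = s2n[key];  // items differing only in side data share this slot
  (void)node;

  // item_state still holds the side-data bytes {9, 9} for later retrieval.
  return 0;
}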