summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWu, Ke <wuke@cs.umd.edu>2014-10-10 11:53:38 -0400
committerWu, Ke <wuke@cs.umd.edu>2014-10-10 11:53:38 -0400
commit4b6cdb02245c97cce70f60d778059c6c0fbab5a6 (patch)
tree7ac9f39be47215c785377685c0414ee562276fdb
parent5405acffd9c86ec80be4306807eda8e6ad284b89 (diff)
Allow erasure of part of state
-rw-r--r--decoder/apply_models.cc44
-rw-r--r--decoder/ff.h19
-rw-r--r--decoder/ff_const_reorder.cc1
-rw-r--r--decoder/ffset.cc58
-rw-r--r--decoder/ffset.h17
5 files changed, 54 insertions, 85 deletions
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 3f3f6a79..278516c1 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -233,24 +233,19 @@ public:
void IncorporateIntoPlusLMForest(size_t head_node_hash, Candidate* item, State2Node* s2n, CandidateList* freelist) {
Hypergraph::Edge* new_edge = out.AddEdge(item->out_edge_);
new_edge->edge_prob_ = item->out_edge_.edge_prob_;
- //start: new code by lijunhui
- FFState real_state;
- FFState* real_state_ref;
- if (models.HaveEraseState()) {
- models.GetRealFFState(item->state_, real_state);
- real_state_ref = &real_state;
+
+ Candidate** o_item_ptr = nullptr;
+ if (item->state_.size() && models.NeedsStateErasure()) {
+ // When erasure of certain state bytes is needed, we must make a copy of
+ // the state instead of doing the erasure in-place because future
+ // candidates may require the information in the bytes to be erased.
+ FFState state(item->state_);
+ models.EraseIgnoredBytes(&state);
+ o_item_ptr = &(*s2n)[state];
+ } else {
+ o_item_ptr = &(*s2n)[item->state_];
}
- else
- real_state_ref = &(item->state_);
- Candidate*& o_item = (*s2n)[(*real_state_ref)];
- /*FFState real_state;
- models.GetRealFFState(item->state_, real_state);
- Candidate*& o_item = (*s2n)[real_state];*/
- //end: new code by lijunhui
-
- //start: original code
- //Candidate*& o_item = (*s2n)[item->state_];
- //end: original code
+ Candidate*& o_item = *o_item_ptr;
if (!o_item) o_item = item;
@@ -272,18 +267,17 @@ public:
// score is the same for all items with a common residual DP
// state
if (item->vit_prob_ > o_item->vit_prob_) {
- //start: new code by lijunhui
- if (models.HaveEraseState()) {
- assert(models.GetRealFFState(o_item->state_) == models.GetRealFFState(item->state_)); // sanity check!
+ if (item->state_.size() && models.NeedsStateErasure()) {
+ // node_states_ should still point to the unerased state.
node_states_[o_item->node_index_] = item->state_;
+ // sanity check!
+ FFState item_state(item->state_), o_item_state(o_item->state_);
+ models.EraseIgnoredBytes(&item_state);
+ models.EraseIgnoredBytes(&o_item_state);
+ assert(item_state == o_item_state);
} else {
assert(o_item->state_ == item->state_); // sanity check!
}
- //end: new code by lijunhui
-
- //start: original code
- //assert(o_item->state_ == item->state_); // sanity check!
- //end: original code
o_item->est_prob_ = item->est_prob_;
o_item->vit_prob_ = item->vit_prob_;
diff --git a/decoder/ff.h b/decoder/ff.h
index 3280592e..afa3dbca 100644
--- a/decoder/ff.h
+++ b/decoder/ff.h
@@ -17,11 +17,17 @@ class FeatureFunction {
friend class ExternalFeature;
public:
std::string name_; // set by FF factory using usage()
- FeatureFunction() : state_size_() {}
- explicit FeatureFunction(int state_size) : state_size_(state_size) {}
+ FeatureFunction() : state_size_(), ignored_state_size_() {}
+ explicit FeatureFunction(int state_size, int ignored_state_size = 0)
+ : state_size_(state_size), ignored_state_size_(ignored_state_size) {}
virtual ~FeatureFunction();
bool IsStateful() const { return state_size_ > 0; }
int StateSize() const { return state_size_; }
+ // Returns the number of bytes in the state that should be ignored during
+ // search. When non-zero, the last N bytes in the state should be ignored when
+ // splitting a hypernode by the state. This allows the feature function to
+ // store some side data and later retrieve it via the state bytes.
+ int IgnoredStateSize() const { return ignored_state_size_; }
// override this. not virtual because we want to expose this to factory template for help before creating a FF
static std::string usage(bool show_params,bool show_details) {
@@ -71,12 +77,17 @@ class FeatureFunction {
SparseVector<double>* estimated_features,
void* context) const;
- // !!! ONLY call this from subclass *CONSTRUCTORS* !!!
+ // !!! ONLY call these from subclass *CONSTRUCTORS* !!!
void SetStateSize(size_t state_size) {
state_size_ = state_size;
}
+
+ void SetIgnoredStateSize(size_t ignored_state_size) {
+ ignored_state_size_ = ignored_state_size;
+ }
+
private:
- int state_size_;
+ int state_size_, ignored_state_size_;
};
#endif
diff --git a/decoder/ff_const_reorder.cc b/decoder/ff_const_reorder.cc
index 8bd3f4e2..fc774c3a 100644
--- a/decoder/ff_const_reorder.cc
+++ b/decoder/ff_const_reorder.cc
@@ -1327,6 +1327,7 @@ struct ConstReorderFeatureImpl {
ConstReorderFeature::ConstReorderFeature(const std::string& param) {
pimpl_ = new ConstReorderFeatureImpl(param);
SetStateSize(ConstReorderFeatureImpl::ReserveStateSize());
+ SetIgnoredStateSize(ConstReorderFeatureImpl::ReserveStateSize());
name_ = "ConstReorderFeature";
}
diff --git a/decoder/ffset.cc b/decoder/ffset.cc
index 488346bc..8ba70389 100644
--- a/decoder/ffset.cc
+++ b/decoder/ffset.cc
@@ -14,25 +14,10 @@ ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>
for (int i = 0; i < models_.size(); ++i) {
model_state_pos_[i] = state_size_;
state_size_ += models_[i]->StateSize();
- }
-
- //added by lijunhui
- //erase the states for SRLReorderFeature and DepPrnFeatures
- for (int i = 0; i < models_.size(); i++) {
- if (models_[i]->name_ == string("SRLReorderFeature")
- || models_[i]->name_ == string("DepPrnFeatures")
- || models_[i]->name_ == string("SyntacticContextFeature")
- || models_[i]->name_ == string("ArgtReorderFeature")
- || models_[i]->name_ == string("ConstReorderSparseFeature")
- || models_[i]->name_ == string("ConstReorderFeature")) {
- int start_pos = model_state_pos_[i];
- int end_pos;
- if (i == models_.size() - 1)
- end_pos = state_size_;
- else
- end_pos = model_state_pos_[i + 1];
- erase_state_start_pos_.push_back(start_pos);
- erase_state_end_pos_.push_back(end_pos);
+ int num_ignored_bytes = models_[i]->IgnoredStateSize();
+ if (num_ignored_bytes > 0) {
+ ranges_to_erase_.push_back(
+ {state_size_ - num_ignored_bytes, state_size_});
}
}
}
@@ -90,32 +75,13 @@ void ModelSet::AddFinalFeatures(const FFState& state, HG::Edge* edge,SentenceMet
edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
}
-bool ModelSet::HaveEraseState() const {
- if (erase_state_start_pos_.size() == 0) return false;
- return true;
-}
-
-void ModelSet::GetRealFFState(const FFState& state, FFState& real_state) const {
- real_state.resize(state.size());
- for (int i = 0; i < state.size(); i++) {
- real_state[i] = state[i];
- }
-
- if (state.size() == 0)
- return;
- assert(state.size() == state_size_);
+bool ModelSet::NeedsStateErasure() const { return !ranges_to_erase_.empty(); }
- //erase the states for SRLReorderFeature and DepPrnFeatures and SyntacticContextFeature
- for (int i = 0; i < erase_state_start_pos_.size(); i++){
- int start_pos = erase_state_start_pos_[i];
- int end_pos = erase_state_end_pos_[i];
- for (int j = start_pos; j < end_pos; j++)
- real_state[j] = 0;
- }
-}
-
-FFState ModelSet::GetRealFFState(const FFState& state) const {
- FFState real_state;
- GetRealFFState(state, real_state);
- return real_state;
+void ModelSet::EraseIgnoredBytes(FFState* state) const {
+ // TODO: can we memset?
+ for (const auto& range : ranges_to_erase_) {
+ for (int i = range.first; i < range.second; ++i) {
+ (*state)[i] = 0;
+ }
+ }
}
diff --git a/decoder/ffset.h b/decoder/ffset.h
index 0467b01d..a69a75fa 100644
--- a/decoder/ffset.h
+++ b/decoder/ffset.h
@@ -1,6 +1,7 @@
#ifndef _FFSET_H_
#define _FFSET_H_
+#include <utility>
#include <vector>
#include "value_array.h"
#include "prob.h"
@@ -47,22 +48,18 @@ class ModelSet {
bool stateless() const { return !state_size_; }
- //added by ljh
- //some states of features are not contextual, but used for storing some useful information for calculate feature val
- //it needs to erase these states
- //this function is only called by IncorporateIntoPlusLMForest(...) in apply_models.cc
- void GetRealFFState(const FFState& state, FFState& real_state) const;
- FFState GetRealFFState(const FFState& state) const;
- bool HaveEraseState() const;
- private:
- std::vector<int> erase_state_start_pos_;
- std::vector<int> erase_state_end_pos_;
+ // Part of a feature state may be used for storing some side data for
+ // calculating feature values but not necessary for splitting hypernodes. Such
+ // bytes needs to be erased for hypernode splitting.
+ bool NeedsStateErasure() const;
+ void EraseIgnoredBytes(FFState* state) const;
private:
std::vector<const FeatureFunction*> models_;
const std::vector<double>& weights_;
int state_size_;
std::vector<int> model_state_pos_;
+ std::vector<std::pair<int, int> > ranges_to_erase_;
};
#endif