Add ConstReorderFeature

author: Wu, Ke <wuke@cs.umd.edu> 2014-10-07 18:05:58 -0400
committer: Wu, Ke <wuke@cs.umd.edu> 2014-10-07 18:05:58 -0400
commit: 0900cac418f7e46889336d137e6ba1bb84651544 (patch)
tree: 12738df89e6ca5f495229a195760827f92c5fdee
parent: ebeda1e75c77dd9044f1b9902770896e3009ae55 (diff)
7 files changed, 1457 insertions, 6 deletions
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index e46a7120..6577cc12 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -21,7 +21,7 @@ trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEW
 
 cdec_SOURCES = cdec.cc
 cdec_LDFLAGS= -rdynamic $(STATIC_FLAGS)
-cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a
+cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../utils/synutils/maxent-3.0/libtsuruoka_maxent.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a
 
 AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm
 
@@ -45,6 +45,7 @@ libcdec_a_SOURCES = \
   ff_basic.h \
   ff_bleu.h \
   ff_charset.h \
+  ff_const_reorder.h \
   ff_context.h \
   ff_csplit.h \
   ff_external.h \
@@ -111,6 +112,7 @@ libcdec_a_SOURCES = \
   ff_bleu.cc \
   ff_charset.cc \
   ff_context.cc \
+  ff_const_reorder.cc \
   ff_csplit.cc \
   ff_external.cc \
   ff_factory.cc \
diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc
index 9f8bbead..3f3f6a79 100644
--- a/decoder/apply_models.cc
+++ b/decoder/apply_models.cc
@@ -233,7 +233,25 @@ public:
   void IncorporateIntoPlusLMForest(size_t head_node_hash, Candidate* item, State2Node* s2n, CandidateList* freelist) {
     Hypergraph::Edge* new_edge = out.AddEdge(item->out_edge_);
     new_edge->edge_prob_ = item->out_edge_.edge_prob_;
-    Candidate*& o_item = (*s2n)[item->state_];
+    //start: new code by lijunhui
+    FFState real_state;
+    FFState* real_state_ref;
+    if (models.HaveEraseState()) {
+       models.GetRealFFState(item->state_, real_state);
+       real_state_ref = &real_state;
+    }
+    else
+       real_state_ref = &(item->state_);
+    Candidate*& o_item = (*s2n)[(*real_state_ref)];
+    /*FFState real_state;
+    models.GetRealFFState(item->state_, real_state);
+    Candidate*& o_item = (*s2n)[real_state];*/
+    //end: new code by lijunhui
+
+    //start: original code
+    //Candidate*& o_item = (*s2n)[item->state_];
+    //end: original code
+
     if (!o_item) o_item = item;
 
     int& node_id = o_item->node_index_;
@@ -254,7 +272,19 @@ public:
     // score is the same for all items with a common residual DP
     // state
     if (item->vit_prob_ > o_item->vit_prob_) {
-      assert(o_item->state_ == item->state_);    // sanity check!
+      //start: new code by lijunhui
+      if (models.HaveEraseState()) {
+        assert(models.GetRealFFState(o_item->state_) == models.GetRealFFState(item->state_));    // sanity check!
+        node_states_[o_item->node_index_] = item->state_;
+      } else {
+        assert(o_item->state_ == item->state_);    // sanity check!
+      }
+      //end: new code by lijunhui
+
+      //start: original code
+      //assert(o_item->state_ == item->state_);    // sanity check!
+      //end: original code
+
       o_item->est_prob_ = item->est_prob_;
       o_item->vit_prob_ = item->vit_prob_;
     }
@@ -599,7 +629,7 @@ void ApplyModelSet(const Hypergraph& in,
   if (models.stateless() || config.algorithm == IntersectionConfiguration::FULL) {
     NoPruningRescorer ma(models, smeta, in, out); // avoid overhead of best-first when no state
     ma.Apply();
-  } else if (config.algorithm == IntersectionConfiguration::CUBE 
+  } else if (config.algorithm == IntersectionConfiguration::CUBE
              || config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING
              || config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING_2) {
     int pl = config.pop_limit;
@@ -628,4 +658,3 @@ void ApplyModelSet(const Hypergraph& in,
   out->is_linear_chain_ = in.is_linear_chain_;  // TODO remove when this is computed
                                                 // automatically
 }
-
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 7f7e075b..e7a8bbc1 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -3,6 +3,7 @@
 #include "ff.h"
 #include "ff_basic.h"
 #include "ff_context.h"
+#include "ff_const_reorder.h"
 #include "ff_spans.h"
 #include "ff_lm.h"
 #include "ff_klm.h"
@@ -77,6 +78,6 @@ void register_feature_functions() {
   ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);
   ff_registry.Register("SourcePathFeatures", new FFFactory<SourcePathFeatures>);
   ff_registry.Register("WordSet", new FFFactory<WordSet>);
+  ff_registry.Register("ConstReorderFeature", new ConstReorderFeatureFactory());
   ff_registry.Register("External", new FFFactory<ExternalFeature>);
 }
-
diff --git a/decoder/ff_const_reorder.cc b/decoder/ff_const_reorder.cc
new file mode 100644
index 00000000..77582b7c
--- /dev/null
+++ b/decoder/ff_const_reorder.cc
@@ -0,0 +1,1318 @@
+#include "ff_const_reorder.h"
+
+#include "stringlib.h"
+#include "hg.h"
+#include "sentence_metadata.h"
+#include "synutils/tree.h"
+#include "synutils/srl_sentence.h"
+#include "synutils/tsuruoka_maxent.h"
+#include "hash.h"
+#include "synutils/argument_reorder_model.h"
+
+#include <string>
+#include <vector>
+#include <stdio.h>
+
+using namespace std;
+
+typedef HASH_MAP<std::string, vector<double> > MapClassifier;
+
+/*
+ * Fixed-size array of bits, packed eight per byte: bit (i % 8) of byte (i / 8) holds element i.
+ * All bits start at 0. A negative index counts back from the end (-1 is the last bit).
+ */
+struct SBitArray{
+	SBitArray(int size) :
+	size_(size){
+		//number of bytes needed, rounded up
+		int byte_size = size / 8;
+		if (size % 8 > 0)
+			byte_size++;
+
+		char_ = new unsigned char[byte_size];
+		memset(char_, 0, byte_size);
+	}
+	~SBitArray() {
+		delete [] char_;
+	}
+
+	//returns the bit at index as 0 or 1
+	int Get(int index) const {
+		const int i = Normalize(index);
+		return (char_[i / 8] >> (i % 8)) & 0x01;
+	}
+
+	//sets the bit at index to val; val must be 0 or 1
+	void Set(int index, int val) {
+		assert(val == 0 || val == 1);
+		const int i = Normalize(index);
+		const unsigned char mask = static_cast<unsigned char>(0x01 << (i % 8));
+		if (val == 0)
+			char_[i / 8] &= static_cast<unsigned char>(~mask);
+		else
+			char_[i / 8] |= mask;
+	}
+
+private:
+	//the struct owns char_, so a member-wise copy would delete the buffer twice;
+	//copying is therefore disabled (declared but never defined)
+	SBitArray(const SBitArray&);
+	SBitArray& operator=(const SBitArray&);
+
+	//maps a negative index to size_ + index and asserts that the result is in [0, size_)
+	int Normalize(int index) const {
+		const int i = (index < 0) ? size_ + index : index;
+		assert(i > -1 && i < size_);
+		return i;
+	}
+
+	const int size_;
+	unsigned char *char_;
+};
+
+//true iff position i lies in the closed range [left, right]
+inline bool is_inside(int i, int left, int right) {
+	return left <= i && i <= right;
+}
+
+/*
+ * precondition: i <= j
+ * true iff the span [i, j] lies within [left, right]; identical spans count as inside
+ */
+inline bool is_inside(int i, int j, int left, int right) {
+	return left <= i && j <= right;
+}
+
+/*
+ * precondition: i <= j
+ * true iff [i, j] lies within [left, right] and is strictly shorter than it,
+ * i.e. the two spans are not identical
+ */
+inline bool is_proper_inside(int i, int j, int left, int right) {
+	const bool contained = left <= i && j <= right;
+	return contained && (j - i) < (right - left);
+}
+
+/*
+ * precondition: i <= j
+ * true iff [i, j] lies strictly within [left, right], touching neither boundary
+ */
+inline bool is_proper_proper_inside(int i, int j, int left, int right) {
+	return left < i && j < right;
+}
+
+//true iff one of the closed spans [left1, right1] and [left2, right2] starts inside the other
+inline bool is_overlap(int left1, int right1, int left2, int right2) {
+	const bool first_starts_in_second = is_inside(left1, left2, right2);
+	return first_starts_in_second || is_inside(left2, left1, right1);
+}
+
+//stores in *p a copy of the C string q allocated with new[], or NULL when q is NULL
+inline void NewAndCopyCharArray(char**p, const char* q) {
+	if (q == NULL) {
+		*p = NULL;
+		return;
+	}
+	const size_t buffer_len = strlen(q) + 1; //room for the terminating '\0'
+	*p = new char[buffer_len];
+	strcpy(*p, q);
+}
+
+//TODO:to make the alignment more efficient
+/*
+ * Word alignment between a source span and the target words produced for it.
+ * Source positions are absolute and must lie in [begin_pos, end_pos]; target positions
+ * are indices in [0, e_num_word).
+ * The object owns every SBitArray and AlignmentPoint it allocates.
+ */
+struct TargetTranslation{
+	typedef vector<int> SingleWordAlign;
+	TargetTranslation(int begin_pos, int end_pos, int input_begin_pos, int input_end_pos, int e_num_word):
+		//initializers are listed in member declaration order, which is the order they run in
+		begin_pos_(begin_pos),
+		end_pos_(end_pos),
+		input_begin_pos_(input_begin_pos),
+		input_end_pos_(input_end_pos),
+		e_num_words_(e_num_word),
+		vec_f_align_bit_array_(end_pos - begin_pos + 1, NULL),
+		vec_e_align_bit_array_(e_num_word, NULL),
+		vec_left_most_(end_pos - begin_pos + 1, e_num_word),
+		vec_right_most_(end_pos - begin_pos + 1, -1) {
+		int len = end_pos - begin_pos + 1;
+		align_.reserve(1.5 * len);
+	}
+	~TargetTranslation( ) {
+		//delete on a NULL pointer is a no-op, so unused slots need no check
+		for (size_t i = 0; i < vec_f_align_bit_array_.size(); i++)
+			delete vec_f_align_bit_array_[i];
+
+		for (size_t i = 0; i < vec_e_align_bit_array_.size(); i++)
+			delete vec_e_align_bit_array_[i];
+
+		for (size_t i = 0; i < align_.size(); i++)
+			delete align_[i];
+	}
+
+	/*
+	 * records that source word s (absolute position) aligns to target word t;
+	 * updates both bit matrices, the alignment list and the left/right-most target position of s
+	 */
+	void InsertAlignmentPoint(int s, int t) {
+		int i = s - begin_pos_;
+
+		SBitArray* &b = vec_f_align_bit_array_[i];
+		if (b == NULL)
+			b = new SBitArray(e_num_words_);
+		b->Set(t, 1);
+
+		SBitArray* &a = vec_e_align_bit_array_[t];
+		if (a == NULL)
+			a = new SBitArray(end_pos_ - begin_pos_ + 1);
+		a->Set(i, 1);
+
+		align_.push_back(new AlignmentPoint(s, t));
+
+		if (t > vec_right_most_[i])
+			vec_right_most_[i] = t;
+		if (t < vec_left_most_[i])
+			vec_left_most_[i] = t;
+	}
+
+	/*
+	 * given a source span [begin, end], whether its target side is continuous,
+	 * return "0": no word of the source span is aligned
+	 * return "1": there is at least one aligned word inside its target span that doesn't align to any word inside [begin, end]
+	 * return "2": otherwise
+	 */
+	string IsTargetConstinousSpan(int begin, int end) const {
+		static const char* const labels[] = {"0", "1", "2"};
+		return labels[TargetSpanStatus(begin, end)];
+	}
+
+	/*
+	 * same test as IsTargetConstinousSpan, reported as "Unaligned", "Discon't" or "Con't"
+	 */
+	string IsTargetConstinousSpan2(int begin, int end) const {
+		static const char* const labels[] = {"Unaligned", "Discon't", "Con't"};
+		return labels[TargetSpanStatus(begin, end)];
+	}
+
+	/*
+	 * finds the left-most and right-most target positions aligned to any source word in [begin, end];
+	 * both outputs are -1 when no word of the span is aligned
+	 */
+	void FindLeftRightMostTargetSpan(int begin, int end, int& target_begin, int& target_end) const {
+		int b = begin - begin_pos_;
+		int e = end - begin_pos_ + 1;
+
+		target_begin = vec_left_most_[b];
+		target_end = vec_right_most_[b];
+		for (int i = b + 1; i < e; i++) {
+			if (target_begin > vec_left_most_[i])
+				target_begin = vec_left_most_[i];
+			if (target_end < vec_right_most_[i])
+				target_end = vec_right_most_[i];
+		}
+		//vec_right_most_ entries stay -1 for unaligned words, so -1 here means nothing was aligned
+		if (target_end == -1)
+			target_begin = -1;
+	}
+
+	const uint16_t begin_pos_, end_pos_;              //the position in parse
+	const uint16_t input_begin_pos_, input_end_pos_;  //the position in input
+	const uint16_t e_num_words_;
+	vector<AlignmentPoint*> align_;
+private:
+	/*
+	 * shared body of the two IsTargetConstinousSpan variants
+	 * return 0: no word of [begin, end] is aligned
+	 * return 1: some aligned target word inside the target span has no alignment into [begin, end]
+	 * return 2: otherwise
+	 */
+	int TargetSpanStatus(int begin, int end) const {
+		int target_begin, target_end;
+		FindLeftRightMostTargetSpan(begin, end, target_begin, target_end);
+		if (target_begin == -1) return 0;
+
+		for (int i = target_begin; i <= target_end; i++) {
+			if (vec_e_align_bit_array_[i] == NULL) continue; //e[i] is unaligned
+			int j = begin;
+			for (; j <= end; j++) {
+				if (vec_e_align_bit_array_[i]->Get(j - begin_pos_))
+					break;
+			}
+			if (j == end + 1) //e[i] is aligned, but not to any source word in [begin, end]
+				return 1;
+		}
+		return 2;
+	}
+
+	vector<SBitArray*> vec_f_align_bit_array_;  //per source word: bits over target positions
+	vector<SBitArray*> vec_e_align_bit_array_;  //per target word: bits over source positions (relative to begin_pos_)
+
+	vector<short> vec_left_most_;   //per source word: smallest aligned target position, e_num_words_ if none
+	vector<short> vec_right_most_;  //per source word: largest aligned target position, -1 if none
+};
+
+/*
+ * Collects the tree nodes whose children are considered for constituent reordering.
+ * For every terminal the parent chain is climbed; each ancestor whose label is not "ROOT" is recorded,
+ * and the climb stops after the first ancestor with m_iBrotherIndex != 0
+ * (presumably: the first ancestor that is not its parent's left-most child).
+ */
+struct FocusedConstituent{
+	FocusedConstituent(const SParsedTree *pTree) {
+		if (pTree == NULL) return;
+		const size_t num_terminals = pTree->m_vecTerminals.size();
+		for (size_t t = 0; t < num_terminals; t++) {
+			for (STreeItem *node = pTree->m_vecTerminals[t]->m_ptParent; node != NULL; node = node->m_ptParent) {
+				//do constituent reordering for all children of node
+				if (strcmp(node->m_pszTerm, "ROOT") != 0)
+					focus_parents_.push_back(node);
+				if (node->m_iBrotherIndex != 0) break;
+			}
+		}
+	}
+
+	~FocusedConstituent() { //TODO
+		focus_parents_.clear();
+	}
+
+	vector<STreeItem*> focus_parents_;
+};
+
+
+typedef SPredicateItem FocusedPredicate;
+
+/*
+ * Collects one FocusedPredicate per predicate of an SRL sentence, skipping every predicate
+ * whose terminal has a parent node labelled "VA".
+ * The FocusedPredicate objects are allocated here and released by the destructor.
+ */
+struct FocusedSRL{
+	FocusedSRL(const SSrlSentence *srl) {
+		if (srl == NULL) return;
+		for (size_t i = 0; i < srl->m_vecPred.size(); i++) {
+			if (strcmp(srl->m_pTree->m_vecTerminals[srl->m_vecPred[i]->m_iPosition]->m_ptParent->m_pszTerm, "VA") == 0)
+				continue;
+			focus_predicates_.push_back(new FocusedPredicate(srl->m_pTree, srl->m_vecPred[i]));
+		}
+	}
+
+	~FocusedSRL() {
+		//clear() alone only dropped the pointers and leaked every predicate allocated in the constructor
+		//NOTE(review): assumes no other code deletes these objects - confirm against FreeSentenceVariables
+		for (size_t i = 0; i < focus_predicates_.size(); i++)
+			delete focus_predicates_[i];
+		focus_predicates_.clear();
+	}
+
+	vector<const FocusedPredicate*> focus_predicates_;
+};
+
+/*
+ * Note:
+ *      In the BOLT experiments, for practical reasons some word sequences had to be merged into one term (e.g. from "1999 nian 1 yue 10 ri" to "1999_nian_1_yue_10_ri");
+ *      the parse file, however, still contains the parse tree from before any words were merged;
+ *      therefore the words in the source sentence and in the parse tree diverge, and a word in the merged sentence has to be mapped to its original index;
+ *      a word in the source sentence maps to 1 or more words in the parse tree;
+ *      the index mapping is stored in the variable index_map_;
+ *      if index_map_ is NULL, the word indices in the source sentence and in the parse tree are always the same.
+ *
+ *      In ConstReorderFeatureImpl, alignment info is stored using the word indices of the parse tree.
+ */
+
+/*
+ * Two-way mapping between word indices of the input sentence and word indices of the parse tree.
+ * When constructed with an empty file name both tables are NULL and every mapping is the identity.
+ */
+struct SIndexMap{
+	/*
+	 * reads one line from index_map_file (a single fnReadNextLine call), splits it on whitespace and
+	 * parses each term with atoi as the number of parse words covered by the input word at that position
+	 */
+	SIndexMap(const string& index_map_file) {
+		if (index_map_file == "") {
+			index_map_input_2_parse = NULL;
+			index_map_parse_2_input = NULL;
+			return;
+		}
+		STxtFileReader *reader = new STxtFileReader(index_map_file.c_str());
+		char szLine[10001];
+		szLine[0] = '\0';
+		reader->fnReadNextLine(szLine, NULL);
+		delete reader;
+		vector<string> terms;
+		SplitOnWhitespace(string(szLine), &terms);
+
+		//index_map_input_2_parse[k]: parse index of the first word of input word k;
+		//the extra last entry holds the total number of parse words
+		index_map_input_2_parse = new short int[terms.size() + 1];
+		int ix = 0;
+		size_t i;
+		for (i = 0; i < terms.size(); i++) {
+			index_map_input_2_parse[i] = ix;
+			ix += atoi(terms[i].c_str());
+		}
+		index_map_input_2_parse[i] = ix;
+		//assert(ix == parsed_tree_->m_vecTerminals.size());
+
+		//index_map_parse_2_input[p]: input index of parse word p;
+		//the extra last entry holds the number of input words, so [p + 1] can always be read
+		index_map_parse_2_input = new short int[ix+1];
+		int jx = 0;
+		for (i = 0; i < terms.size(); i++) {
+			int num_word = atoi(terms[i].c_str());
+			for (int j = 0; j < num_word; j++)
+				index_map_parse_2_input[jx++] = i;
+		}
+		index_map_parse_2_input[jx] = i;
+		assert(jx == ix);
+	}
+
+	~SIndexMap() {
+		//both tables were allocated with new[], so they must be released with delete[];
+		//delete[] on a NULL pointer is a no-op
+		delete [] index_map_input_2_parse;
+		delete [] index_map_parse_2_input;
+	}
+
+	/*
+	 * an input word maps to 1 or more words in parse
+	 */
+	void MapIndex_Input_2_Parse(short int ix, short int& mapped_begin, short int& mapped_end) {
+		MapIndex_Input_2_Parse(ix, ix, mapped_begin, mapped_end);
+	}
+
+	/*
+	 * given the indices in input,
+	 * return the indices in parse tree
+	 */
+	void MapIndex_Input_2_Parse(short int begin, short int end, short int& mapped_begin, short int& mapped_end) {
+		if (index_map_input_2_parse == NULL) {
+			mapped_begin = begin;
+			mapped_end = end;
+			return;
+		}
+
+		mapped_begin = index_map_input_2_parse[begin];
+		mapped_end = index_map_input_2_parse[end + 1] - 1;
+	}
+
+	/*
+	 * given the indices in parse tree,
+	 * return the indices in input;
+	 * [mapped_begin, mapped_end] must start and end on input-word boundaries (asserted)
+	 */
+	void MapIndex_Parse_2_Input(short int mapped_begin, short int mapped_end, short int& begin, short int& end) {
+		if (index_map_parse_2_input == NULL) {
+			begin = mapped_begin;
+			end = mapped_end;
+			return;
+		}
+
+		begin = index_map_parse_2_input[mapped_begin];
+		end = index_map_parse_2_input[mapped_end];
+
+		assert(mapped_begin == 0 || index_map_parse_2_input[mapped_begin - 1] != index_map_parse_2_input[mapped_begin]);
+		assert(index_map_parse_2_input[mapped_end + 1] != index_map_parse_2_input[mapped_end]);
+	}
+
+	/*
+	 * given an index in input,
+	 * return the number of its corresponding words in parse tree
+	 */
+	int MapIndexWordCount(int ix) {
+		if (index_map_input_2_parse == NULL)
+			return 1;
+		return index_map_input_2_parse[ix + 1] - index_map_input_2_parse[ix];
+	}
+
+private:
+
+	short int *index_map_input_2_parse;
+	short int *index_map_parse_2_input;
+};
+
+struct ConstReorderFeatureImpl{
+	/*
+	 * param is a whitespace-separated list of 1, or 3 to 6, terms:
+	 *   terms[0]  prefix of the constituent reorder model; ".left" and ".right" are appended and passed to InitializeClassifier
+	 *   terms[1]  "1" turns on the constituent block feature
+	 *   terms[2]  "1" turns on the constituent order feature
+	 *   terms[3]  prefix of the SRL reorder model (".left" / ".right" appended likewise)
+	 *   terms[4]  "1" turns on the SRL block feature
+	 *   terms[5]  "1" turns on the SRL order feature
+	 * With a single term both constituent features are on.
+	 * The SRL terms are only read when there are exactly 6 terms, and then at least one SRL feature must be on.
+	 */
+	ConstReorderFeatureImpl(const std::string& param) {
+
+		b_block_feature_ = false;
+		b_order_feature_ = false;
+		b_srl_block_feature_ = false;
+		b_srl_order_feature_ = false;
+
+
+		vector<string> terms;
+		SplitOnWhitespace(param, &terms);
+		if (terms.size() == 1) {
+			b_block_feature_ = true;
+			b_order_feature_ = true;
+		} else if (terms.size() >= 3) {
+			if (terms[1].compare("1") == 0)
+				b_block_feature_ = true;
+			if (terms[2].compare("1") == 0)
+				b_order_feature_ = true;
+			if (terms.size() == 6) {
+				if (terms[4].compare("1") == 0)
+					b_srl_block_feature_ = true;
+				if (terms[5].compare("1") == 0)
+					b_srl_order_feature_ = true;
+
+				assert(b_srl_block_feature_ || b_srl_order_feature_);
+			}
+
+		} else {
+			//0 or 2 terms: malformed parameter string.
+			//a bare string literal converts to true, so assert("ERROR") could never fire
+			assert(false && "ERROR");
+		}
+
+		const_reorder_classifier_left_ = NULL;
+		const_reorder_classifier_right_ = NULL;
+
+		srl_reorder_classifier_left_ = NULL;
+		srl_reorder_classifier_right_ = NULL;
+
+		if (b_order_feature_) {
+			InitializeClassifier((terms[0] + string(".left")).c_str(), &const_reorder_classifier_left_);
+			InitializeClassifier((terms[0] + string(".right")).c_str(), &const_reorder_classifier_right_);
+		}
+
+		if (b_srl_order_feature_) {
+			InitializeClassifier((terms[3] + string(".left")).c_str(), &srl_reorder_classifier_left_);
+			InitializeClassifier((terms[3] + string(".right")).c_str(), &srl_reorder_classifier_right_);
+		}
+
+		//per-sentence data; filled in by InitializeInputSentence
+		parsed_tree_ = NULL;
+		focused_consts_ = NULL;
+		index_map_ = NULL;
+
+		srl_sentence_ = NULL;
+		focused_srl_ = NULL;
+
+		map_left_ = NULL;
+		map_right_ = NULL;
+
+		map_srl_left_ = NULL;
+		map_srl_right_ = NULL;
+
+		//registration order fixes the ids of the three block-status labels
+		dict_block_status_ = new Dict();
+		dict_block_status_->Convert("Unaligned", false);
+		dict_block_status_->Convert("Discon't", false);
+		dict_block_status_->Convert("Con't", false);
+	}
+	//releases the four classifiers, the per-sentence data (via FreeSentenceVariables) and the block-status dictionary
+	~ConstReorderFeatureImpl() {
+		//delete on a NULL pointer is a no-op, so classifiers that were never created need no check
+		delete const_reorder_classifier_left_;
+		delete const_reorder_classifier_right_;
+		delete srl_reorder_classifier_left_;
+		delete srl_reorder_classifier_right_;
+
+		FreeSentenceVariables();
+		delete dict_block_status_;
+	}
+
+	//size in bytes of the state buffer: room for exactly one TargetTranslation pointer
+	static int ReserveStateSize() {
+		const size_t num_pointers = 1;
+		return num_pointers * sizeof(TargetTranslation*);
+	}
+
+	/*
+	 * Loads the data for one input sentence; FreeSentenceVariables() is called first
+	 * (presumably releasing the data of the previous sentence).
+	 * srl_file is required (asserted non-empty) when an SRL feature is on, and the parse tree is then taken from the SRL sentence;
+	 * otherwise parse_file is required and the tree is read from it.
+	 * index_map_file is passed to SIndexMap; an empty name means input and parse indices coincide.
+	 */
+	void InitializeInputSentence(const std::string& parse_file, const std::string& srl_file, const std::string& index_map_file) {
+		FreeSentenceVariables();
+		if (b_srl_block_feature_ || b_srl_order_feature_) {
+			assert(srl_file != "");
+			srl_sentence_ = ReadSRLSentence(srl_file);
+			//NOTE(review): srl_sentence_ is dereferenced without a NULL check - confirm ReadSRLSentence cannot return NULL
+			parsed_tree_ = srl_sentence_->m_pTree;
+		} else {
+			assert(parse_file != "");
+			srl_sentence_ = NULL;
+			parsed_tree_ = ReadParseTree(parse_file);
+		}
+
+		if (b_block_feature_ || b_order_feature_) {
+			focused_consts_ = new FocusedConstituent(parsed_tree_);
+
+			if (b_order_feature_) {
+				//we can do the classifier "off-line"
+				map_left_ = new MapClassifier();
+				map_right_ = new MapClassifier();
+				InitializeConstReorderClassifierOutput();
+			}
+		}
+
+		if (b_srl_block_feature_ || b_srl_order_feature_) {
+			focused_srl_ = new FocusedSRL(srl_sentence_);
+
+			if (b_srl_order_feature_) {
+				map_srl_left_ = new MapClassifier();
+				map_srl_right_ = new MapClassifier();
+				InitializeSRLReorderClassifierOutput();
+			}
+		}
+
+		index_map_ = new SIndexMap(index_map_file);
+
+		//pre-size the list of TargetTranslation objects created while decoding this sentence:
+		//20 * n^3 slots for a parse with n terminals, or a fixed 1000000 when there is no parse
+		if (parsed_tree_ != NULL) {
+			size_t i = parsed_tree_->m_vecTerminals.size();
+			vec_target_tran_.reserve(20 * i * i * i);
+		} else
+			vec_target_tran_.reserve(1000000);
+	}
+
+	/*
+	 * Builds the alignment state of an edge and then computes its reorder features.
+	 *   edge        the hypergraph edge being scored; its rule and source span [i_, j_) are read
+	 *   features    receives the feature values (filled in by the overload called at the end)
+	 *   ant_states  one state buffer per tail node, each holding a TargetTranslation pointer
+	 *   state       output buffer; a pointer to the new TargetTranslation is written to it
+	 * Does nothing when no parse tree is loaded.
+	 */
+	void SetConstReorderFeature(const Hypergraph::Edge& edge, SparseVector<double>* features, const vector<const void*>& ant_states, void* state) {
+		if (parsed_tree_ == NULL) return;
+
+		//source span of the edge, converted from input indices to parse-tree indices
+		short int mapped_begin, mapped_end;
+		index_map_->MapIndex_Input_2_Parse(edge.i_, edge.j_ - 1, mapped_begin, mapped_end);
+
+		typedef TargetTranslation* PtrTargetTranslation;
+		PtrTargetTranslation* remnant = reinterpret_cast<PtrTargetTranslation*>(state);
+
+		//the TargetTranslation of every antecedent, in tail-node order
+		vector<const TargetTranslation*> vec_node;
+		vec_node.reserve(edge.tail_nodes_.size());
+		for (size_t i = 0; i < edge.tail_nodes_.size(); i++) {
+			const PtrTargetTranslation* astate = reinterpret_cast<const PtrTargetTranslation*>(ant_states[i]);
+			vec_node.push_back(astate[0]);
+		}
+
+		//target length: the rule's target symbols, with each antecedent contributing
+		//its own word count in place of the one symbol that stands for it
+		int e_num_word = edge.rule_->e_.size();
+		for (size_t i = 0; i < vec_node.size(); i++) {
+			e_num_word += vec_node[i]->e_num_words_;
+			e_num_word--;
+		}
+
+		//the new object is also stored in vec_target_tran_ (presumably so it can be freed later)
+		remnant[0] = new TargetTranslation(mapped_begin, mapped_end, edge.i_, edge.j_ - 1, e_num_word);
+		vec_target_tran_.push_back(remnant[0]);
+
+		//reset the alignment
+		//for the source side, we know its position in source sentence
+		//for the target side, we always assume its starting position is 0
+		unsigned vc = 0;
+		const TRulePtr rule = edge.rule_;
+		//f_index[i]: input position at which the i-th source symbol of the rule starts
+		std::vector<int> f_index(rule->f_.size());
+		int index = edge.i_;
+		for (unsigned i = 0; i < rule->f_.size(); i++) {
+			f_index[i] = index;
+			const WordID& c = rule->f_[i];
+			if (c < 1)
+				index = vec_node[vc++]->input_end_pos_ + 1;
+			else
+				index++;
+		}
+		assert(vc == vec_node.size());
+		assert(index == edge.j_);
+
+		//e_index[i]: target position at which the i-th target symbol of the rule starts;
+		//a symbol c < 1 refers to antecedent number -c
+		std::vector<int> e_index(rule->e_.size());
+		index = 0;
+		vc = 0;
+		for (unsigned i = 0; i < rule->e_.size(); i++) {
+			e_index[i] = index;
+			const WordID& c = rule->e_[i];
+			if (c < 1) {
+			    index += vec_node[-c]->e_num_words_;
+			    vc++;
+			}
+			else
+			index++;
+		}
+		assert(vc == vec_node.size());
+
+		//copy the alignment points of every antecedent, shifting their target side
+		//to where the antecedent sits in this edge's target string
+		size_t nt_pos = 0;
+		for (size_t i = 0; i < edge.rule_->f_.size(); i++) {
+			if (edge.rule_->f_[i] > 0) continue;
+
+			//it's an NT
+			size_t j;
+			for (j = 0; j < edge.rule_->e_.size(); j++)
+				if (edge.rule_->e_[j] * -1 == nt_pos)
+					break;
+			assert(j != edge.rule_->e_.size());
+			nt_pos++;
+
+			//i aligns j
+	        int eindex = e_index[j];
+	        const vector<AlignmentPoint*>& align = vec_node[-1 * edge.rule_->e_[j]]->align_;
+	        for (size_t k = 0; k < align.size(); k++) {
+	        	remnant[0]->InsertAlignmentPoint(align[k]->s_, eindex + align[k]->t_);
+	        }
+		}
+		//add the rule's own word alignments; an input word that covers several parse words
+		//yields one alignment point per parse word
+		for (size_t i = 0; i < edge.rule_->a_.size(); i++) {
+			short int parse_index_begin, parse_index_end;
+			index_map_->MapIndex_Input_2_Parse(f_index[edge.rule_->a_[i].s_], parse_index_begin, parse_index_end);
+	        int findex = parse_index_begin;
+	        int eindex = e_index[edge.rule_->a_[i].t_];
+	        int word_count = index_map_->MapIndexWordCount(f_index[edge.rule_->a_[i].s_]);
+	        assert(word_count == parse_index_end - parse_index_begin + 1);
+	        for (int i = 0; i < word_count; i++) //this i shadows the outer loop index
+	        	remnant[0]->InsertAlignmentPoint(findex + i, eindex);
+		}
+
+
+	    //till now, we finished setting state values
+	    //next, use the state values to calculate constituent reorder feature
+	    SetConstReorderFeature(mapped_begin, mapped_end, features, remnant[0], vec_node, f_index);
+	}
+
+	void SetConstReorderFeature(short int mapped_begin, short int mapped_end, SparseVector<double>* features, const TargetTranslation* target_translation, const vector<const TargetTranslation*>& vec_node, std::vector<int>& findex) {
+		if (b_srl_block_feature_ || b_srl_order_feature_){
+			double logprob_srl_reorder_left = 0.0, logprob_srl_reorder_right = 0.0;
+			for (size_t i = 0; i < focused_srl_->focus_predicates_.size(); i++) {
+				const FocusedPredicate* pred = focused_srl_->focus_predicates_[i];
+				if (!is_overlap(mapped_begin, mapped_end, pred->begin_, pred->end_)) continue; //have no overlap between this predicate (with its argument) and the current edge
+
+				size_t j;
+				for (j = 0; j < vec_node.size(); j++) {
+					if (is_inside(pred->begin_, pred->end_, vec_node[j]->begin_pos_, vec_node[j]->end_pos_))
+						break;
+				}
+				if (j < vec_node.size()) continue;
+
+				vector<int> vecBlockStatus;
+				vecBlockStatus.reserve(pred->vec_items_.size());
+				for (j = 0; j < pred->vec_items_.size(); j++) {
+					const STreeItem *con1 = pred->vec_items_[j]->tree_item_;
+					if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {vecBlockStatus.push_back(0); continue;} //the node is partially outside the current edge
+
+					string type = target_translation->IsTargetConstinousSpan2(con1->m_iBegin, con1->m_iEnd);
+					vecBlockStatus.push_back(dict_block_status_->Convert(type, false));
+
+					if (!b_srl_block_feature_) continue;
+					//see if the node is covered by an NT
+					size_t k;
+					for (k = 0; k < vec_node.size(); k++) {
+						if (is_inside(con1->m_iBegin, con1->m_iEnd, vec_node[k]->begin_pos_, vec_node[k]->end_pos_))
+							break;
+					}
+					if (k < vec_node.size()) continue;
+					int f_id = FD::Convert(string(pred->vec_items_[j]->role_) + type);
+					if (f_id)
+						features->add_value(f_id, 1);
+				}
+
+				if (!b_srl_order_feature_) continue;
+
+				vector<int> vecPosition, vecRelativePosition;
+				vector<int> vecRightPosition, vecRelativeRightPosition;
+				vecPosition.reserve(pred->vec_items_.size());
+				vecRelativePosition.reserve(pred->vec_items_.size());
+				vecRightPosition.reserve(pred->vec_items_.size());
+				vecRelativeRightPosition.reserve(pred->vec_items_.size());
+				for (j = 0; j < pred->vec_items_.size(); j++) {
+					const STreeItem *con1 = pred->vec_items_[j]->tree_item_;
+					if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {vecPosition.push_back(-1);  vecRightPosition.push_back(-1);continue;} //the node is partially outside the current edge
+					int left1 = -1, right1 = -1;
+					target_translation->FindLeftRightMostTargetSpan(con1->m_iBegin, con1->m_iEnd, left1, right1);
+					vecPosition.push_back(left1);
+					vecRightPosition.push_back(right1);
+				}
+				fnGetRelativePosition(vecPosition, vecRelativePosition);
+				fnGetRelativePosition(vecRightPosition, vecRelativeRightPosition);
+
+				for (j = 1; j < pred->vec_items_.size(); j++) {
+					const STreeItem *con1 = pred->vec_items_[j - 1]->tree_item_;
+					const STreeItem *con2 = pred->vec_items_[j]->tree_item_;
+
+					if (con1->m_iBegin < mapped_begin || con2->m_iEnd > mapped_end) continue; //one of the two nodes is partially outside the current edge
+
+					//both con1 and con2 are covered, need to check if they are covered by the same NT
+					size_t k;
+					for (k = 0; k < vec_node.size(); k++) {
+						if (is_inside(con1->m_iBegin, con2->m_iEnd, vec_node[k]->begin_pos_, vec_node[k]->end_pos_))
+							break;
+					}
+					if (k < vec_node.size()) continue;
+
+					//they are not covered bye the same NT
+					string outcome;
+					string key;
+					GenerateKey(pred->vec_items_[j-1]->tree_item_, pred->vec_items_[j]->tree_item_, vecBlockStatus[j-1], vecBlockStatus[j], key);
+
+					fnGetOutcome(vecRelativePosition[j - 1], vecRelativePosition[j], outcome);
+					double prob = CalculateConstReorderProb(srl_reorder_classifier_left_, map_srl_left_, key, outcome);
+					//printf("%s %s %f\n", ostr.str().c_str(), outcome.c_str(), prob);
+					logprob_srl_reorder_left += log10(prob);
+
+
+					fnGetOutcome(vecRelativeRightPosition[j - 1], vecRelativeRightPosition[j], outcome);
+					prob = CalculateConstReorderProb(srl_reorder_classifier_right_, map_srl_right_, key, outcome);
+					logprob_srl_reorder_right += log10(prob);
+				}
+			}
+
+			if (b_srl_order_feature_) {
+				int f_id = FD::Convert("SRLReorderFeatureLeft");
+				if (f_id && logprob_srl_reorder_left != 0.0)
+					features->set_value(f_id, logprob_srl_reorder_left);
+				f_id = FD::Convert("SRLReorderFeatureRight");
+				if (f_id && logprob_srl_reorder_right != 0.0)
+					features->set_value(f_id, logprob_srl_reorder_right);
+			}
+		}
+
+		if (b_block_feature_ || b_order_feature_){
+			double logprob_const_reorder_left = 0.0, logprob_const_reorder_right = 0.0;
+
+			for (size_t i = 0; i < focused_consts_->focus_parents_.size(); i++) {
+				STreeItem* parent = focused_consts_->focus_parents_[i];
+				if (!is_overlap(mapped_begin, mapped_end, parent->m_iBegin, parent->m_iEnd)) continue; //have no overlap between this parent node and the current edge
+
+				size_t j;
+				for (j = 0; j < vec_node.size(); j++) {
+					if (is_inside(parent->m_iBegin, parent->m_iEnd, vec_node[j]->begin_pos_, vec_node[j]->end_pos_))
+						break;
+				}
+				if (j < vec_node.size()) continue;
+
+
+				if (b_block_feature_) {
+					if (parent->m_iBegin >= mapped_begin && parent->m_iEnd <= mapped_end) {
+						string type = target_translation->IsTargetConstinousSpan2(parent->m_iBegin, parent->m_iEnd);
+						int f_id = FD::Convert(string(parent->m_pszTerm) + type);
+						if (f_id)
+							features->add_value(f_id, 1);
+					}
+				}
+
+				if (parent->m_vecChildren.size() == 1 || !b_order_feature_) continue;
+
+				vector<int> vecChunkBlock;
+				vecChunkBlock.reserve(parent->m_vecChildren.size());
+
+				for (j = 0; j < parent->m_vecChildren.size(); j++) {
+					STreeItem *con1 = parent->m_vecChildren[j];
+					if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {vecChunkBlock.push_back(0); continue;} //the node is partially outside the current edge
+
+					string type = target_translation->IsTargetConstinousSpan2(con1->m_iBegin, con1->m_iEnd);
+					vecChunkBlock.push_back(dict_block_status_->Convert(type, false));
+
+					/*if (!b_block_feature_) continue;
+					//see if the node is covered by an NT
+					size_t k;
+					for (k = 0; k < vec_node.size(); k++) {
+						if (is_inside(con1->m_iBegin, con1->m_iEnd, vec_node[k]->begin_pos_, vec_node[k]->end_pos_))
+							break;
+					}
+					if (k < vec_node.size()) continue;
+					int f_id = FD::Convert(string(con1->m_pszTerm) + type);
+					if (f_id)
+						features->add_value(f_id, 1);*/
+				}
+
+				if (!b_order_feature_) continue;
+
+				vector<int> vecPosition, vecRelativePosition;
+				vector<int> vecRightPosition, vecRelativeRightPosition;
+				vecPosition.reserve(parent->m_vecChildren.size());
+				vecRelativePosition.reserve(parent->m_vecChildren.size());
+				vecRightPosition.reserve(parent->m_vecChildren.size());
+				vecRelativeRightPosition.reserve(parent->m_vecChildren.size());
+				for (j = 0; j < parent->m_vecChildren.size(); j++) {
+					STreeItem *con1 = parent->m_vecChildren[j];
+					if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {vecPosition.push_back(-1);  vecRightPosition.push_back(-1);continue;} //the node is partially outside the current edge
+					int left1 = -1, right1 = -1;
+					target_translation->FindLeftRightMostTargetSpan(con1->m_iBegin, con1->m_iEnd, left1, right1);
+					vecPosition.push_back(left1);
+					vecRightPosition.push_back(right1);
+				}
+				fnGetRelativePosition(vecPosition, vecRelativePosition);
+				fnGetRelativePosition(vecRightPosition, vecRelativeRightPosition);
+
+				for (j = 1; j < parent->m_vecChildren.size(); j++) {
+					STreeItem *con1 = parent->m_vecChildren[j - 1];
+					STreeItem *con2 = parent->m_vecChildren[j];
+
+					if (con1->m_iBegin < mapped_begin || con2->m_iEnd > mapped_end) continue; //one of the two nodes is partially outside the current edge
+
+					//both con1 and con2 are covered, need to check if they are covered by the same NT
+					size_t k;
+					for (k = 0; k < vec_node.size(); k++) {
+						if (is_inside(con1->m_iBegin, con2->m_iEnd, vec_node[k]->begin_pos_, vec_node[k]->end_pos_))
+							break;
+					}
+					if (k < vec_node.size()) continue;
+
+					//they are not covered by the same NT
+					string outcome;
+					string key;
+					GenerateKey(parent->m_vecChildren[j-1], parent->m_vecChildren[j], vecChunkBlock[j-1], vecChunkBlock[j], key);
+
+					fnGetOutcome(vecRelativePosition[j - 1], vecRelativePosition[j], outcome);
+					double prob = CalculateConstReorderProb(const_reorder_classifier_left_, map_left_, key, outcome);
+					//printf("%s %s %f\n", ostr.str().c_str(), outcome.c_str(), prob);
+					logprob_const_reorder_left += log10(prob);
+
+
+					fnGetOutcome(vecRelativeRightPosition[j - 1], vecRelativeRightPosition[j], outcome);
+					prob = CalculateConstReorderProb(const_reorder_classifier_right_, map_right_, key, outcome);
+					logprob_const_reorder_right += log10(prob);
+				}
+			}
+
+			if (b_order_feature_) {
+				int f_id = FD::Convert("ConstReorderFeatureLeft");
+				if (f_id && logprob_const_reorder_left != 0.0)
+					features->set_value(f_id, logprob_const_reorder_left);
+				f_id = FD::Convert("ConstReorderFeatureRight");
+				if (f_id && logprob_const_reorder_right != 0.0)
+					features->set_value(f_id, logprob_const_reorder_right);
+			}
+		}
+
+	}
+
+
+private:
+	// Writes n into two bytes, low byte first: str[0] receives n & 255 and
+	// str[1] receives n / 256 narrowed to unsigned char.
+	// str must have room for at least two bytes.
+	void Byte_to_Char(unsigned char *str, int n) {
+		const int low_part = n & 0xFF;
+		const int high_part = n / 256;
+		str[0] = static_cast<unsigned char>(low_part);
+		str[1] = static_cast<unsigned char>(high_part);
+	}
+	// Packs a 6-byte cache key into `key`:
+	//   bytes 0-1: pCon1->m_iBegin (via Byte_to_Char, low byte first)
+	//   bytes 2-3: pCon2->m_iEnd   (via Byte_to_Char, low byte first)
+	//   byte  4  : iBlockStatus1
+	//   byte  5  : iBlockStatus2
+	// Both block statuses must be non-zero (asserted). The key may contain
+	// embedded zero bytes, so it is built from an explicit byte range.
+	void GenerateKey(const STreeItem *pCon1, const STreeItem *pCon2, int iBlockStatus1, int iBlockStatus2, string& key) {
+		assert(iBlockStatus1 != 0);
+		assert(iBlockStatus2 != 0);
+
+		const size_t kKeyLength = 6;
+		unsigned char packed[kKeyLength + 1];
+		Byte_to_Char(packed, pCon1->m_iBegin);
+		Byte_to_Char(packed + 2, pCon2->m_iEnd);
+		packed[4] = (char)iBlockStatus1;
+		packed[5] = (char)iBlockStatus2;
+		packed[kKeyLength] = '\0';
+
+		key.assign(packed, packed + kKeyLength);
+	}
+	// Fills map_left_ / map_right_ for the current sentence when the order
+	// feature is enabled. For every focused parent, every adjacent child pair
+	// (j-1, j) and every pair of block-status ids in [1, dict max], the feature
+	// line from GenerateFeature is evaluated with the left and the right
+	// classifier and the resulting vectors are stored under the GenerateKey key.
+	void InitializeConstReorderClassifierOutput( ) {
+		if (!b_order_feature_) return;
+		const int num_status = dict_block_status_->max();
+
+		for (size_t p = 0; p < focused_consts_->focus_parents_.size(); ++p) {
+			STreeItem* node = focused_consts_->focus_parents_[p];
+
+			for (size_t right = 1; right < node->m_vecChildren.size(); ++right) {
+				for (int s1 = 1; s1 <= num_status; ++s1) {
+					for (int s2 = 1; s2 <= num_status; ++s2) {
+						ostringstream feature_stream;
+						GenerateFeature(parsed_tree_, node, right, dict_block_status_->Convert(s1), dict_block_status_->Convert(s2), feature_stream);
+						const string feature_line = feature_stream.str();
+
+						string cache_key;
+						GenerateKey(node->m_vecChildren[right - 1], node->m_vecChildren[right], s1, s2, cache_key);
+
+						// the same output vector object is handed to both classifiers, as before
+						vector<double> scores;
+						const_reorder_classifier_left_->fnEval(feature_line.c_str(), scores);
+						(*map_left_)[cache_key] = scores;
+
+						const_reorder_classifier_right_->fnEval(feature_line.c_str(), scores);
+						(*map_right_)[cache_key] = scores;
+					}
+				}
+			}
+		}
+	}
+
+	// SRL counterpart of InitializeConstReorderClassifierOutput: when the SRL
+	// order feature is enabled, evaluates both SRL classifiers for every
+	// focused predicate, every adjacent item pair (j-1, j) and every pair of
+	// block-status ids in [1, dict max], and stores the output vectors in
+	// map_srl_left_ / map_srl_right_ under the GenerateKey key of the two
+	// items' tree nodes.
+	void InitializeSRLReorderClassifierOutput() {
+		if (!b_srl_order_feature_) return;
+		const int num_status = dict_block_status_->max();
+
+		for (size_t p = 0; p < focused_srl_->focus_predicates_.size(); ++p) {
+			const FocusedPredicate *predicate = focused_srl_->focus_predicates_[p];
+
+			for (size_t right = 1; right < predicate->vec_items_.size(); ++right) {
+				for (int s1 = 1; s1 <= num_status; ++s1) {
+					for (int s2 = 1; s2 <= num_status; ++s2) {
+						ostringstream feature_stream;
+						SArgumentReorderModel::fnGenerateFeature(parsed_tree_, predicate->pred_, predicate, right, dict_block_status_->Convert(s1), dict_block_status_->Convert(s2), feature_stream);
+						const string feature_line = feature_stream.str();
+
+						string cache_key;
+						GenerateKey(predicate->vec_items_[right - 1]->tree_item_, predicate->vec_items_[right]->tree_item_, s1, s2, cache_key);
+
+						// the same output vector object is handed to both classifiers, as before
+						vector<double> scores;
+						srl_reorder_classifier_left_->fnEval(feature_line.c_str(), scores);
+						(*map_srl_left_)[cache_key] = scores;
+
+						srl_reorder_classifier_right_->fnEval(feature_line.c_str(), scores);
+						(*map_srl_right_)[cache_key] = scores;
+					}
+				}
+			}
+		}
+	}
+
+	// Looks up the cached classifier output stored under `key` in `map` and
+	// returns the entry at the class id the classifier reports for `outcome`.
+	// The key must be present (asserted).
+	// NOTE(review): the class id is used as an index without a range check;
+	// confirm fnGetClassId cannot return an id outside the cached vector.
+	double CalculateConstReorderProb(const Tsuruoka_Maxent *const_reorder_classifier, const MapClassifier *map, const string& key, const string& outcome) {
+		const MapClassifier::const_iterator cached = map->find(key);
+		assert(cached != map->end());
+		const int class_id = const_reorder_classifier->fnGetClassId(outcome);
+		return cached->second[class_id];
+	}
+
+	// Releases everything that was allocated for the current input sentence
+	// and resets the corresponding members to NULL / empty, so the function
+	// can safely be called again.
+	//
+	// The parse tree is deleted directly only when there is no SRL sentence;
+	// when an SRL sentence exists only that object is deleted (presumably it
+	// owns the tree - TODO confirm). parsed_tree_ is reset in both cases so no
+	// stale pointer survives: previously it stayed non-NULL after the SRL
+	// sentence was deleted, and a second call would have deleted it.
+	//
+	// NOTE(review): focused_srl_ is not released here; confirm it is freed
+	// elsewhere.
+	void FreeSentenceVariables( ) {
+		if (srl_sentence_ != NULL) {
+			delete srl_sentence_;
+			srl_sentence_ = NULL;
+		} else {
+			delete parsed_tree_; // deleting NULL is a no-op
+		}
+		parsed_tree_ = NULL;
+
+		delete focused_consts_;
+		focused_consts_ = NULL;
+
+		for (size_t i = 0; i < vec_target_tran_.size(); i++)
+			delete vec_target_tran_[i];
+		vec_target_tran_.clear();
+
+		delete index_map_;
+		index_map_ = NULL;
+
+		// per-sentence classifier output caches
+		delete map_left_;
+		map_left_ = NULL;
+		delete map_right_;
+		map_right_ = NULL;
+
+		delete map_srl_left_;
+		map_srl_left_ = NULL;
+		delete map_srl_right_;
+		map_srl_right_ = NULL;
+	}
+
+	// Constructs a Tsuruoka_Maxent from pszFname and stores it in *ppClassifier.
+	// Whatever *ppClassifier held before is overwritten without being freed.
+	void InitializeClassifier(const char* pszFname, Tsuruoka_Maxent **ppClassifier) {
+		Tsuruoka_Maxent *loaded = new Tsuruoka_Maxent(pszFname);
+		*ppClassifier = loaded;
+	}
+
+	// Classifies each adjacent pair (i-1, i) of vecPos and appends one label
+	// per pair to vecOutcome (which is cleared first):
+	//   "M"  monotone, "DM" discontinuous monotone,
+	//   "S"  swap,     "DS" discontinuous swap.
+	// A value of -1 marks a position that is not available. A pair is
+	// "discontinuous" when some other element's position falls between the two.
+	//
+	// All indices are signed. The earlier version used size_t, so `i - 2`
+	// wrapped around for i == 1 and `j > -1` was never true, which made the
+	// code read vecPos out of bounds whenever vecPos[i-1] was -1.
+	void GenerateOutcome(const vector<int>& vecPos, vector<string>& vecOutcome) {
+		vecOutcome.clear();
+		const int count = static_cast<int>(vecPos.size());
+
+		for (int i = 1; i < count; i++) {
+			if (vecPos[i] == -1 || vecPos[i] == vecPos[i - 1]) {
+				vecOutcome.push_back("M"); //monotone
+				continue;
+			}
+
+			if (vecPos[i - 1] == -1) {
+				//vecPos[i] is not -1
+				//walk back to the nearest earlier element with a known position;
+				//j == -1 means there is none
+				int j = i - 2;
+				while (j > -1 && vecPos[j] == -1)
+					j--;
+
+				int k;
+				//NOTE(review): the mirrored test in the branch below uses &&
+				//rather than ||; left unchanged here - confirm the intent.
+				for (k = 0; k < j; k++) {
+					if (vecPos[k] > vecPos[j] || vecPos[k] <= vecPos[i])
+						break;
+				}
+				if (k < j) {
+					vecOutcome.push_back("DM");
+					continue;
+				}
+
+				//without an earlier known element there is no lower bound, so
+				//vecPos[j] must only be read when j is valid (was `&&`, which
+				//dereferenced vecPos[-1])
+				for (k = i + 1; k < count; k++)
+					if (vecPos[k] < vecPos[i] && (j == -1 || vecPos[k] >= vecPos[j]))
+						break;
+				if (k < count) {
+					vecOutcome.push_back("DM");
+					continue;
+				}
+				vecOutcome.push_back("M");
+			} else {
+				//neither of vecPos[i-1] and vecPos[i] is -1
+				if (vecPos[i - 1] < vecPos[i]) {
+					//monotone or discontinuous monotone
+					int j;
+					for (j = 0; j < i - 1; j++)
+						if (vecPos[j] > vecPos[i - 1] && vecPos[j] <= vecPos[i])
+							break;
+					if (j < i - 1) {
+						vecOutcome.push_back("DM");
+						continue;
+					}
+					for (j = i + 1; j < count; j++)
+						if (vecPos[j] >= vecPos[i - 1] && vecPos[j] < vecPos[i])
+							break;
+					if (j < count) {
+						vecOutcome.push_back("DM");
+						continue;
+					}
+					vecOutcome.push_back("M");
+				} else {
+					//swap or discontinuous swap
+					int j;
+					for (j = 0; j < i - 1; j++)
+						if (vecPos[j] > vecPos[i] && vecPos[j] <= vecPos[i - 1])
+							break;
+					if (j < i - 1) {
+						vecOutcome.push_back("DS");
+						continue;
+					}
+					for (j = i + 1; j < count; j++)
+						if ( vecPos[j] >= vecPos[i] && vecPos[j] < vecPos[i - 1])
+							break;
+					if (j < count) {
+						vecOutcome.push_back("DS");
+						continue;
+					}
+					vecOutcome.push_back("S");
+				}
+			}
+		}
+
+		//an empty input yields an empty output; size() - 1 would underflow there
+		assert(vecPos.empty() || vecOutcome.size() == vecPos.size() - 1);
+	}
+
+	// Converts absolute positions into ranks: vecPosition[i] is the number of
+	// other entries that sort before entry i. Ties are broken by index (the
+	// earlier entry gets the smaller rank).
+	// An entry of -1 is ordered as vecLeft[i-1] + 0.1, or as -1 when it is the
+	// first entry. Keys are compared as float, exactly as before.
+	// A final sanity check prints vecLeft and asserts if two neighbouring
+	// ranks are equal.
+	void fnGetRelativePosition(const vector<int>& vecLeft, vector<int>& vecPosition) {
+		vecPosition.clear();
+		const size_t total = vecLeft.size();
+
+		// sort key for every entry
+		vector<float> sort_keys;
+		sort_keys.reserve(total);
+		for (size_t idx = 0; idx < total; ++idx) {
+			if (vecLeft[idx] != -1)
+				sort_keys.push_back(vecLeft[idx]);
+			else if (idx == 0)
+				sort_keys.push_back(-1);
+			else
+				sort_keys.push_back(vecLeft[idx - 1] + 0.1);
+		}
+
+		// rank = number of entries with a smaller key, or an equal key and a smaller index
+		for (size_t idx = 0; idx < total; ++idx) {
+			int rank = 0;
+			for (size_t other = 0; other < total; ++other) {
+				if (other == idx) continue;
+				const bool smaller = sort_keys[other] < sort_keys[idx];
+				const bool earlier_tie = sort_keys[other] == sort_keys[idx] && other < idx;
+				if (smaller || earlier_tie)
+					++rank;
+			}
+			vecPosition.push_back(rank);
+		}
+
+		for (size_t idx = 1; idx < vecPosition.size(); ++idx) {
+			if (vecPosition[idx - 1] != vecPosition[idx]) continue;
+			for (size_t k = 0; k < vecLeft.size(); ++k)
+				cout << vecLeft[k] << " ";
+			cout << "\n";
+			assert(false);
+		}
+	}
+
+	// Maps two distinct ranks to an orientation label:
+	//   i1 < i2: "M" when i2 == i1 + 1, otherwise "DM"
+	//   i1 > i2: "S" when i1 == i2 + 1, otherwise "DS"
+	inline void fnGetOutcome(int i1, int i2, string& strOutcome) {
+		assert(i1 != i2);
+		const bool in_order = i1 < i2;
+		const bool adjacent = in_order ? (i2 <= i1 + 1) : (i1 <= i2 + 1);
+		if (in_order)
+			strOutcome = adjacent ? "M" : "DM";
+		else
+			strOutcome = adjacent ? "S" : "DS";
+	}
+
+	// Writes the space-separated classifier features f1..f11 for the adjacent
+	// children (iPos-1, iPos) of pParent to ostr (features in
+	// constituent_reorder_model.cc):
+	//   f1      left_right_parent
+	//   f2, f3  f1 plus each sibling right of / left of the pair ("NULL" if none)
+	//   f4, f5  left_right plus the label of the left head word's parent / of
+	//           the head word itself
+	//   f6, f7  the same for the right child's head word
+	//   f8, f9  left_right plus the first / second block status
+	//   f10,f11 left_parent and right_parent
+	void GenerateFeature(const SParsedTree *pTree, const STreeItem *pParent, int iPos, const string& strBlockStatus1, const string& strBlockStatus2, ostringstream& ostr) {
+		const STreeItem *left_child = pParent->m_vecChildren[iPos - 1];
+		const STreeItem *right_child = pParent->m_vecChildren[iPos];
+
+		const string left_label(left_child->m_pszTerm);
+		const string right_label(right_child->m_pszTerm);
+		const string parent_label(pParent->m_pszTerm);
+
+		// prefixes shared by several features
+		const string pair_label = left_label + "_" + right_label;
+		const string pair_parent_label = pair_label + "_" + parent_label;
+
+		//f1
+		ostr << "f1=" << pair_parent_label;
+
+		//f2: one feature per sibling to the right of the pair
+		bool has_right_sibling = false;
+		for (int i = iPos + 1; i < pParent->m_vecChildren.size(); i++) {
+			ostr << " f2=" << pair_parent_label << "_" << string(pParent->m_vecChildren[i]->m_pszTerm);
+			has_right_sibling = true;
+		}
+		if (!has_right_sibling)
+			ostr << " f2=" << pair_parent_label << "_" << string("NULL");
+
+		//f3: one feature per sibling to the left of the pair
+		bool has_left_sibling = false;
+		for (int i = 0; i < iPos - 1; i++) {
+			ostr << " f3=" << pair_parent_label << "_" << string(pParent->m_vecChildren[i]->m_pszTerm);
+			has_left_sibling = true;
+		}
+		if (!has_left_sibling)
+			ostr << " f3=" << pair_parent_label << "_" << string("NULL");
+
+		//f4, f5: head word of the left child
+		ostr << " f4=" << pair_label << "_" << pTree->m_vecTerminals[left_child->m_iHeadWord]->m_ptParent->m_pszTerm;
+		ostr << " f5=" << pair_label << "_" << pTree->m_vecTerminals[left_child->m_iHeadWord]->m_pszTerm;
+		//f6, f7: head word of the right child
+		ostr << " f6=" << pair_label << "_" << pTree->m_vecTerminals[right_child->m_iHeadWord]->m_ptParent->m_pszTerm;
+		ostr << " f7=" << pair_label << "_" << pTree->m_vecTerminals[right_child->m_iHeadWord]->m_pszTerm;
+		//f8, f9: block statuses
+		ostr << " f8=" << pair_label << "_" << strBlockStatus1;
+		ostr << " f9=" << pair_label << "_" << strBlockStatus2;
+
+		//f10, f11
+		ostr << " f10=" << left_label << "_" << parent_label;
+		ostr << " f11=" << right_label << "_" << parent_label;
+	}
+
+	// Opens parse_file with an SParseReader and returns the first tree it
+	// yields, i.e. whatever fnReadNextParseTree returns (the result is not
+	// checked here). The reader only lives for the duration of the call.
+	SParsedTree* ReadParseTree(const std::string& parse_file) {
+		SParseReader reader(parse_file.c_str(), false);
+		return reader.fnReadNextParseTree();
+	}
+
+	// Opens srl_file with an SSrlSentenceReader and returns the first
+	// sentence it yields, i.e. whatever fnReadNextSrlSentence returns (the
+	// result is not checked here). The reader only lives for the duration of
+	// the call.
+	SSrlSentence* ReadSRLSentence(const std::string& srl_file) {
+		SSrlSentenceReader reader(srl_file.c_str());
+		return reader.fnReadNextSrlSentence();
+	}
+
+private:
+	// Classifiers for reordering adjacent syntactic constituents. "left" is
+	// queried with outcomes derived from left-most target positions, "right"
+	// with outcomes derived from right-most target positions.
+	Tsuruoka_Maxent *const_reorder_classifier_left_;
+	Tsuruoka_Maxent *const_reorder_classifier_right_;
+
+	// The same pair of classifiers for adjacent SRL predicate items.
+	Tsuruoka_Maxent *srl_reorder_classifier_left_;
+	Tsuruoka_Maxent *srl_reorder_classifier_right_;
+
+	// Per-sentence caches of classifier output vectors keyed by GenerateKey;
+	// filled by InitializeConstReorderClassifierOutput and deleted in
+	// FreeSentenceVariables.
+	MapClassifier *map_left_;
+	MapClassifier *map_right_;
+
+    // SRL counterparts, filled by InitializeSRLReorderClassifierOutput.
+    MapClassifier *map_srl_left_;
+    MapClassifier *map_srl_right_;
+
+	// Parse tree of the current sentence; FreeSentenceVariables deletes it
+	// directly only when srl_sentence_ is NULL.
+	SParsedTree *parsed_tree_;
+	// Source of focus_parents_, the parents whose children are scored.
+	FocusedConstituent *focused_consts_;
+	// Owned pointers; each element is deleted in FreeSentenceVariables.
+	vector<TargetTranslation*> vec_target_tran_;
+
+	// Enable the ConstReorderFeatureLeft/Right values and the per-constituent
+	// block-status features, respectively.
+	bool b_order_feature_;
+	bool b_block_feature_;
+
+	// b_srl_order_feature_ enables the SRLReorderFeatureLeft/Right values;
+	// b_srl_block_feature_ is presumably the SRL analogue of b_block_feature_
+	// (its use is not visible in this part of the file).
+	bool b_srl_block_feature_;
+	bool b_srl_order_feature_;
+	// SRL annotation of the current sentence; deleted in FreeSentenceVariables.
+	SSrlSentence *srl_sentence_;
+	// Source of focus_predicates_, the predicates whose items are scored.
+	FocusedSRL *focused_srl_;
+
+	// Deleted in FreeSentenceVariables.
+	SIndexMap *index_map_;
+
+	// Converts between block-status strings and the integer ids used in keys.
+	Dict *dict_block_status_;
+};
+
+// Creates the implementation object from `param`, reserves the state size
+// the implementation asks for, and registers the feature's name.
+ConstReorderFeature::ConstReorderFeature(const std::string& param)
+	: pimpl_(new ConstReorderFeatureImpl(param)) {
+	const int reserved_state = ConstReorderFeatureImpl::ReserveStateSize();
+	SetStateSize(reserved_state);
+	name_ = "ConstReorderFeature";
+}
+
+// Destroys the implementation object created in the constructor.
+ConstReorderFeature::~ConstReorderFeature( ) { //TODO
+	delete pimpl_;
+}
+
+// Reads the per-sentence SGML attributes "parse", "srl" and "index-map" and
+// hands them to the implementation. At least one of "parse" and "srl" must
+// be non-empty (asserted).
+void ConstReorderFeature::PrepareForInput(const SentenceMetadata& smeta) {
+	string parse_path = smeta.GetSGMLValue("parse");
+	string srl_path = smeta.GetSGMLValue("srl");
+	assert(!parse_path.empty() || !srl_path.empty());
+
+	string index_map_path = smeta.GetSGMLValue("index-map");
+	pimpl_->InitializeInputSentence(parse_path, srl_path, index_map_path);
+}
+
+// Forwards the edge, the output feature vector, the antecedent states and
+// the output state to the implementation's SetConstReorderFeature.
+// smeta and estimated_features are not used.
+void ConstReorderFeature::TraversalFeaturesImpl(const SentenceMetadata& /* smeta */,
+                                          const Hypergraph::Edge& edge,
+                                          const vector<const void*>& ant_states,
+                                          SparseVector<double>* features,
+                                          SparseVector<double>* estimated_features,
+                                          void* state) const {
+	pimpl_->SetConstReorderFeature(edge, features, ant_states, state);
+}
+
+// Returns the usage string, which is just the feature name; both arguments
+// are ignored.
+string ConstReorderFeature::usage(bool /*param*/,bool /*verbose*/) {
+  return "ConstReorderFeature";
+}
+
+// Builds a ConstReorderFeature from `param` and returns it wrapped in a
+// shared pointer to the FeatureFunction base.
+boost::shared_ptr<FeatureFunction> CreateConstReorderModel(const std::string &param) {
+	return boost::shared_ptr<FeatureFunction>(new ConstReorderFeature(param));
+}
+
+// Factory entry point: delegates to CreateConstReorderModel.
+boost::shared_ptr<FeatureFunction> ConstReorderFeatureFactory::Create(std::string param) const {
+	return CreateConstReorderModel(param);
+}
+
+// Returns ConstReorderFeature::usage for the given flags.
+std::string  ConstReorderFeatureFactory::usage(bool params,bool verbose) const {
+  return ConstReorderFeature::usage(params, verbose);
+}
diff --git a/decoder/ff_const_reorder.h b/decoder/ff_const_reorder.h
new file mode 100644
index 00000000..1aed2584
--- /dev/null
+++ b/decoder/ff_const_reorder.h
@@ -0,0 +1,41 @@
+/*
+ * ff_const_reorder.h
+ *
+ *  Created on: Jul 11, 2013
+ *      Author: junhuili
+ */
+
+#ifndef FF_CONST_REORDER_H_
+#define FF_CONST_REORDER_H_
+
+#include "ff_factory.h"
+#include "ff.h"
+
+struct ConstReorderFeatureImpl;
+
+// Feature function that scores the reordering of source-side constituents
+// (and SRL items) using the implementation class ConstReorderFeatureImpl.
+// The object owns its implementation through pimpl_ and deletes it in the
+// destructor, so copying is disabled to rule out a double delete.
+class ConstReorderFeature : public FeatureFunction {
+ public:
+  // param is passed unchanged to ConstReorderFeatureImpl.
+  // NOTE(review): the original comment here read: param = "filename n" -
+  // confirm the expected format against ConstReorderFeatureImpl.
+  ConstReorderFeature(const std::string& param);
+  ~ConstReorderFeature();
+  static std::string usage(bool param,bool verbose);
+ protected:
+  // Loads the per-sentence inputs named in the sentence metadata.
+  virtual void PrepareForInput(const SentenceMetadata& smeta);
+
+  // Computes the feature values for one hypergraph edge.
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const HG::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* out_context) const;
+ private:
+  // Not copyable: declared but intentionally left undefined.
+  ConstReorderFeature(const ConstReorderFeature&);
+  ConstReorderFeature& operator=(const ConstReorderFeature&);
+
+  ConstReorderFeatureImpl* pimpl_;  // owned; deleted in the destructor
+};
+
+
+// Factory that creates ConstReorderFeature instances from a parameter string.
+struct ConstReorderFeatureFactory : public FactoryBase<FeatureFunction> {
+  // Creates the feature function configured by param.
+  FP Create(std::string param) const;
+  // Returns the usage text of ConstReorderFeature.
+  std::string usage(bool params,bool verbose) const;
+};
+
+#endif /* FF_CONST_REORDER_H_ */
diff --git a/decoder/ffset.cc b/decoder/ffset.cc
index 5820f421..488346bc 100644
--- a/decoder/ffset.cc
+++ b/decoder/ffset.cc
@@ -15,6 +15,26 @@ ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>
     model_state_pos_[i] = state_size_;
     state_size_ += models_[i]->StateSize();
   }
+
+  //added by lijunhui
+  //record the state ranges to erase: those of the feature functions named below
+  for (int i = 0; i < models_.size(); i++) {
+    if (models_[i]->name_ == string("SRLReorderFeature")
+      || models_[i]->name_ == string("DepPrnFeatures")
+      || models_[i]->name_ == string("SyntacticContextFeature")
+      || models_[i]->name_ == string("ArgtReorderFeature")
+      || models_[i]->name_ == string("ConstReorderSparseFeature")
+      || models_[i]->name_ == string("ConstReorderFeature")) {
+      int start_pos = model_state_pos_[i];
+      int end_pos;
+      if (i == models_.size() - 1)
+        end_pos = state_size_;
+      else
+        end_pos = model_state_pos_[i + 1];
+      erase_state_start_pos_.push_back(start_pos);
+      erase_state_end_pos_.push_back(end_pos);
+    }
+  }
 }
 
 void ModelSet::PrepareForInput(const SentenceMetadata& smeta) {
@@ -70,3 +90,32 @@ void ModelSet::AddFinalFeatures(const FFState& state, HG::Edge* edge,SentenceMet
   edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));
 }
 
+// True when at least one state range was registered for erasing.
+bool ModelSet::HaveEraseState() const {
+  return !erase_state_start_pos_.empty();
+}
+
+// Copies `state` into `real_state` and then zeroes every registered erase
+// range [start, end). An empty state is returned as an empty copy;
+// otherwise the state must have exactly state_size_ entries (asserted).
+void ModelSet::GetRealFFState(const FFState& state, FFState& real_state) const {
+  real_state.resize(state.size());
+  for (int pos = 0; pos < state.size(); ++pos)
+    real_state[pos] = state[pos];
+
+  if (state.size() == 0) return;
+  assert(state.size() == state_size_);
+
+  // blank out the state ranges registered for erasing
+  for (int r = 0; r < erase_state_start_pos_.size(); ++r) {
+    const int range_begin = erase_state_start_pos_[r];
+    const int range_end = erase_state_end_pos_[r];
+    for (int pos = range_begin; pos < range_end; ++pos)
+      real_state[pos] = 0;
+  }
+}
+
+// Convenience overload: returns the result of the two-argument
+// GetRealFFState by value.
+FFState ModelSet::GetRealFFState(const FFState& state) const {
+       FFState real_state;
+       GetRealFFState(state, real_state);
+       return real_state;
+}
diff --git a/decoder/ffset.h b/decoder/ffset.h
index 28aef667..0467b01d 100644
--- a/decoder/ffset.h
+++ b/decoder/ffset.h
@@ -47,6 +47,17 @@ class ModelSet {
 
   bool stateless() const { return !state_size_; }
 
+  //added by ljh
+  //Some features' states are not contextual; they only store information used to calculate feature values.
+  //These states need to be erased.
+  //this function is only called by IncorporateIntoPlusLMForest(...) in apply_models.cc
+  void GetRealFFState(const FFState& state, FFState& real_state) const;
+  FFState GetRealFFState(const FFState& state) const;
+  bool HaveEraseState() const;
+ private:
+  std::vector<int> erase_state_start_pos_;
+  std::vector<int> erase_state_end_pos_;
+
  private:
   std::vector<const FeatureFunction*> models_;
   const std::vector<double>& weights_;
author	Wu, Ke <wuke@cs.umd.edu>	2014-10-07 18:05:58 -0400
committer	Wu, Ke <wuke@cs.umd.edu>	2014-10-07 18:05:58 -0400
commit	0900cac418f7e46889336d137e6ba1bb84651544 (patch)
tree	12738df89e6ca5f495229a195760827f92c5fdee
parent	ebeda1e75c77dd9044f1b9902770896e3009ae55 (diff)