diff options
Diffstat (limited to 'decoder')
| -rw-r--r-- | decoder/Makefile.am | 4 | ||||
| -rw-r--r-- | decoder/apply_models.cc | 37 | ||||
| -rw-r--r-- | decoder/cdec_ff.cc | 3 | ||||
| -rw-r--r-- | decoder/ff_const_reorder.cc | 1318 | ||||
| -rw-r--r-- | decoder/ff_const_reorder.h | 41 | ||||
| -rw-r--r-- | decoder/ffset.cc | 49 | ||||
| -rw-r--r-- | decoder/ffset.h | 11 | 
7 files changed, 1457 insertions, 6 deletions
diff --git a/decoder/Makefile.am b/decoder/Makefile.am index e46a7120..6577cc12 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -21,7 +21,7 @@ trule_test_LDADD = $(BOOST_UNIT_TEST_FRAMEWORK_LDFLAGS) $(BOOST_UNIT_TEST_FRAMEW  cdec_SOURCES = cdec.cc  cdec_LDFLAGS= -rdynamic $(STATIC_FLAGS) -cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a +cdec_LDADD = libcdec.a ../mteval/libmteval.a ../utils/libutils.a ../utils/synutils/maxent-3.0/libtsuruoka_maxent.a ../klm/search/libksearch.a ../klm/lm/libklm.a ../klm/util/libklm_util.a ../klm/util/double-conversion/libklm_util_double.a  AM_CPPFLAGS = -DTEST_DATA=\"$(top_srcdir)/decoder/test_data\" -DBOOST_TEST_DYN_LINK -W -Wno-sign-compare -I$(top_srcdir) -I$(top_srcdir)/mteval -I$(top_srcdir)/utils -I$(top_srcdir)/klm @@ -45,6 +45,7 @@ libcdec_a_SOURCES = \    ff_basic.h \    ff_bleu.h \    ff_charset.h \ +  ff_const_reorder.h \    ff_context.h \    ff_csplit.h \    ff_external.h \ @@ -111,6 +112,7 @@ libcdec_a_SOURCES = \    ff_bleu.cc \    ff_charset.cc \    ff_context.cc \ +  ff_const_reorder.cc \    ff_csplit.cc \    ff_external.cc \    ff_factory.cc \ diff --git a/decoder/apply_models.cc b/decoder/apply_models.cc index 9f8bbead..3f3f6a79 100644 --- a/decoder/apply_models.cc +++ b/decoder/apply_models.cc @@ -233,7 +233,25 @@ public:    void IncorporateIntoPlusLMForest(size_t head_node_hash, Candidate* item, State2Node* s2n, CandidateList* freelist) {      Hypergraph::Edge* new_edge = out.AddEdge(item->out_edge_);      new_edge->edge_prob_ = item->out_edge_.edge_prob_; -    Candidate*& o_item = (*s2n)[item->state_]; +    //start: new code by lijunhui +    FFState real_state; +    FFState* real_state_ref; +    if (models.HaveEraseState()) { +       models.GetRealFFState(item->state_, real_state); +       real_state_ref = &real_state; +    } +    else +       real_state_ref = &(item->state_); +    Candidate*& o_item = (*s2n)[(*real_state_ref)]; +    /*FFState real_state; +    models.GetRealFFState(item->state_, real_state); +    Candidate*& o_item = (*s2n)[real_state];*/ +    //end: new code by lijunhui + +    //start: original code +    //Candidate*& o_item = (*s2n)[item->state_]; +    //end: original code +      if (!o_item) o_item = item;      int& node_id = o_item->node_index_; @@ -254,7 +272,19 @@ public:      // score is the same for all items with a common residual DP      // state      if (item->vit_prob_ > o_item->vit_prob_) { -      assert(o_item->state_ == item->state_);    // sanity check! +      //start: new code by lijunhui +      if (models.HaveEraseState()) { +        assert(models.GetRealFFState(o_item->state_) == models.GetRealFFState(item->state_));    // sanity check! +        node_states_[o_item->node_index_] = item->state_; +      } else { +        assert(o_item->state_ == item->state_);    // sanity check! +      } +      //end: new code by lijunhui + +      //start: original code +      //assert(o_item->state_ == item->state_);    // sanity check! +      //end: original code +        o_item->est_prob_ = item->est_prob_;        o_item->vit_prob_ = item->vit_prob_;      } @@ -599,7 +629,7 @@ void ApplyModelSet(const Hypergraph& in,    if (models.stateless() || config.algorithm == IntersectionConfiguration::FULL) {      NoPruningRescorer ma(models, smeta, in, out); // avoid overhead of best-first when no state      ma.Apply(); -  } else if (config.algorithm == IntersectionConfiguration::CUBE  +  } else if (config.algorithm == IntersectionConfiguration::CUBE               || config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING               || config.algorithm == IntersectionConfiguration::FAST_CUBE_PRUNING_2) {      int pl = config.pop_limit; @@ -628,4 +658,3 @@ void ApplyModelSet(const Hypergraph& in,    out->is_linear_chain_ = in.is_linear_chain_;  // TODO remove when this is computed                                                  // automatically  } - diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index 7f7e075b..e7a8bbc1 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -3,6 +3,7 @@  #include "ff.h"  #include "ff_basic.h"  #include "ff_context.h" +#include "ff_const_reorder.h"  #include "ff_spans.h"  #include "ff_lm.h"  #include "ff_klm.h" @@ -77,6 +78,6 @@ void register_feature_functions() {    ff_registry.Register("WordPairFeatures", new FFFactory<WordPairFeatures>);    ff_registry.Register("SourcePathFeatures", new FFFactory<SourcePathFeatures>);    ff_registry.Register("WordSet", new FFFactory<WordSet>); +  ff_registry.Register("ConstReorderFeature", new ConstReorderFeatureFactory());    ff_registry.Register("External", new FFFactory<ExternalFeature>);  } - diff --git a/decoder/ff_const_reorder.cc b/decoder/ff_const_reorder.cc new file mode 100644 index 00000000..77582b7c --- /dev/null +++ b/decoder/ff_const_reorder.cc @@ -0,0 +1,1318 @@ +#include "ff_const_reorder.h" + +#include "stringlib.h" +#include "hg.h" +#include "sentence_metadata.h" +#include "synutils/tree.h" +#include "synutils/srl_sentence.h" +#include "synutils/tsuruoka_maxent.h" +#include "hash.h" +#include "synutils/argument_reorder_model.h" + +#include <string> +#include <vector> +#include <stdio.h> + +using namespace std; + +typedef HASH_MAP<std::string, vector<double> > MapClassifier; + +struct SBitArray{ +	SBitArray(int size) : +	size_(size){ +		int bit_size = size / 8; +		if (size % 8 > 0) +			bit_size++; + +		char_ = new unsigned char[bit_size]; +		memset(char_, 0, bit_size); +	} +	~SBitArray() { +		delete [] char_; +	} + +	int Get(int index) const { +		int i; + +		i = index; +		if (i < 0) +			i = size_ + i; +		assert(i > -1 && i < size_); + +		int byte_index, bit_index; +		byte_index = i/8; +		bit_index = i%8; +		unsigned char res; +		if (bit_index == 0) +			res = char_[byte_index] & 0x01; +		else if (bit_index == 1) +			res = char_[byte_index] & 0x02; +		else if (bit_index == 2) +			res = char_[byte_index] & 0x04; +		else if (bit_index == 3) +			res = char_[byte_index] & 0x08; +		else if (bit_index == 4) +			res = char_[byte_index] & 0x10; +		else if (bit_index == 5) +			res = char_[byte_index] & 0x20; +		else if (bit_index == 6) +			res = char_[byte_index] & 0x40; +		else if (bit_index == 7) +			res = char_[byte_index] & 0x80; +		else +			assert(false); +		if (res != 0) +			return 1; +		else +			return 0; +	} + +	void Set(int index, int val) { +		assert(val == 0 || val == 1); +		int i; + +		i = index; +		if (i < 0) +			i = size_ + i; +		assert(i > -1 && i < size_); + +		int byte_index, bit_index; +		byte_index = i/8; +		bit_index = i%8; +		unsigned char res; + +		if (bit_index == 0) { +			if (val == 0) +				res = char_[byte_index] & 0xFE; +			else +				res = char_[byte_index] | 0x01; +		} else if (bit_index == 1) { +			if (val == 0) +				res = char_[byte_index] & 0xFD; +			else +				res = char_[byte_index] | 0x02; +		} else if (bit_index == 2) { +			if (val == 0) +				res = char_[byte_index] & 0xFB; +			else +				res = char_[byte_index] | 0x04; +		} else if (bit_index == 3) { +			if (val == 0) +				res = char_[byte_index] & 0xF7; +			else +				res = char_[byte_index] | 0x08; +		} else if (bit_index == 4) { +			if (val == 0) +				res = char_[byte_index] & 0xEF; +			else +				res = char_[byte_index] | 0x10; +		} else if (bit_index == 5) { +			if (val == 0) +				res = char_[byte_index] & 0xDF; +			else +				res = char_[byte_index] | 0x20; +		} else if (bit_index == 6) { +			if (val == 0) +				res = char_[byte_index] & 0xBF; +			else +				res = char_[byte_index] | 0x40; +		} else if (bit_index == 7) { +			if (val == 0) +				res = char_[byte_index] & 0x7F; +			else +				res = char_[byte_index] | 0x80; +		} else +			assert(false); +		char_[byte_index] = res; +	} + +private: +	const int size_; +	unsigned char *char_; +}; + +inline bool is_inside(int i, int left, int right) { +	if ( i < left || i > right ) +		return false; +	return true; +} + +/* + * assume i <= j + * [i, j] is inside [left, right] or [i, j] equates to [left, right] + */ +inline bool is_inside(int i, int j, int left, int right) { +	if ( i >= left && j <= right ) +		return true; +	return false; +} + +/* + * assume i <= j + * [i, j] is inside [left, right], but [i, j] not equal to [left, right] + */ +inline bool is_proper_inside(int i, int j, int left, int right) { +	if ( i >= left && j <= right && right - left > j - i) +		return true; +	return false; +} + +/* + * assume i <= j + * [i, j] is proper proper inside [left, right] + */ +inline bool is_proper_proper_inside(int i, int j, int left, int right) { +	if ( i > left && j < right) +		return true; +	return false; +} + +inline bool is_overlap(int left1, int right1, int left2, int right2) { +	if (is_inside(left1, left2, right2) || is_inside(left2, left1, right1)) +		return true; + +	return false; +} + +inline void NewAndCopyCharArray(char**p, const char* q) { +	if (q != NULL) { +		(*p) = new char[strlen(q) + 1]; +		strcpy((*p), q); +	} else +		(*p) = NULL; +} + +//TODO:to make the alignment more efficient +struct TargetTranslation{ +	typedef vector<int> SingleWordAlign; +	TargetTranslation(int begin_pos, int end_pos, int input_begin_pos, int input_end_pos, int e_num_word): +		begin_pos_(begin_pos), +		end_pos_(end_pos), +		input_begin_pos_(input_begin_pos), +		input_end_pos_(input_end_pos), +		e_num_words_(e_num_word), +		vec_left_most_(end_pos - begin_pos + 1, e_num_word), +		vec_right_most_(end_pos - begin_pos + 1, -1), +		vec_f_align_bit_array_(end_pos - begin_pos + 1, NULL), +		vec_e_align_bit_array_(e_num_word, NULL) { +		int len = end_pos - begin_pos + 1; + +		/*vec_f_align_bit_array_.reserve(len); +		for (int i = 0; i < len; i++) +			vec_f_align_bit_array_.push_back(NULL); + +		vec_e_align_bit_array_.reserve(e_num_word); +		for (int i = 0; i < e_num_word; i++) +			vec_e_align_bit_array_.push_back(NULL);*/ +		align_.reserve(1.5 * len); +	} +	~TargetTranslation( ) { +		for (size_t i = 0; i < vec_f_align_bit_array_.size(); i++) +			if (vec_f_align_bit_array_[i] != NULL) +				delete vec_f_align_bit_array_[i]; + +		for (size_t i = 0; i < vec_e_align_bit_array_.size(); i++) +			if (vec_e_align_bit_array_[i] != NULL) +				delete vec_e_align_bit_array_[i]; + +		for (size_t i = 0; i < align_.size(); i++) +			delete align_[i]; +	} + +	void InsertAlignmentPoint(int s, int t) { +		int i = s - begin_pos_; + +		SBitArray* &b = vec_f_align_bit_array_[i]; +		if (b == NULL) +			b = new SBitArray(e_num_words_); +		b->Set(t, 1); + +		SBitArray* &a = vec_e_align_bit_array_[t]; +		if (a == NULL) +			a = new SBitArray(end_pos_ - begin_pos_ + 1); +		a->Set(i, 1); + +		align_.push_back(new AlignmentPoint(s, t)); + +		if (t > vec_right_most_[i]) +			vec_right_most_[i] = t; +		if (t < vec_left_most_[i]) +			vec_left_most_[i] = t; +	} + +	/* +	 * given a source span [begin, end], whether its target side is continuous, +	 * return "0": the source span is translated silently +	 * return "1": there is at least on word inside its target span, this word doesn't align to any word inside [begin, end], but outside [begin, end] +	 * return "2": otherwise +	 */ +	string IsTargetConstinousSpan(int begin, int end) const { +		int target_begin, target_end; +		FindLeftRightMostTargetSpan(begin, end, target_begin, target_end); +		if (target_begin == -1) return "0"; + +		for (int i = target_begin; i <= target_end; i++) { +			if (vec_e_align_bit_array_[i] == NULL) continue; +			int j = begin; +			for (; j <= end; j++) { +				if (vec_e_align_bit_array_[i]->Get(j - begin_pos_)) +					break; +			} +			if (j == end + 1) //e[i] is aligned, but e[i] doesn't align to any source word in [begin_pos, end_pos] +				return "1"; +		} +		return "2"; +	} + +	string IsTargetConstinousSpan2(int begin, int end) const { +		int target_begin, target_end; +		FindLeftRightMostTargetSpan(begin, end, target_begin, target_end); +		if (target_begin == -1) return "Unaligned"; + +		for (int i = target_begin; i <= target_end; i++) { +			if (vec_e_align_bit_array_[i] == NULL) continue; +			int j = begin; +			for (; j <= end; j++) { +				if (vec_e_align_bit_array_[i]->Get(j - begin_pos_)) +					break; +			} +			if (j == end + 1) //e[i] is aligned, but e[i] doesn't align to any source word in [begin_pos, end_pos] +				return "Discon't"; +		} +		return "Con't"; +	} + + + +	void FindLeftRightMostTargetSpan(int begin, int end, int& target_begin, int& target_end) const { +		int b = begin - begin_pos_; +		int e = end - begin_pos_ + 1; + +		target_begin = vec_left_most_[b]; +		target_end = vec_right_most_[b]; +		for (int i = b + 1; i < e; i++) { +			if (target_begin > vec_left_most_[i]) +				target_begin = vec_left_most_[i]; +			if (target_end < vec_right_most_[i]) +				target_end = vec_right_most_[i]; +		} +		if (target_end == -1) +			target_begin = -1; +		return; + +		target_begin = e_num_words_; +		target_end = -1; + +		for (int i = begin - begin_pos_; i < end - begin_pos_ + 1; i++) { +			if (vec_f_align_bit_array_[i] == NULL) continue; +			for (int j = 0; j < target_begin; j++) +				if (vec_f_align_bit_array_[i]->Get(j)) { +					target_begin = j; +					break; +				} +		} +		for (int i = end - begin_pos_; i > begin - begin_pos_ -1; i--) { +			if (vec_f_align_bit_array_[i] == NULL) continue; +			for (int j = e_num_words_ - 1; j > target_end; j--) +				if (vec_f_align_bit_array_[i]->Get(j)) { +					target_end = j; +					break; +				} +		} + +		if (target_end == -1) +			target_begin = -1; +	} + +	const uint16_t begin_pos_, end_pos_;              //the position in parse +	const uint16_t input_begin_pos_, input_end_pos_;  //the position in input +	const uint16_t e_num_words_; +	vector<AlignmentPoint*> align_; +private: +	vector<SBitArray*> vec_f_align_bit_array_; +	vector<SBitArray*> vec_e_align_bit_array_; + +	vector<short> vec_left_most_; +	vector<short> vec_right_most_; +}; + +struct FocusedConstituent{ +	FocusedConstituent(const SParsedTree *pTree) { +		if (pTree == NULL) return; +		for (size_t i = 0; i < pTree->m_vecTerminals.size(); i++) { +			STreeItem *pParent = pTree->m_vecTerminals[i]->m_ptParent; + +			while (pParent != NULL) { +				//if (pParent->m_vecChildren.size() > 1 && pParent->m_iEnd - pParent->m_iBegin > 5) { +				//if (pParent->m_vecChildren.size() > 1) { +				if (true) { + +					//do constituent reordering for all children of pParent +					if (strcmp(pParent->m_pszTerm, "ROOT")) +						focus_parents_.push_back(pParent); +				} +				if (pParent->m_iBrotherIndex != 0) break; +				pParent = pParent->m_ptParent; +			} +		} +	} + +	~FocusedConstituent() { //TODO +		focus_parents_.clear(); +	} + +	vector<STreeItem*> focus_parents_; +}; + + +typedef SPredicateItem FocusedPredicate; + +struct FocusedSRL{ +	FocusedSRL(const SSrlSentence *srl) { +		if (srl == NULL) return; +		for (size_t i = 0; i < srl->m_vecPred.size(); i++) { +			if (strcmp(srl->m_pTree->m_vecTerminals[srl->m_vecPred[i]->m_iPosition]->m_ptParent->m_pszTerm, "VA") == 0) +				continue; +			focus_predicates_.push_back(new FocusedPredicate(srl->m_pTree, srl->m_vecPred[i])); +		} +	} + +	~FocusedSRL() { +		focus_predicates_.clear(); +	} + +	vector<const FocusedPredicate*> focus_predicates_; +}; + +/* + * Note: + *      In BOLT experiments, we need to merged some sequence words into one term (like from "1999 nian 1 yue 10 ri" to "1999_nian_1_yue_10_ri") due to some reasons; + *      but in the parse file, we still use the parse tree before merging any words; + *      therefore, the words in source sentence and parse tree diverse and we need to map a word in merged sentence into its original index; + *      a word in source sentence maps 1 or more words in parse tree + *      the index map info is stored at variable index_map_; + *      if the index_map_ is NULL, indicating the word index in source sentence and parse tree is always same. + * + *      In ConstReorderFeatureImpl, as to store alignment info, we use the word index of the parse tree + */ + +struct SIndexMap{ +	SIndexMap(const string& index_map_file) { +		if (index_map_file == "") { +			index_map_input_2_parse = NULL; +			index_map_parse_2_input = NULL; +			return; +		} +		STxtFileReader *reader = new STxtFileReader(index_map_file.c_str()); +		char szLine[10001]; +		szLine[0] = '\0'; +		reader->fnReadNextLine(szLine, NULL); +		delete reader; +		vector<string> terms; +		SplitOnWhitespace(string(szLine), &terms); + +		index_map_input_2_parse = new short int[terms.size() + 1]; +		int ix = 0; +		size_t i; +		for (i = 0; i < terms.size(); i++) { +			index_map_input_2_parse[i] = ix; +			ix += atoi(terms[i].c_str()); +		} +		index_map_input_2_parse[i] = ix; +		//assert(ix == parsed_tree_->m_vecTerminals.size()); + +		index_map_parse_2_input = new short int[ix+1]; +		int jx = 0; +		for (i = 0; i < terms.size(); i++) { +			int num_word = atoi(terms[i].c_str()); +			for (int j = 0; j < num_word; j++) +				index_map_parse_2_input[jx++] = i; +		} +		index_map_parse_2_input[jx] = i; +		assert(jx == ix); +	} + +	~SIndexMap() { +		if (index_map_input_2_parse != NULL) +			delete index_map_input_2_parse; +		if (index_map_parse_2_input != NULL) +			delete index_map_parse_2_input; +	} + +	/* +	 * an input word maps to 1 or more words in parse +	 */ +	void MapIndex_Input_2_Parse(short int ix, short int& mapped_begin, short int& mapped_end) { +		MapIndex_Input_2_Parse(ix, ix, mapped_begin, mapped_end); +	} + +	/* +	 * given the indices in input, +	 * return the indices in parse tree +	 */ +	void MapIndex_Input_2_Parse(short int begin, short int end, short int& mapped_begin, short int& mapped_end) { +		if (index_map_input_2_parse == NULL) { +			mapped_begin = begin; +			mapped_end = end; +			return; +		} + +		mapped_begin = index_map_input_2_parse[begin]; +		mapped_end = index_map_input_2_parse[end + 1] - 1; +	} + +	/* +	 * given the indices in input, +	 * return the indices in parse tree +	 */ +	void MapIndex_Parse_2_Input(short int mapped_begin, short int mapped_end, short int& begin, short int& end) { +		if (index_map_parse_2_input == NULL) { +			begin = mapped_begin; +			end = mapped_end; +			return; +		} + +		begin = index_map_parse_2_input[mapped_begin]; +		end = index_map_parse_2_input[mapped_end]; + +		assert(mapped_begin == 0 || index_map_parse_2_input[mapped_begin - 1] != index_map_parse_2_input[mapped_begin]); +		assert(index_map_parse_2_input[mapped_end + 1] != index_map_parse_2_input[mapped_end]); +	} + +	/* +	 * given a index in input +	 * return the number of its corresponding words in parse tree +	 */ +	int MapIndexWordCount(int ix) { +		if (index_map_input_2_parse == NULL) +			return 1; +		return index_map_input_2_parse[ix + 1] - index_map_input_2_parse[ix]; +	} + +private: + +	short int *index_map_input_2_parse; +	short int *index_map_parse_2_input; +}; + +struct ConstReorderFeatureImpl{ +	ConstReorderFeatureImpl(const std::string& param) { + +		b_block_feature_ = false; +		b_order_feature_ = false; +		b_srl_block_feature_ = false; +		b_srl_order_feature_ = false; + + +		vector<string> terms; +		SplitOnWhitespace(param, &terms); +		if (terms.size() == 1) { +			b_block_feature_ = true; +			b_order_feature_ = true; +		} else if (terms.size() >= 3) { +			if (terms[1].compare("1") == 0) +				b_block_feature_ = true; +			if (terms[2].compare("1") == 0) +				b_order_feature_ = true; +			if (terms.size() == 6) { +				if (terms[4].compare("1") == 0) +					b_srl_block_feature_ = true; +				if (terms[5].compare("1") == 0) +					b_srl_order_feature_ = true; + +				assert(b_srl_block_feature_ || b_srl_order_feature_); +			} + +		} else { +			assert("ERROR"); +		} + +		const_reorder_classifier_left_ = NULL; +		const_reorder_classifier_right_ = NULL; + +		srl_reorder_classifier_left_ = NULL; +		srl_reorder_classifier_right_ = NULL; + +		if (b_order_feature_) { +			InitializeClassifier((terms[0] + string(".left")).c_str(), &const_reorder_classifier_left_); +			InitializeClassifier((terms[0] + string(".right")).c_str(), &const_reorder_classifier_right_); +		} + +		if (b_srl_order_feature_) { +			InitializeClassifier((terms[3] + string(".left")).c_str(), &srl_reorder_classifier_left_); +			InitializeClassifier((terms[3] + string(".right")).c_str(), &srl_reorder_classifier_right_); +		} + +		parsed_tree_ = NULL; +		focused_consts_ = NULL; +		index_map_ = NULL; + +		srl_sentence_ = NULL; +		focused_srl_ = NULL; + +		map_left_ = NULL; +		map_right_ = NULL; + +		map_srl_left_ = NULL; +		map_srl_right_ = NULL; + +		dict_block_status_ = new Dict(); +		dict_block_status_->Convert("Unaligned", false); +		dict_block_status_->Convert("Discon't", false); +		dict_block_status_->Convert("Con't", false); +	} +	~ConstReorderFeatureImpl() { +		if (const_reorder_classifier_left_) +			delete const_reorder_classifier_left_; +		if (const_reorder_classifier_right_) +			delete const_reorder_classifier_right_; +		if (srl_reorder_classifier_left_) +			delete srl_reorder_classifier_left_; +		if (srl_reorder_classifier_right_) +			delete srl_reorder_classifier_right_; +		FreeSentenceVariables(); + +		delete dict_block_status_; +	} + +	static int ReserveStateSize() { +	    return 1 * sizeof(TargetTranslation*); +	} + +	void InitializeInputSentence(const std::string& parse_file, const std::string& srl_file, const std::string& index_map_file) { +		FreeSentenceVariables(); +		if (b_srl_block_feature_ || b_srl_order_feature_) { +			assert(srl_file != ""); +			srl_sentence_ = ReadSRLSentence(srl_file); +			parsed_tree_ = srl_sentence_->m_pTree; +		} else { +			assert(parse_file != ""); +			srl_sentence_ = NULL; +			parsed_tree_ = ReadParseTree(parse_file); +		} + +		if (b_block_feature_ || b_order_feature_) { +			focused_consts_ = new FocusedConstituent(parsed_tree_); + +			if (b_order_feature_) { +				//we can do the classifier "off-line" +				map_left_ = new MapClassifier(); +				map_right_ = new MapClassifier(); +				InitializeConstReorderClassifierOutput(); +			} +		} + +		if (b_srl_block_feature_ || b_srl_order_feature_) { +			focused_srl_ = new FocusedSRL(srl_sentence_); + +			if (b_srl_order_feature_) { +				map_srl_left_ = new MapClassifier(); +				map_srl_right_ = new MapClassifier(); +				InitializeSRLReorderClassifierOutput(); +			} +		} + +		index_map_ = new SIndexMap(index_map_file); + +		if (parsed_tree_ != NULL) { +			size_t i = parsed_tree_->m_vecTerminals.size(); +			vec_target_tran_.reserve(20 * i * i * i); +		} else +			vec_target_tran_.reserve(1000000); +	} + +	void SetConstReorderFeature(const Hypergraph::Edge& edge, SparseVector<double>* features, const vector<const void*>& ant_states, void* state) { +		if (parsed_tree_ == NULL) return; + +		short int mapped_begin, mapped_end; +		index_map_->MapIndex_Input_2_Parse(edge.i_, edge.j_ - 1, mapped_begin, mapped_end); + +		typedef TargetTranslation* PtrTargetTranslation; +		PtrTargetTranslation* remnant = reinterpret_cast<PtrTargetTranslation*>(state); + +		vector<const TargetTranslation*> vec_node; +		vec_node.reserve(edge.tail_nodes_.size()); +		for (size_t i = 0; i < edge.tail_nodes_.size(); i++) { +			const PtrTargetTranslation* astate = reinterpret_cast<const PtrTargetTranslation*>(ant_states[i]); +			vec_node.push_back(astate[0]); +		} + +		int e_num_word = edge.rule_->e_.size(); +		for (size_t i = 0; i < vec_node.size(); i++) { +			e_num_word += vec_node[i]->e_num_words_; +			e_num_word--; +		} + +		remnant[0] = new TargetTranslation(mapped_begin, mapped_end, edge.i_, edge.j_ - 1, e_num_word); +		vec_target_tran_.push_back(remnant[0]); + +		//reset the alignment +		//for the source side, we know its position in source sentence +		//for the target side, we always assume its starting position is 0 +		unsigned vc = 0; +		const TRulePtr rule = edge.rule_; +		std::vector<int> f_index(rule->f_.size()); +		int index = edge.i_; +		for (unsigned i = 0; i < rule->f_.size(); i++) { +			f_index[i] = index; +			const WordID& c = rule->f_[i]; +			if (c < 1) +				index = vec_node[vc++]->input_end_pos_ + 1; +			else +				index++; +		} +		assert(vc == vec_node.size()); +		assert(index == edge.j_); + +		std::vector<int> e_index(rule->e_.size()); +		index = 0; +		vc = 0; +		for (unsigned i = 0; i < rule->e_.size(); i++) { +			e_index[i] = index; +			const WordID& c = rule->e_[i]; +			if (c < 1) { +			    index += vec_node[-c]->e_num_words_; +			    vc++; +			} +			else +			index++; +		} +		assert(vc == vec_node.size()); + +		size_t nt_pos = 0; +		for (size_t i = 0; i < edge.rule_->f_.size(); i++) { +			if (edge.rule_->f_[i] > 0) continue; + +			//it's an NT +			size_t j; +			for (j = 0; j < edge.rule_->e_.size(); j++) +				if (edge.rule_->e_[j] * -1 == nt_pos) +					break; +			assert(j != edge.rule_->e_.size()); +			nt_pos++; + +			//i aligns j +	        int eindex = e_index[j]; +	        const vector<AlignmentPoint*>& align = vec_node[-1 * edge.rule_->e_[j]]->align_; +	        for (size_t k = 0; k < align.size(); k++) { +	        	remnant[0]->InsertAlignmentPoint(align[k]->s_, eindex + align[k]->t_); +	        } +		} +		for (size_t i = 0; i < edge.rule_->a_.size(); i++) { +			short int parse_index_begin, parse_index_end; +			index_map_->MapIndex_Input_2_Parse(f_index[edge.rule_->a_[i].s_], parse_index_begin, parse_index_end); +	        int findex = parse_index_begin; +	        int eindex = e_index[edge.rule_->a_[i].t_]; +	        int word_count = index_map_->MapIndexWordCount(f_index[edge.rule_->a_[i].s_]); +	        assert(word_count == parse_index_end - parse_index_begin + 1); +	        for (int i = 0; i < word_count; i++) +	        	remnant[0]->InsertAlignmentPoint(findex + i, eindex); +		} + + +	    //till now, we finished setting state values +	    //next, use the state values to calculate constituent reorder feature +	    SetConstReorderFeature(mapped_begin, mapped_end, features, remnant[0], vec_node, f_index); +	} + +	void SetConstReorderFeature(short int mapped_begin, short int mapped_end, SparseVector<double>* features, const TargetTranslation* target_translation, const vector<const TargetTranslation*>& vec_node, std::vector<int>& findex) { +		if (b_srl_block_feature_ || b_srl_order_feature_){ +			double logprob_srl_reorder_left = 0.0, logprob_srl_reorder_right = 0.0; +			for (size_t i = 0; i < focused_srl_->focus_predicates_.size(); i++) { +				const FocusedPredicate* pred = focused_srl_->focus_predicates_[i]; +				if (!is_overlap(mapped_begin, mapped_end, pred->begin_, pred->end_)) continue; //have no overlap between this predicate (with its argument) and the current edge + +				size_t j; +				for (j = 0; j < vec_node.size(); j++) { +					if (is_inside(pred->begin_, pred->end_, vec_node[j]->begin_pos_, vec_node[j]->end_pos_)) +						break; +				} +				if (j < vec_node.size()) continue; + +				vector<int> vecBlockStatus; +				vecBlockStatus.reserve(pred->vec_items_.size()); +				for (j = 0; j < pred->vec_items_.size(); j++) { +					const STreeItem *con1 = pred->vec_items_[j]->tree_item_; +					if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {vecBlockStatus.push_back(0); continue;} //the node is partially outside the current edge + +					string type = target_translation->IsTargetConstinousSpan2(con1->m_iBegin, con1->m_iEnd); +					vecBlockStatus.push_back(dict_block_status_->Convert(type, false)); + +					if (!b_srl_block_feature_) continue; +					//see if the node is covered by an NT +					size_t k; +					for (k = 0; k < vec_node.size(); k++) { +						if (is_inside(con1->m_iBegin, con1->m_iEnd, vec_node[k]->begin_pos_, vec_node[k]->end_pos_)) +							break; +					} +					if (k < vec_node.size()) continue; +					int f_id = FD::Convert(string(pred->vec_items_[j]->role_) + type); +					if (f_id) +						features->add_value(f_id, 1); +				} + +				if (!b_srl_order_feature_) continue; + +				vector<int> vecPosition, vecRelativePosition; +				vector<int> vecRightPosition, vecRelativeRightPosition; +				vecPosition.reserve(pred->vec_items_.size()); +				vecRelativePosition.reserve(pred->vec_items_.size()); +				vecRightPosition.reserve(pred->vec_items_.size()); +				vecRelativeRightPosition.reserve(pred->vec_items_.size()); +				for (j = 0; j < pred->vec_items_.size(); j++) { +					const STreeItem *con1 = pred->vec_items_[j]->tree_item_; +					if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {vecPosition.push_back(-1);  vecRightPosition.push_back(-1);continue;} //the node is partially outside the current edge +					int left1 = -1, right1 = -1; +					target_translation->FindLeftRightMostTargetSpan(con1->m_iBegin, con1->m_iEnd, left1, right1); +					vecPosition.push_back(left1); +					vecRightPosition.push_back(right1); +				} +				fnGetRelativePosition(vecPosition, vecRelativePosition); +				fnGetRelativePosition(vecRightPosition, vecRelativeRightPosition); + +				for (j = 1; j < pred->vec_items_.size(); j++) { +					const STreeItem *con1 = pred->vec_items_[j - 1]->tree_item_; +					const STreeItem *con2 = pred->vec_items_[j]->tree_item_; + +					if (con1->m_iBegin < mapped_begin || con2->m_iEnd > mapped_end) continue; //one of the two nodes is partially outside the current edge + +					//both con1 and con2 are covered, need to check if they are covered by the same NT +					size_t k; +					for (k = 0; k < vec_node.size(); k++) { +						if (is_inside(con1->m_iBegin, con2->m_iEnd, vec_node[k]->begin_pos_, vec_node[k]->end_pos_)) +							break; +					} +					if (k < vec_node.size()) continue; + +					//they are not covered bye the same NT +					string outcome; +					string key; +					GenerateKey(pred->vec_items_[j-1]->tree_item_, pred->vec_items_[j]->tree_item_, vecBlockStatus[j-1], vecBlockStatus[j], key); + +					fnGetOutcome(vecRelativePosition[j - 1], vecRelativePosition[j], outcome); +					double prob = CalculateConstReorderProb(srl_reorder_classifier_left_, map_srl_left_, key, outcome); +					//printf("%s %s %f\n", ostr.str().c_str(), outcome.c_str(), prob); +					logprob_srl_reorder_left += log10(prob); + + +					fnGetOutcome(vecRelativeRightPosition[j - 1], vecRelativeRightPosition[j], outcome); +					prob = CalculateConstReorderProb(srl_reorder_classifier_right_, map_srl_right_, key, outcome); +					logprob_srl_reorder_right += log10(prob); +				} +			} + +			if (b_srl_order_feature_) { +				int f_id = FD::Convert("SRLReorderFeatureLeft"); +				if (f_id && logprob_srl_reorder_left != 0.0) +					features->set_value(f_id, logprob_srl_reorder_left); +				f_id = FD::Convert("SRLReorderFeatureRight"); +				if (f_id && logprob_srl_reorder_right != 0.0) +					features->set_value(f_id, logprob_srl_reorder_right); +			} +		} + +		if (b_block_feature_ || b_order_feature_){ +			double logprob_const_reorder_left = 0.0, logprob_const_reorder_right = 0.0; + +			for (size_t i = 0; i < focused_consts_->focus_parents_.size(); i++) { +				STreeItem* parent = focused_consts_->focus_parents_[i]; +				if (!is_overlap(mapped_begin, mapped_end, parent->m_iBegin, parent->m_iEnd)) continue; //have no overlap between this parent node and the current edge + +				size_t j; +				for (j = 0; j < vec_node.size(); j++) { +					if (is_inside(parent->m_iBegin, parent->m_iEnd, vec_node[j]->begin_pos_, vec_node[j]->end_pos_)) +						break; +				} +				if (j < vec_node.size()) continue; + + +				if (b_block_feature_) { +					if (parent->m_iBegin >= mapped_begin && parent->m_iEnd <= mapped_end) { +						string type = target_translation->IsTargetConstinousSpan2(parent->m_iBegin, parent->m_iEnd); +						int f_id = FD::Convert(string(parent->m_pszTerm) + type); +						if (f_id) +							features->add_value(f_id, 1); +					} +				} + +				if (parent->m_vecChildren.size() == 1 || !b_order_feature_) continue; + +				vector<int> vecChunkBlock; +				vecChunkBlock.reserve(parent->m_vecChildren.size()); + +				for (j = 0; j < parent->m_vecChildren.size(); j++) { +					STreeItem *con1 = parent->m_vecChildren[j]; +					if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {vecChunkBlock.push_back(0); continue;} //the node is partially outside the current edge + +					string type = target_translation->IsTargetConstinousSpan2(con1->m_iBegin, con1->m_iEnd); +					vecChunkBlock.push_back(dict_block_status_->Convert(type, false)); + +					/*if (!b_block_feature_) continue; +					//see if the node is covered by an NT +					size_t k; +					for (k = 0; k < vec_node.size(); k++) { +						if (is_inside(con1->m_iBegin, con1->m_iEnd, vec_node[k]->begin_pos_, vec_node[k]->end_pos_)) +							break; +					} +					if (k < vec_node.size()) continue; +					int f_id = FD::Convert(string(con1->m_pszTerm) + type); +					if (f_id) +						features->add_value(f_id, 1);*/ +				} + +				if (!b_order_feature_) continue; + +				vector<int> vecPosition, vecRelativePosition; +				vector<int> vecRightPosition, vecRelativeRightPosition; +				vecPosition.reserve(parent->m_vecChildren.size()); +				vecRelativePosition.reserve(parent->m_vecChildren.size()); +				vecRightPosition.reserve(parent->m_vecChildren.size()); +				vecRelativeRightPosition.reserve(parent->m_vecChildren.size()); +				for (j = 0; j < parent->m_vecChildren.size(); j++) { +					STreeItem *con1 = parent->m_vecChildren[j]; +					if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {vecPosition.push_back(-1);  vecRightPosition.push_back(-1);continue;} //the node is partially outside the current edge +					int left1 = -1, right1 = -1; +					target_translation->FindLeftRightMostTargetSpan(con1->m_iBegin, con1->m_iEnd, left1, right1); +					vecPosition.push_back(left1); +					vecRightPosition.push_back(right1); +				} +				fnGetRelativePosition(vecPosition, vecRelativePosition); +				fnGetRelativePosition(vecRightPosition, vecRelativeRightPosition); + +				for (j = 1; j < parent->m_vecChildren.size(); j++) { +					STreeItem *con1 = parent->m_vecChildren[j - 1]; +					STreeItem *con2 = parent->m_vecChildren[j]; + +					if (con1->m_iBegin < mapped_begin || con2->m_iEnd > mapped_end) continue; //one of the two nodes is partially outside the current edge + +					//both con1 and con2 are covered, need to check if they are covered by the same NT +					size_t k; +					for (k = 0; k < vec_node.size(); k++) { +						if (is_inside(con1->m_iBegin, con2->m_iEnd, vec_node[k]->begin_pos_, vec_node[k]->end_pos_)) +							break; +					} +					if (k < vec_node.size()) continue; + +					//they are not covered bye the same NT +					string outcome; +					string key; +					GenerateKey(parent->m_vecChildren[j-1], parent->m_vecChildren[j], vecChunkBlock[j-1], vecChunkBlock[j], key); + +					fnGetOutcome(vecRelativePosition[j - 1], vecRelativePosition[j], outcome); +					double prob = CalculateConstReorderProb(const_reorder_classifier_left_, map_left_, key, outcome); +					//printf("%s %s %f\n", ostr.str().c_str(), outcome.c_str(), prob); +					logprob_const_reorder_left += log10(prob); + + +					fnGetOutcome(vecRelativeRightPosition[j - 1], vecRelativeRightPosition[j], outcome); +					prob = CalculateConstReorderProb(const_reorder_classifier_right_, map_right_, key, outcome); +					logprob_const_reorder_right += log10(prob); +				} +			} + +			if (b_order_feature_) { +				int f_id = FD::Convert("ConstReorderFeatureLeft"); +				if (f_id && logprob_const_reorder_left != 0.0) +					features->set_value(f_id, logprob_const_reorder_left); +				f_id = FD::Convert("ConstReorderFeatureRight"); +				if (f_id && logprob_const_reorder_right != 0.0) +					features->set_value(f_id, logprob_const_reorder_right); +			} +		} + +	} + + +private: +	void Byte_to_Char(unsigned char *str, int n) { +		str[0] = (n & 255); +		str[1] = n / 256; +	} +	void GenerateKey(const STreeItem *pCon1, const STreeItem *pCon2, int iBlockStatus1, int iBlockStatus2, string& key) { +		assert(iBlockStatus1 != 0); +		assert(iBlockStatus2 != 0); +		unsigned char szTerm[ 1001 ]; +		Byte_to_Char(szTerm, pCon1->m_iBegin); +		Byte_to_Char(szTerm + 2, pCon2->m_iEnd); +		szTerm[4] = (char)iBlockStatus1; +		szTerm[5] = (char)iBlockStatus2; +		szTerm[6] = '\0'; +		//sprintf(szTerm, "%d|%d|%d|%d|%s|%s", pCon1->m_iBegin, pCon1->m_iEnd, pCon2->m_iBegin, pCon2->m_iEnd, strBlockStatus1.c_str(), strBlockStatus2.c_str()); +		key = string(szTerm, szTerm + 6); + +	} +	void InitializeConstReorderClassifierOutput( ) { +		if (!b_order_feature_) return; +		int size_block_status = dict_block_status_->max(); + +		for (size_t i = 0; i < focused_consts_->focus_parents_.size(); i++) { +			STreeItem* parent = focused_consts_->focus_parents_[i]; + +			for (size_t j = 1; j < parent->m_vecChildren.size(); j++) { +				for (size_t k = 1; k <= size_block_status; k++) { +					for (size_t l = 1; l <= size_block_status; l++) { +						ostringstream ostr; +						GenerateFeature(parsed_tree_, parent, j, dict_block_status_->Convert(k), dict_block_status_->Convert(l), ostr); + +						string strKey; +						GenerateKey(parent->m_vecChildren[j-1], parent->m_vecChildren[j], k, l, strKey); + +						vector<double> vecOutput; +						const_reorder_classifier_left_->fnEval(ostr.str().c_str(), vecOutput); +						(*map_left_)[strKey] = vecOutput; + +						const_reorder_classifier_right_->fnEval(ostr.str().c_str(), vecOutput); +						(*map_right_)[strKey] = vecOutput; +					} +				} +			} +		} + +	} + +	void InitializeSRLReorderClassifierOutput() { +		if (!b_srl_order_feature_) return; +		int size_block_status = dict_block_status_->max(); + +		for (size_t i = 0; i < focused_srl_->focus_predicates_.size(); i++) { +			const FocusedPredicate *pred = focused_srl_->focus_predicates_[i]; + +			for (size_t j = 1; j < pred->vec_items_.size(); j++) { +				for (size_t k = 1; k <= size_block_status; k++) { +					for (size_t l = 1; l <= size_block_status; l++) { +						ostringstream ostr; + +						SArgumentReorderModel::fnGenerateFeature(parsed_tree_, pred->pred_, pred, j, dict_block_status_->Convert(k), dict_block_status_->Convert(l), ostr); + +						string strKey; +						GenerateKey(pred->vec_items_[j - 1]->tree_item_, pred->vec_items_[j]->tree_item_, k, l, strKey); + +						vector<double> vecOutput; +						srl_reorder_classifier_left_->fnEval(ostr.str().c_str(), vecOutput); +						(*map_srl_left_)[strKey] = vecOutput; + +						srl_reorder_classifier_right_->fnEval(ostr.str().c_str(), vecOutput); +						(*map_srl_right_)[strKey] = vecOutput; +					} +				} +			} +		} +	} + +	double CalculateConstReorderProb(const Tsuruoka_Maxent *const_reorder_classifier, const MapClassifier *map, const string& key, const string& outcome) { +		MapClassifier::const_iterator iter = (*map).find(key); +		assert(iter != map->end()); +		int id = const_reorder_classifier->fnGetClassId(outcome); +		return iter->second[id]; +	} + +	void FreeSentenceVariables( ) { +		if (srl_sentence_ != NULL) { +			delete srl_sentence_; +			srl_sentence_ = NULL; +		} +		else { +			if (parsed_tree_ != NULL) +				delete parsed_tree_; +			parsed_tree_ = NULL; +		} + +		if (focused_consts_ != NULL) +			delete focused_consts_; +		focused_consts_ = NULL; + +		for (size_t i = 0; i < vec_target_tran_.size(); i++) +			delete vec_target_tran_[i]; +		vec_target_tran_.clear(); + +		if (index_map_ != NULL) +			delete index_map_; +		index_map_ = NULL; + +		if (map_left_ != NULL) +			delete map_left_; +		map_left_ = NULL; +		if (map_right_ != NULL) +			delete map_right_; +		map_right_ = NULL; + +		if (map_srl_left_ != NULL) +			delete map_srl_left_; +		map_srl_left_ = NULL; +		if (map_srl_right_ != NULL) +			delete map_srl_right_; +		map_srl_right_ = NULL; +	} + +	void InitializeClassifier(const char* pszFname, Tsuruoka_Maxent **ppClassifier) { +		(*ppClassifier) = new Tsuruoka_Maxent(pszFname); +	} + +	void GenerateOutcome(const vector<int>& vecPos, vector<string>& vecOutcome) { +		vecOutcome.clear(); + +		for (size_t i = 1; i < vecPos.size(); i++) { +			if (vecPos[i] == -1 || vecPos[i] == vecPos[i - 1]) { +				vecOutcome.push_back("M"); //monotone +				continue; +			} + + +			if (vecPos[i - 1] == -1) { +				//vecPos[i] is not -1 +				size_t j = i - 2; +				while (j > -1 && vecPos[j] == -1) +					j--; + +				size_t k; +				for (k = 0; k < j; k++) { +					if (vecPos[k] > vecPos[j] || vecPos[k] <= vecPos[i]) +						break; +				} +				if (k < j) { +					vecOutcome.push_back("DM"); +					continue; +				} + +				for (k = i + 1; k < vecPos.size(); k++) +					if (vecPos[k] < vecPos[i] && (j == -1 && vecPos[k] >= vecPos[j])) +						break; +				if (k < vecPos.size()) { +					vecOutcome.push_back("DM"); +					continue; +				} +				vecOutcome.push_back("M"); +			} else { +				//neither of vecPos[i-1] and vecPos[i] is -1 +				if (vecPos[i - 1] < vecPos[i]) { +					//monotone or discon't monotone +					size_t j; +					for (j = 0; j < i - 1; j++) +						if (vecPos[j] > vecPos[i - 1] && vecPos[j] <= vecPos[i]) +							break; +					if (j < i - 1) { +						vecOutcome.push_back("DM"); +						continue; +					} +					for (j = i + 1; j < vecPos.size(); j++) +						if (vecPos[j] >= vecPos[i - 1] && vecPos[j] < vecPos[i]) +							break; +					if (j < vecPos.size()) { +						vecOutcome.push_back("DM"); +						continue; +					} +					vecOutcome.push_back("M"); +				} else { +					//swap or discon't swap +					size_t j; +					for (j = 0; j < i - 1; j++) +						if (vecPos[j] > vecPos[i] && vecPos[j] <= vecPos[i - 1]) +							break; +					if (j < i - 1) { +						vecOutcome.push_back("DS"); +						continue; +					} +					for (j = i + 1; j < vecPos.size(); j++) +						if ( vecPos[j] >= vecPos[i] && vecPos[j] < vecPos[i - 1]) +							break; +					if (j < vecPos.size()) { +						vecOutcome.push_back("DS"); +						continue; +					} +					vecOutcome.push_back("S"); +				} +			} +		} + +		assert(vecOutcome.size() == vecPos.size() - 1); +	} + +	void fnGetRelativePosition(const vector<int>& vecLeft, vector<int>& vecPosition) { +		vecPosition.clear(); + +		vector<float> vec; +		vec.reserve(vecLeft.size()); +		for (size_t i = 0; i < vecLeft.size(); i++) { +			if (vecLeft[i] == -1) { +				if (i == 0) +					vec.push_back(-1); +				else +					vec.push_back(vecLeft[i-1] + 0.1); +			} else +				vec.push_back(vecLeft[i]); +		} + +		for (size_t i = 0; i < vecLeft.size(); i++) { +			int count = 0; + +			for (size_t j = 0; j < vecLeft.size(); j++) { +				if ( j == i) continue; +				if (vec[j] < vec[i]) { +					count++; +				} else if (vec[j] == vec[i] && j < i) { +					count++; +				} +			} +			vecPosition.push_back(count); +		} + +		for (size_t i = 1; i < vecPosition.size(); i++) { +			if (vecPosition[i - 1] == vecPosition[i]) { +				for (size_t j = 0; j < vecLeft.size(); j++) +					cout << vecLeft[j] << " "; +				cout << "\n"; +				assert(false); +			} +		} +	} + +	inline void fnGetOutcome(int i1, int i2, string& strOutcome) { +		assert(i1 != i2); +		if (i1 < i2) { +			if (i2 > i1 + 1) strOutcome = string("DM"); +			else strOutcome = string("M"); +		} else { +			if (i1 > i2 + 1) strOutcome = string("DS"); +			else strOutcome = string("S"); +		} +	} + +	//features in constituent_reorder_model.cc +	void GenerateFeature(const SParsedTree *pTree, const STreeItem *pParent, int iPos, const string& strBlockStatus1, const string& strBlockStatus2, ostringstream& ostr) { +		STreeItem *pCon1, *pCon2; +		pCon1 = pParent->m_vecChildren[iPos - 1]; +		pCon2 = pParent->m_vecChildren[iPos]; + +		string left_label = string(pCon1->m_pszTerm); +		string right_label = string(pCon2->m_pszTerm); +		string parent_label = string(pParent->m_pszTerm); + +		vector<string> vec_other_right_sibling; +		for (int i = iPos + 1; i < pParent->m_vecChildren.size(); i++) +			vec_other_right_sibling.push_back(string(pParent->m_vecChildren[i]->m_pszTerm)); +		if (vec_other_right_sibling.size() == 0) +			vec_other_right_sibling.push_back(string("NULL")); +		vector<string> vec_other_left_sibling; +		for (int i = 0; i < iPos - 1; i++) +			vec_other_left_sibling.push_back(string(pParent->m_vecChildren[i]->m_pszTerm)); +		if (vec_other_left_sibling.size() == 0) +			vec_other_left_sibling.push_back(string("NULL")); + +		//generate features +		//f1 +		ostr << "f1=" << left_label << "_" << right_label << "_" << parent_label; +		//f2 +		for (int i = 0; i < vec_other_right_sibling.size(); i++) +			ostr << " f2=" << left_label << "_" << right_label << "_" << parent_label << "_" << vec_other_right_sibling[i]; +		//f3 +		for (int i = 0; i < vec_other_left_sibling.size(); i++) +			ostr << " f3=" << left_label << "_" << right_label << "_" << parent_label << "_" << vec_other_left_sibling[i]; +		//f4 +		ostr << " f4=" << left_label << "_" << right_label << "_" << pTree->m_vecTerminals[pCon1->m_iHeadWord]->m_ptParent->m_pszTerm; +		//f5 +		ostr << " f5=" << left_label << "_" << right_label << "_" << pTree->m_vecTerminals[pCon1->m_iHeadWord]->m_pszTerm; +		//f6 +		ostr << " f6=" << left_label << "_" << right_label << "_" << pTree->m_vecTerminals[pCon2->m_iHeadWord]->m_ptParent->m_pszTerm; +		//f7 +		ostr << " f7=" << left_label << "_" << right_label << "_" << pTree->m_vecTerminals[pCon2->m_iHeadWord]->m_pszTerm; +		//f8 +		ostr << " f8=" << left_label << "_" << right_label << "_" << strBlockStatus1; +		//f9 +		ostr << " f9=" << left_label << "_" << right_label << "_" << strBlockStatus2; + +		//f10 +		ostr << " f10=" << left_label << "_" << parent_label; +		//f11 +		ostr << " f11=" << right_label << "_" << parent_label; +	} + +	SParsedTree* ReadParseTree(const std::string& parse_file) { +                SParseReader *reader = new SParseReader(parse_file.c_str(), false); +                SParsedTree *tree = reader->fnReadNextParseTree(); +                //assert(tree != NULL); +                delete reader; +                return tree; +    } + +	SSrlSentence* ReadSRLSentence(const std::string& srl_file) { +                SSrlSentenceReader *reader = new SSrlSentenceReader(srl_file.c_str()); +                SSrlSentence *srl = reader->fnReadNextSrlSentence(); +                //assert(srl != NULL); +                delete reader; +                return srl; +    } + +private: +	Tsuruoka_Maxent *const_reorder_classifier_left_; +	Tsuruoka_Maxent *const_reorder_classifier_right_; + +	Tsuruoka_Maxent *srl_reorder_classifier_left_; +	Tsuruoka_Maxent *srl_reorder_classifier_right_; + +	MapClassifier *map_left_; +	MapClassifier *map_right_; + +    MapClassifier *map_srl_left_; +    MapClassifier *map_srl_right_; + +	SParsedTree *parsed_tree_; +	FocusedConstituent *focused_consts_; +	vector<TargetTranslation*> vec_target_tran_; + +	bool b_order_feature_; +	bool b_block_feature_; + +	bool b_srl_block_feature_; +	bool b_srl_order_feature_; +	SSrlSentence *srl_sentence_; +	FocusedSRL *focused_srl_; + +	SIndexMap *index_map_; + +	Dict *dict_block_status_; +}; + +ConstReorderFeature::ConstReorderFeature(const std::string& param) { +	pimpl_ = new ConstReorderFeatureImpl(param); +	SetStateSize(ConstReorderFeatureImpl::ReserveStateSize()); +	name_ = "ConstReorderFeature"; +} + +ConstReorderFeature::~ConstReorderFeature( ) { //TODO +	delete pimpl_; +} + +void ConstReorderFeature::PrepareForInput(const SentenceMetadata& smeta) { +	string parse_file = smeta.GetSGMLValue("parse"); +	string srl_file = smeta.GetSGMLValue("srl"); +	assert(!(parse_file == "" && srl_file == "")); + +	string indexmap_file = smeta.GetSGMLValue("index-map"); +	pimpl_->InitializeInputSentence(parse_file, srl_file, indexmap_file); +} + +void ConstReorderFeature::TraversalFeaturesImpl(const SentenceMetadata& /* smeta */, +                                          const Hypergraph::Edge& edge, +                                          const vector<const void*>& ant_states, +                                          SparseVector<double>* features, +                                          SparseVector<double>* estimated_features, +                                          void* state) const { +	pimpl_->SetConstReorderFeature(edge, features, ant_states, state); +} + +string ConstReorderFeature::usage(bool /*param*/,bool /*verbose*/) { +  return "ConstReorderFeature"; +} + +boost::shared_ptr<FeatureFunction> CreateConstReorderModel(const std::string ¶m) { +	ConstReorderFeature *ret = new ConstReorderFeature(param); +	return boost::shared_ptr<FeatureFunction>(ret); +} + +boost::shared_ptr<FeatureFunction> ConstReorderFeatureFactory::Create(std::string param) const { +	return CreateConstReorderModel(param); +} + +std::string  ConstReorderFeatureFactory::usage(bool params,bool verbose) const { +  return ConstReorderFeature::usage(params, verbose); +} diff --git a/decoder/ff_const_reorder.h b/decoder/ff_const_reorder.h new file mode 100644 index 00000000..1aed2584 --- /dev/null +++ b/decoder/ff_const_reorder.h @@ -0,0 +1,41 @@ +/* + * ff_const_reorder.h + * + *  Created on: Jul 11, 2013 + *      Author: junhuili + */ + +#ifndef FF_CONST_REORDER_H_ +#define FF_CONST_REORDER_H_ + +#include "ff_factory.h" +#include "ff.h" + +struct ConstReorderFeatureImpl; + +class ConstReorderFeature : public FeatureFunction { + public: +  // param = "filename n" +	ConstReorderFeature(const std::string& param); +    ~ConstReorderFeature(); +    static std::string usage(bool param,bool verbose); + protected: +  virtual void PrepareForInput(const SentenceMetadata& smeta); + +  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, +                                     const HG::Edge& edge, +                                     const std::vector<const void*>& ant_contexts, +                                     SparseVector<double>* features, +                                     SparseVector<double>* estimated_features, +                                     void* out_context) const; + private: +  ConstReorderFeatureImpl* pimpl_; +}; + + +struct ConstReorderFeatureFactory : public FactoryBase<FeatureFunction> { +  FP Create(std::string param) const; +  std::string usage(bool params,bool verbose) const; +}; + +#endif /* FF_CONST_REORDER_H_ */ diff --git a/decoder/ffset.cc b/decoder/ffset.cc index 5820f421..488346bc 100644 --- a/decoder/ffset.cc +++ b/decoder/ffset.cc @@ -15,6 +15,26 @@ ModelSet::ModelSet(const vector<double>& w, const vector<const FeatureFunction*>      model_state_pos_[i] = state_size_;      state_size_ += models_[i]->StateSize();    } + +  //added by lijunhui +  //erase the states for SRLReorderFeature and DepPrnFeatures +  for (int i = 0; i < models_.size(); i++) { +    if (models_[i]->name_ == string("SRLReorderFeature") +      || models_[i]->name_ == string("DepPrnFeatures") +      || models_[i]->name_ == string("SyntacticContextFeature") +      || models_[i]->name_ == string("ArgtReorderFeature") +      || models_[i]->name_ == string("ConstReorderSparseFeature") +      || models_[i]->name_ == string("ConstReorderFeature")) { +      int start_pos = model_state_pos_[i]; +      int end_pos; +      if (i == models_.size() - 1) +        end_pos = state_size_; +      else +        end_pos = model_state_pos_[i + 1]; +      erase_state_start_pos_.push_back(start_pos); +      erase_state_end_pos_.push_back(end_pos); +    } +  }  }  void ModelSet::PrepareForInput(const SentenceMetadata& smeta) { @@ -70,3 +90,32 @@ void ModelSet::AddFinalFeatures(const FFState& state, HG::Edge* edge,SentenceMet    edge->edge_prob_.logeq(edge->feature_values_.dot(weights_));  } +bool ModelSet::HaveEraseState() const { +       if (erase_state_start_pos_.size() == 0) return false; +       return true; +} + +void ModelSet::GetRealFFState(const FFState& state, FFState& real_state) const { +       real_state.resize(state.size()); +       for (int i = 0; i < state.size(); i++) { +               real_state[i] = state[i]; +       } + +       if (state.size() == 0) +               return; +       assert(state.size() == state_size_); + +       //erase the states for SRLReorderFeature and DepPrnFeatures and SyntacticContextFeature +       for (int i = 0; i < erase_state_start_pos_.size(); i++){ +               int start_pos = erase_state_start_pos_[i]; +               int end_pos = erase_state_end_pos_[i]; +               for (int j = start_pos; j < end_pos; j++) +                       real_state[j] = 0; +       } +} + +FFState ModelSet::GetRealFFState(const FFState& state) const { +       FFState real_state; +       GetRealFFState(state, real_state); +       return real_state; +} diff --git a/decoder/ffset.h b/decoder/ffset.h index 28aef667..0467b01d 100644 --- a/decoder/ffset.h +++ b/decoder/ffset.h @@ -47,6 +47,17 @@ class ModelSet {    bool stateless() const { return !state_size_; } +  //added by ljh +  //some states of features are not contextual, but used for storing some useful information for calculate feature val +  //it needs to erase these states +  //this function is only called by IncorporateIntoPlusLMForest(...) in apply_models.cc +  void GetRealFFState(const FFState& state, FFState& real_state) const; +  FFState GetRealFFState(const FFState& state) const; +  bool HaveEraseState() const; + private: +  std::vector<int> erase_state_start_pos_; +  std::vector<int> erase_state_end_pos_; +   private:    std::vector<const FeatureFunction*> models_;    const std::vector<double>& weights_;  | 
