summaryrefslogtreecommitdiff
path: root/decoder
diff options
context:
space:
mode:
Diffstat (limited to 'decoder')
-rw-r--r--decoder/ff_const_reorder.cc2484
-rw-r--r--decoder/ff_const_reorder.h22
2 files changed, 1279 insertions, 1227 deletions
diff --git a/decoder/ff_const_reorder.cc b/decoder/ff_const_reorder.cc
index 97b76f7f..8bd3f4e2 100644
--- a/decoder/ff_const_reorder.cc
+++ b/decoder/ff_const_reorder.cc
@@ -17,124 +17,117 @@ using namespace std;
typedef HASH_MAP<std::string, vector<double> > MapClassifier;
-struct SBitArray{
- SBitArray(int size) :
- size_(size){
- int bit_size = size / 8;
- if (size % 8 > 0)
- bit_size++;
-
- char_ = new unsigned char[bit_size];
- memset(char_, 0, bit_size);
- }
- ~SBitArray() {
- delete [] char_;
- }
-
- int Get(int index) const {
- int i;
-
- i = index;
- if (i < 0)
- i = size_ + i;
- assert(i > -1 && i < size_);
-
- int byte_index, bit_index;
- byte_index = i/8;
- bit_index = i%8;
- unsigned char res;
- if (bit_index == 0)
- res = char_[byte_index] & 0x01;
- else if (bit_index == 1)
- res = char_[byte_index] & 0x02;
- else if (bit_index == 2)
- res = char_[byte_index] & 0x04;
- else if (bit_index == 3)
- res = char_[byte_index] & 0x08;
- else if (bit_index == 4)
- res = char_[byte_index] & 0x10;
- else if (bit_index == 5)
- res = char_[byte_index] & 0x20;
- else if (bit_index == 6)
- res = char_[byte_index] & 0x40;
- else if (bit_index == 7)
- res = char_[byte_index] & 0x80;
- else
- assert(false);
- if (res != 0)
- return 1;
- else
- return 0;
- }
-
- void Set(int index, int val) {
- assert(val == 0 || val == 1);
- int i;
-
- i = index;
- if (i < 0)
- i = size_ + i;
- assert(i > -1 && i < size_);
-
- int byte_index, bit_index;
- byte_index = i/8;
- bit_index = i%8;
- unsigned char res;
-
- if (bit_index == 0) {
- if (val == 0)
- res = char_[byte_index] & 0xFE;
- else
- res = char_[byte_index] | 0x01;
- } else if (bit_index == 1) {
- if (val == 0)
- res = char_[byte_index] & 0xFD;
- else
- res = char_[byte_index] | 0x02;
- } else if (bit_index == 2) {
- if (val == 0)
- res = char_[byte_index] & 0xFB;
- else
- res = char_[byte_index] | 0x04;
- } else if (bit_index == 3) {
- if (val == 0)
- res = char_[byte_index] & 0xF7;
- else
- res = char_[byte_index] | 0x08;
- } else if (bit_index == 4) {
- if (val == 0)
- res = char_[byte_index] & 0xEF;
- else
- res = char_[byte_index] | 0x10;
- } else if (bit_index == 5) {
- if (val == 0)
- res = char_[byte_index] & 0xDF;
- else
- res = char_[byte_index] | 0x20;
- } else if (bit_index == 6) {
- if (val == 0)
- res = char_[byte_index] & 0xBF;
- else
- res = char_[byte_index] | 0x40;
- } else if (bit_index == 7) {
- if (val == 0)
- res = char_[byte_index] & 0x7F;
- else
- res = char_[byte_index] | 0x80;
- } else
- assert(false);
- char_[byte_index] = res;
- }
-
-private:
- const int size_;
- unsigned char *char_;
+struct SBitArray {
+ SBitArray(int size) : size_(size) {
+ int bit_size = size / 8;
+ if (size % 8 > 0) bit_size++;
+
+ char_ = new unsigned char[bit_size];
+ memset(char_, 0, bit_size);
+ }
+ ~SBitArray() { delete[] char_; }
+
+ int Get(int index) const {
+ int i;
+
+ i = index;
+ if (i < 0) i = size_ + i;
+ assert(i > -1 && i < size_);
+
+ int byte_index, bit_index;
+ byte_index = i / 8;
+ bit_index = i % 8;
+ unsigned char res;
+ if (bit_index == 0)
+ res = char_[byte_index] & 0x01;
+ else if (bit_index == 1)
+ res = char_[byte_index] & 0x02;
+ else if (bit_index == 2)
+ res = char_[byte_index] & 0x04;
+ else if (bit_index == 3)
+ res = char_[byte_index] & 0x08;
+ else if (bit_index == 4)
+ res = char_[byte_index] & 0x10;
+ else if (bit_index == 5)
+ res = char_[byte_index] & 0x20;
+ else if (bit_index == 6)
+ res = char_[byte_index] & 0x40;
+ else if (bit_index == 7)
+ res = char_[byte_index] & 0x80;
+ else
+ assert(false);
+ if (res != 0)
+ return 1;
+ else
+ return 0;
+ }
+
+ void Set(int index, int val) {
+ assert(val == 0 || val == 1);
+ int i;
+
+ i = index;
+ if (i < 0) i = size_ + i;
+ assert(i > -1 && i < size_);
+
+ int byte_index, bit_index;
+ byte_index = i / 8;
+ bit_index = i % 8;
+ unsigned char res;
+
+ if (bit_index == 0) {
+ if (val == 0)
+ res = char_[byte_index] & 0xFE;
+ else
+ res = char_[byte_index] | 0x01;
+ } else if (bit_index == 1) {
+ if (val == 0)
+ res = char_[byte_index] & 0xFD;
+ else
+ res = char_[byte_index] | 0x02;
+ } else if (bit_index == 2) {
+ if (val == 0)
+ res = char_[byte_index] & 0xFB;
+ else
+ res = char_[byte_index] | 0x04;
+ } else if (bit_index == 3) {
+ if (val == 0)
+ res = char_[byte_index] & 0xF7;
+ else
+ res = char_[byte_index] | 0x08;
+ } else if (bit_index == 4) {
+ if (val == 0)
+ res = char_[byte_index] & 0xEF;
+ else
+ res = char_[byte_index] | 0x10;
+ } else if (bit_index == 5) {
+ if (val == 0)
+ res = char_[byte_index] & 0xDF;
+ else
+ res = char_[byte_index] | 0x20;
+ } else if (bit_index == 6) {
+ if (val == 0)
+ res = char_[byte_index] & 0xBF;
+ else
+ res = char_[byte_index] | 0x40;
+ } else if (bit_index == 7) {
+ if (val == 0)
+ res = char_[byte_index] & 0x7F;
+ else
+ res = char_[byte_index] | 0x80;
+ } else
+ assert(false);
+ char_[byte_index] = res;
+ }
+
+ private:
+ const int size_;
+ unsigned char* char_;
};
inline bool is_inside(int i, int left, int right) {
- if ( i < left || i > right )
- return false;
- return true;
+ if (i < left || i > right) return false;
+ return true;
}
/*
@@ -142,9 +135,8 @@ inline bool is_inside(int i, int left, int right) {
* [i, j] is inside [left, right] or [i, j] equates to [left, right]
*/
inline bool is_inside(int i, int j, int left, int right) {
- if ( i >= left && j <= right )
- return true;
- return false;
+ if (i >= left && j <= right) return true;
+ return false;
}
/*
@@ -152,9 +144,8 @@ inline bool is_inside(int i, int j, int left, int right) {
* [i, j] is inside [left, right], but [i, j] not equal to [left, right]
*/
inline bool is_proper_inside(int i, int j, int left, int right) {
- if ( i >= left && j <= right && right - left > j - i)
- return true;
- return false;
+ if (i >= left && j <= right && right - left > j - i) return true;
+ return false;
}
/*
@@ -162,1157 +153,1218 @@ inline bool is_proper_inside(int i, int j, int left, int right) {
* [i, j] is proper proper inside [left, right]
*/
inline bool is_proper_proper_inside(int i, int j, int left, int right) {
- if ( i > left && j < right)
- return true;
- return false;
+ if (i > left && j < right) return true;
+ return false;
}
inline bool is_overlap(int left1, int right1, int left2, int right2) {
- if (is_inside(left1, left2, right2) || is_inside(left2, left1, right1))
- return true;
+ if (is_inside(left1, left2, right2) || is_inside(left2, left1, right1))
+ return true;
- return false;
+ return false;
}
-inline void NewAndCopyCharArray(char**p, const char* q) {
- if (q != NULL) {
- (*p) = new char[strlen(q) + 1];
- strcpy((*p), q);
- } else
- (*p) = NULL;
+inline void NewAndCopyCharArray(char** p, const char* q) {
+ if (q != NULL) {
+ (*p) = new char[strlen(q) + 1];
+ strcpy((*p), q);
+ } else
+ (*p) = NULL;
}
-//TODO:to make the alignment more efficient
-struct TargetTranslation{
- typedef vector<int> SingleWordAlign;
- TargetTranslation(int begin_pos, int end_pos, int input_begin_pos, int input_end_pos, int e_num_word):
- begin_pos_(begin_pos),
- end_pos_(end_pos),
- input_begin_pos_(input_begin_pos),
- input_end_pos_(input_end_pos),
- e_num_words_(e_num_word),
- vec_left_most_(end_pos - begin_pos + 1, e_num_word),
- vec_right_most_(end_pos - begin_pos + 1, -1),
- vec_f_align_bit_array_(end_pos - begin_pos + 1, NULL),
- vec_e_align_bit_array_(e_num_word, NULL) {
- int len = end_pos - begin_pos + 1;
-
- /*vec_f_align_bit_array_.reserve(len);
- for (int i = 0; i < len; i++)
- vec_f_align_bit_array_.push_back(NULL);
-
- vec_e_align_bit_array_.reserve(e_num_word);
- for (int i = 0; i < e_num_word; i++)
- vec_e_align_bit_array_.push_back(NULL);*/
- align_.reserve(1.5 * len);
- }
- ~TargetTranslation( ) {
- for (size_t i = 0; i < vec_f_align_bit_array_.size(); i++)
- if (vec_f_align_bit_array_[i] != NULL)
- delete vec_f_align_bit_array_[i];
-
- for (size_t i = 0; i < vec_e_align_bit_array_.size(); i++)
- if (vec_e_align_bit_array_[i] != NULL)
- delete vec_e_align_bit_array_[i];
-
- for (size_t i = 0; i < align_.size(); i++)
- delete align_[i];
- }
-
- void InsertAlignmentPoint(int s, int t) {
- int i = s - begin_pos_;
-
- SBitArray* &b = vec_f_align_bit_array_[i];
- if (b == NULL)
- b = new SBitArray(e_num_words_);
- b->Set(t, 1);
-
- SBitArray* &a = vec_e_align_bit_array_[t];
- if (a == NULL)
- a = new SBitArray(end_pos_ - begin_pos_ + 1);
- a->Set(i, 1);
-
- align_.push_back(new AlignmentPoint(s, t));
-
- if (t > vec_right_most_[i])
- vec_right_most_[i] = t;
- if (t < vec_left_most_[i])
- vec_left_most_[i] = t;
- }
-
- /*
- * given a source span [begin, end], whether its target side is continuous,
- * return "0": the source span is translated silently
- * return "1": there is at least on word inside its target span, this word doesn't align to any word inside [begin, end], but outside [begin, end]
- * return "2": otherwise
- */
- string IsTargetConstinousSpan(int begin, int end) const {
- int target_begin, target_end;
- FindLeftRightMostTargetSpan(begin, end, target_begin, target_end);
- if (target_begin == -1) return "0";
-
- for (int i = target_begin; i <= target_end; i++) {
- if (vec_e_align_bit_array_[i] == NULL) continue;
- int j = begin;
- for (; j <= end; j++) {
- if (vec_e_align_bit_array_[i]->Get(j - begin_pos_))
- break;
- }
- if (j == end + 1) //e[i] is aligned, but e[i] doesn't align to any source word in [begin_pos, end_pos]
- return "1";
- }
- return "2";
- }
-
- string IsTargetConstinousSpan2(int begin, int end) const {
- int target_begin, target_end;
- FindLeftRightMostTargetSpan(begin, end, target_begin, target_end);
- if (target_begin == -1) return "Unaligned";
-
- for (int i = target_begin; i <= target_end; i++) {
- if (vec_e_align_bit_array_[i] == NULL) continue;
- int j = begin;
- for (; j <= end; j++) {
- if (vec_e_align_bit_array_[i]->Get(j - begin_pos_))
- break;
- }
- if (j == end + 1) //e[i] is aligned, but e[i] doesn't align to any source word in [begin_pos, end_pos]
- return "Discon't";
- }
- return "Con't";
- }
-
-
-
- void FindLeftRightMostTargetSpan(int begin, int end, int& target_begin, int& target_end) const {
- int b = begin - begin_pos_;
- int e = end - begin_pos_ + 1;
-
- target_begin = vec_left_most_[b];
- target_end = vec_right_most_[b];
- for (int i = b + 1; i < e; i++) {
- if (target_begin > vec_left_most_[i])
- target_begin = vec_left_most_[i];
- if (target_end < vec_right_most_[i])
- target_end = vec_right_most_[i];
- }
- if (target_end == -1)
- target_begin = -1;
- return;
-
- target_begin = e_num_words_;
- target_end = -1;
-
- for (int i = begin - begin_pos_; i < end - begin_pos_ + 1; i++) {
- if (vec_f_align_bit_array_[i] == NULL) continue;
- for (int j = 0; j < target_begin; j++)
- if (vec_f_align_bit_array_[i]->Get(j)) {
- target_begin = j;
- break;
- }
- }
- for (int i = end - begin_pos_; i > begin - begin_pos_ -1; i--) {
- if (vec_f_align_bit_array_[i] == NULL) continue;
- for (int j = e_num_words_ - 1; j > target_end; j--)
- if (vec_f_align_bit_array_[i]->Get(j)) {
- target_end = j;
- break;
- }
- }
-
- if (target_end == -1)
- target_begin = -1;
- }
-
- const uint16_t begin_pos_, end_pos_; //the position in parse
- const uint16_t input_begin_pos_, input_end_pos_; //the position in input
- const uint16_t e_num_words_;
- vector<AlignmentPoint*> align_;
-private:
- vector<SBitArray*> vec_f_align_bit_array_;
- vector<SBitArray*> vec_e_align_bit_array_;
-
- vector<short> vec_left_most_;
- vector<short> vec_right_most_;
-};
+// TODO:to make the alignment more efficient
+struct TargetTranslation {
+ typedef vector<int> SingleWordAlign;
+ TargetTranslation(int begin_pos, int end_pos, int input_begin_pos,
+ int input_end_pos, int e_num_word)
+ : begin_pos_(begin_pos),
+ end_pos_(end_pos),
+ input_begin_pos_(input_begin_pos),
+ input_end_pos_(input_end_pos),
+ e_num_words_(e_num_word),
+ vec_left_most_(end_pos - begin_pos + 1, e_num_word),
+ vec_right_most_(end_pos - begin_pos + 1, -1),
+ vec_f_align_bit_array_(end_pos - begin_pos + 1, NULL),
+ vec_e_align_bit_array_(e_num_word, NULL) {
+ int len = end_pos - begin_pos + 1;
+
+ /*vec_f_align_bit_array_.reserve(len);
+ for (int i = 0; i < len; i++)
+ vec_f_align_bit_array_.push_back(NULL);
+
+ vec_e_align_bit_array_.reserve(e_num_word);
+ for (int i = 0; i < e_num_word; i++)
+ vec_e_align_bit_array_.push_back(NULL);*/
+ align_.reserve(1.5 * len);
+ }
+ ~TargetTranslation() {
+ for (size_t i = 0; i < vec_f_align_bit_array_.size(); i++)
+ if (vec_f_align_bit_array_[i] != NULL) delete vec_f_align_bit_array_[i];
+
+ for (size_t i = 0; i < vec_e_align_bit_array_.size(); i++)
+ if (vec_e_align_bit_array_[i] != NULL) delete vec_e_align_bit_array_[i];
+
+ for (size_t i = 0; i < align_.size(); i++) delete align_[i];
+ }
+
+ void InsertAlignmentPoint(int s, int t) {
+ int i = s - begin_pos_;
+
+ SBitArray*& b = vec_f_align_bit_array_[i];
+ if (b == NULL) b = new SBitArray(e_num_words_);
+ b->Set(t, 1);
+
+ SBitArray*& a = vec_e_align_bit_array_[t];
+ if (a == NULL) a = new SBitArray(end_pos_ - begin_pos_ + 1);
+ a->Set(i, 1);
+
+ align_.push_back(new AlignmentPoint(s, t));
+
+ if (t > vec_right_most_[i]) vec_right_most_[i] = t;
+ if (t < vec_left_most_[i]) vec_left_most_[i] = t;
+ }
+
+ /*
+ * given a source span [begin, end], whether its target side is continuous,
+ * return "0": the source span is translated silently
+ * return "1": there is at least on word inside its target span, this word
+ * doesn't align to any word inside [begin, end], but outside [begin, end]
+ * return "2": otherwise
+ */
+ string IsTargetConstinousSpan(int begin, int end) const {
+ int target_begin, target_end;
+ FindLeftRightMostTargetSpan(begin, end, target_begin, target_end);
+ if (target_begin == -1) return "0";
+
+ for (int i = target_begin; i <= target_end; i++) {
+ if (vec_e_align_bit_array_[i] == NULL) continue;
+ int j = begin;
+ for (; j <= end; j++) {
+ if (vec_e_align_bit_array_[i]->Get(j - begin_pos_)) break;
+ }
+ if (j == end + 1) // e[i] is aligned, but e[i] doesn't align to any
+ // source word in [begin_pos, end_pos]
+ return "1";
+ }
+ return "2";
+ }
+
+ string IsTargetConstinousSpan2(int begin, int end) const {
+ int target_begin, target_end;
+ FindLeftRightMostTargetSpan(begin, end, target_begin, target_end);
+ if (target_begin == -1) return "Unaligned";
+
+ for (int i = target_begin; i <= target_end; i++) {
+ if (vec_e_align_bit_array_[i] == NULL) continue;
+ int j = begin;
+ for (; j <= end; j++) {
+ if (vec_e_align_bit_array_[i]->Get(j - begin_pos_)) break;
+ }
+ if (j == end + 1) // e[i] is aligned, but e[i] doesn't align to any
+ // source word in [begin_pos, end_pos]
+ return "Discon't";
+ }
+ return "Con't";
+ }
+
+ void FindLeftRightMostTargetSpan(int begin, int end, int& target_begin,
+ int& target_end) const {
+ int b = begin - begin_pos_;
+ int e = end - begin_pos_ + 1;
+
+ target_begin = vec_left_most_[b];
+ target_end = vec_right_most_[b];
+ for (int i = b + 1; i < e; i++) {
+ if (target_begin > vec_left_most_[i]) target_begin = vec_left_most_[i];
+ if (target_end < vec_right_most_[i]) target_end = vec_right_most_[i];
+ }
+ if (target_end == -1) target_begin = -1;
+ return;
+
+ target_begin = e_num_words_;
+ target_end = -1;
+
+ for (int i = begin - begin_pos_; i < end - begin_pos_ + 1; i++) {
+ if (vec_f_align_bit_array_[i] == NULL) continue;
+ for (int j = 0; j < target_begin; j++)
+ if (vec_f_align_bit_array_[i]->Get(j)) {
+ target_begin = j;
+ break;
+ }
+ }
+ for (int i = end - begin_pos_; i > begin - begin_pos_ - 1; i--) {
+ if (vec_f_align_bit_array_[i] == NULL) continue;
+ for (int j = e_num_words_ - 1; j > target_end; j--)
+ if (vec_f_align_bit_array_[i]->Get(j)) {
+ target_end = j;
+ break;
+ }
+ }
+
+ if (target_end == -1) target_begin = -1;
+ }
-struct FocusedConstituent{
- FocusedConstituent(const SParsedTree *pTree) {
- if (pTree == NULL) return;
- for (size_t i = 0; i < pTree->m_vecTerminals.size(); i++) {
- STreeItem *pParent = pTree->m_vecTerminals[i]->m_ptParent;
-
- while (pParent != NULL) {
- //if (pParent->m_vecChildren.size() > 1 && pParent->m_iEnd - pParent->m_iBegin > 5) {
- //if (pParent->m_vecChildren.size() > 1) {
- if (true) {
-
- //do constituent reordering for all children of pParent
- if (strcmp(pParent->m_pszTerm, "ROOT"))
- focus_parents_.push_back(pParent);
- }
- if (pParent->m_iBrotherIndex != 0) break;
- pParent = pParent->m_ptParent;
- }
- }
- }
-
- ~FocusedConstituent() { //TODO
- focus_parents_.clear();
- }
-
- vector<STreeItem*> focus_parents_;
+ const uint16_t begin_pos_, end_pos_; // the position in parse
+ const uint16_t input_begin_pos_, input_end_pos_; // the position in input
+ const uint16_t e_num_words_;
+ vector<AlignmentPoint*> align_;
+
+ private:
+ vector<SBitArray*> vec_f_align_bit_array_;
+ vector<SBitArray*> vec_e_align_bit_array_;
+
+ vector<short> vec_left_most_;
+ vector<short> vec_right_most_;
};
+struct FocusedConstituent {
+ FocusedConstituent(const SParsedTree* pTree) {
+ if (pTree == NULL) return;
+ for (size_t i = 0; i < pTree->m_vecTerminals.size(); i++) {
+ STreeItem* pParent = pTree->m_vecTerminals[i]->m_ptParent;
+
+ while (pParent != NULL) {
+ // if (pParent->m_vecChildren.size() > 1 && pParent->m_iEnd -
+ // pParent->m_iBegin > 5) {
+ // if (pParent->m_vecChildren.size() > 1) {
+ if (true) {
+
+ // do constituent reordering for all children of pParent
+ if (strcmp(pParent->m_pszTerm, "ROOT"))
+ focus_parents_.push_back(pParent);
+ }
+ if (pParent->m_iBrotherIndex != 0) break;
+ pParent = pParent->m_ptParent;
+ }
+ }
+ }
+
+ ~FocusedConstituent() { // TODO
+ focus_parents_.clear();
+ }
+
+ vector<STreeItem*> focus_parents_;
+};
typedef SPredicateItem FocusedPredicate;
-struct FocusedSRL{
- FocusedSRL(const SSrlSentence *srl) {
- if (srl == NULL) return;
- for (size_t i = 0; i < srl->m_vecPred.size(); i++) {
- if (strcmp(srl->m_pTree->m_vecTerminals[srl->m_vecPred[i]->m_iPosition]->m_ptParent->m_pszTerm, "VA") == 0)
- continue;
- focus_predicates_.push_back(new FocusedPredicate(srl->m_pTree, srl->m_vecPred[i]));
- }
- }
-
- ~FocusedSRL() {
- focus_predicates_.clear();
- }
-
- vector<const FocusedPredicate*> focus_predicates_;
+struct FocusedSRL {
+ FocusedSRL(const SSrlSentence* srl) {
+ if (srl == NULL) return;
+ for (size_t i = 0; i < srl->m_vecPred.size(); i++) {
+ if (strcmp(srl->m_pTree->m_vecTerminals[srl->m_vecPred[i]->m_iPosition]
+ ->m_ptParent->m_pszTerm,
+ "VA") == 0)
+ continue;
+ focus_predicates_.push_back(
+ new FocusedPredicate(srl->m_pTree, srl->m_vecPred[i]));
+ }
+ }
+
+ ~FocusedSRL() { focus_predicates_.clear(); }
+
+ vector<const FocusedPredicate*> focus_predicates_;
};
/*
* Note:
- * In BOLT experiments, we need to merged some sequence words into one term (like from "1999 nian 1 yue 10 ri" to "1999_nian_1_yue_10_ri") due to some reasons;
- * but in the parse file, we still use the parse tree before merging any words;
- * therefore, the words in source sentence and parse tree diverse and we need to map a word in merged sentence into its original index;
+ * In BOLT experiments, we need to merged some sequence words into one term
+ *(like from "1999 nian 1 yue 10 ri" to "1999_nian_1_yue_10_ri") due to some
+ *reasons;
+ * but in the parse file, we still use the parse tree before merging any
+ *words;
+ * therefore, the words in source sentence and parse tree diverse and we
+ *need to map a word in merged sentence into its original index;
* a word in source sentence maps 1 or more words in parse tree
* the index map info is stored at variable index_map_;
- * if the index_map_ is NULL, indicating the word index in source sentence and parse tree is always same.
+ * if the index_map_ is NULL, indicating the word index in source sentence
+ *and parse tree is always same.
*
- * In ConstReorderFeatureImpl, as to store alignment info, we use the word index of the parse tree
+ * In ConstReorderFeatureImpl, as to store alignment info, we use the word
+ *index of the parse tree
*/
-struct SIndexMap{
- SIndexMap(const string& index_map_file) {
- if (index_map_file == "") {
- index_map_input_2_parse = NULL;
- index_map_parse_2_input = NULL;
- return;
- }
- STxtFileReader *reader = new STxtFileReader(index_map_file.c_str());
- char szLine[10001];
- szLine[0] = '\0';
- reader->fnReadNextLine(szLine, NULL);
- delete reader;
- vector<string> terms;
- SplitOnWhitespace(string(szLine), &terms);
-
- index_map_input_2_parse = new short int[terms.size() + 1];
- int ix = 0;
- size_t i;
- for (i = 0; i < terms.size(); i++) {
- index_map_input_2_parse[i] = ix;
- ix += atoi(terms[i].c_str());
- }
- index_map_input_2_parse[i] = ix;
- //assert(ix == parsed_tree_->m_vecTerminals.size());
-
- index_map_parse_2_input = new short int[ix+1];
- int jx = 0;
- for (i = 0; i < terms.size(); i++) {
- int num_word = atoi(terms[i].c_str());
- for (int j = 0; j < num_word; j++)
- index_map_parse_2_input[jx++] = i;
- }
- index_map_parse_2_input[jx] = i;
- assert(jx == ix);
- }
-
- ~SIndexMap() {
- if (index_map_input_2_parse != NULL)
- delete index_map_input_2_parse;
- if (index_map_parse_2_input != NULL)
- delete index_map_parse_2_input;
- }
-
- /*
- * an input word maps to 1 or more words in parse
- */
- void MapIndex_Input_2_Parse(short int ix, short int& mapped_begin, short int& mapped_end) {
- MapIndex_Input_2_Parse(ix, ix, mapped_begin, mapped_end);
- }
-
- /*
- * given the indices in input,
- * return the indices in parse tree
- */
- void MapIndex_Input_2_Parse(short int begin, short int end, short int& mapped_begin, short int& mapped_end) {
- if (index_map_input_2_parse == NULL) {
- mapped_begin = begin;
- mapped_end = end;
- return;
- }
-
- mapped_begin = index_map_input_2_parse[begin];
- mapped_end = index_map_input_2_parse[end + 1] - 1;
- }
-
- /*
- * given the indices in input,
- * return the indices in parse tree
- */
- void MapIndex_Parse_2_Input(short int mapped_begin, short int mapped_end, short int& begin, short int& end) {
- if (index_map_parse_2_input == NULL) {
- begin = mapped_begin;
- end = mapped_end;
- return;
- }
-
- begin = index_map_parse_2_input[mapped_begin];
- end = index_map_parse_2_input[mapped_end];
-
- assert(mapped_begin == 0 || index_map_parse_2_input[mapped_begin - 1] != index_map_parse_2_input[mapped_begin]);
- assert(index_map_parse_2_input[mapped_end + 1] != index_map_parse_2_input[mapped_end]);
- }
-
- /*
- * given a index in input
- * return the number of its corresponding words in parse tree
- */
- int MapIndexWordCount(int ix) {
- if (index_map_input_2_parse == NULL)
- return 1;
- return index_map_input_2_parse[ix + 1] - index_map_input_2_parse[ix];
- }
-
-private:
-
- short int *index_map_input_2_parse;
- short int *index_map_parse_2_input;
+struct SIndexMap {
+ SIndexMap(const string& index_map_file) {
+ if (index_map_file == "") {
+ index_map_input_2_parse = NULL;
+ index_map_parse_2_input = NULL;
+ return;
+ }
+ STxtFileReader* reader = new STxtFileReader(index_map_file.c_str());
+ char szLine[10001];
+ szLine[0] = '\0';
+ reader->fnReadNextLine(szLine, NULL);
+ delete reader;
+ vector<string> terms;
+ SplitOnWhitespace(string(szLine), &terms);
+
+ index_map_input_2_parse = new short int[terms.size() + 1];
+ int ix = 0;
+ size_t i;
+ for (i = 0; i < terms.size(); i++) {
+ index_map_input_2_parse[i] = ix;
+ ix += atoi(terms[i].c_str());
+ }
+ index_map_input_2_parse[i] = ix;
+ // assert(ix == parsed_tree_->m_vecTerminals.size());
+
+ index_map_parse_2_input = new short int[ix + 1];
+ int jx = 0;
+ for (i = 0; i < terms.size(); i++) {
+ int num_word = atoi(terms[i].c_str());
+ for (int j = 0; j < num_word; j++) index_map_parse_2_input[jx++] = i;
+ }
+ index_map_parse_2_input[jx] = i;
+ assert(jx == ix);
+ }
+
+ ~SIndexMap() {
+ if (index_map_input_2_parse != NULL) delete index_map_input_2_parse;
+ if (index_map_parse_2_input != NULL) delete index_map_parse_2_input;
+ }
+
+ /*
+ * an input word maps to 1 or more words in parse
+ */
+ void MapIndex_Input_2_Parse(short int ix, short int& mapped_begin,
+ short int& mapped_end) {
+ MapIndex_Input_2_Parse(ix, ix, mapped_begin, mapped_end);
+ }
+
+ /*
+ * given the indices in input,
+ * return the indices in parse tree
+ */
+ void MapIndex_Input_2_Parse(short int begin, short int end,
+ short int& mapped_begin, short int& mapped_end) {
+ if (index_map_input_2_parse == NULL) {
+ mapped_begin = begin;
+ mapped_end = end;
+ return;
+ }
+
+ mapped_begin = index_map_input_2_parse[begin];
+ mapped_end = index_map_input_2_parse[end + 1] - 1;
+ }
+
+ /*
+ * given the indices in input,
+ * return the indices in parse tree
+ */
+ void MapIndex_Parse_2_Input(short int mapped_begin, short int mapped_end,
+ short int& begin, short int& end) {
+ if (index_map_parse_2_input == NULL) {
+ begin = mapped_begin;
+ end = mapped_end;
+ return;
+ }
+
+ begin = index_map_parse_2_input[mapped_begin];
+ end = index_map_parse_2_input[mapped_end];
+
+ assert(mapped_begin == 0 || index_map_parse_2_input[mapped_begin - 1] !=
+ index_map_parse_2_input[mapped_begin]);
+ assert(index_map_parse_2_input[mapped_end + 1] !=
+ index_map_parse_2_input[mapped_end]);
+ }
+
+ /*
+ * given a index in input
+ * return the number of its corresponding words in parse tree
+ */
+ int MapIndexWordCount(int ix) {
+ if (index_map_input_2_parse == NULL) return 1;
+ return index_map_input_2_parse[ix + 1] - index_map_input_2_parse[ix];
+ }
+
+ private:
+ short int* index_map_input_2_parse;
+ short int* index_map_parse_2_input;
};
-struct ConstReorderFeatureImpl{
- ConstReorderFeatureImpl(const std::string& param) {
-
- b_block_feature_ = false;
- b_order_feature_ = false;
- b_srl_block_feature_ = false;
- b_srl_order_feature_ = false;
-
-
- vector<string> terms;
- SplitOnWhitespace(param, &terms);
- if (terms.size() == 1) {
- b_block_feature_ = true;
- b_order_feature_ = true;
- } else if (terms.size() >= 3) {
- if (terms[1].compare("1") == 0)
- b_block_feature_ = true;
- if (terms[2].compare("1") == 0)
- b_order_feature_ = true;
- if (terms.size() == 6) {
- if (terms[4].compare("1") == 0)
- b_srl_block_feature_ = true;
- if (terms[5].compare("1") == 0)
- b_srl_order_feature_ = true;
-
- assert(b_srl_block_feature_ || b_srl_order_feature_);
- }
-
- } else {
- assert("ERROR");
- }
-
- const_reorder_classifier_left_ = NULL;
- const_reorder_classifier_right_ = NULL;
-
- srl_reorder_classifier_left_ = NULL;
- srl_reorder_classifier_right_ = NULL;
-
- if (b_order_feature_) {
- InitializeClassifier((terms[0] + string(".left")).c_str(), &const_reorder_classifier_left_);
- InitializeClassifier((terms[0] + string(".right")).c_str(), &const_reorder_classifier_right_);
- }
-
- if (b_srl_order_feature_) {
- InitializeClassifier((terms[3] + string(".left")).c_str(), &srl_reorder_classifier_left_);
- InitializeClassifier((terms[3] + string(".right")).c_str(), &srl_reorder_classifier_right_);
- }
-
- parsed_tree_ = NULL;
- focused_consts_ = NULL;
- index_map_ = NULL;
-
- srl_sentence_ = NULL;
- focused_srl_ = NULL;
-
- map_left_ = NULL;
- map_right_ = NULL;
-
- map_srl_left_ = NULL;
- map_srl_right_ = NULL;
-
- dict_block_status_ = new Dict();
- dict_block_status_->Convert("Unaligned", false);
- dict_block_status_->Convert("Discon't", false);
- dict_block_status_->Convert("Con't", false);
- }
- ~ConstReorderFeatureImpl() {
- if (const_reorder_classifier_left_)
- delete const_reorder_classifier_left_;
- if (const_reorder_classifier_right_)
- delete const_reorder_classifier_right_;
- if (srl_reorder_classifier_left_)
- delete srl_reorder_classifier_left_;
- if (srl_reorder_classifier_right_)
- delete srl_reorder_classifier_right_;
- FreeSentenceVariables();
-
- delete dict_block_status_;
- }
-
- static int ReserveStateSize() {
- return 1 * sizeof(TargetTranslation*);
- }
-
- void InitializeInputSentence(const std::string& parse_file, const std::string& srl_file, const std::string& index_map_file) {
- FreeSentenceVariables();
- if (b_srl_block_feature_ || b_srl_order_feature_) {
- assert(srl_file != "");
- srl_sentence_ = ReadSRLSentence(srl_file);
- parsed_tree_ = srl_sentence_->m_pTree;
- } else {
- assert(parse_file != "");
- srl_sentence_ = NULL;
- parsed_tree_ = ReadParseTree(parse_file);
- }
-
- if (b_block_feature_ || b_order_feature_) {
- focused_consts_ = new FocusedConstituent(parsed_tree_);
-
- if (b_order_feature_) {
- //we can do the classifier "off-line"
- map_left_ = new MapClassifier();
- map_right_ = new MapClassifier();
- InitializeConstReorderClassifierOutput();
- }
- }
-
- if (b_srl_block_feature_ || b_srl_order_feature_) {
- focused_srl_ = new FocusedSRL(srl_sentence_);
-
- if (b_srl_order_feature_) {
- map_srl_left_ = new MapClassifier();
- map_srl_right_ = new MapClassifier();
- InitializeSRLReorderClassifierOutput();
- }
- }
-
- index_map_ = new SIndexMap(index_map_file);
-
- if (parsed_tree_ != NULL) {
- size_t i = parsed_tree_->m_vecTerminals.size();
- vec_target_tran_.reserve(20 * i * i * i);
- } else
- vec_target_tran_.reserve(1000000);
- }
-
- void SetConstReorderFeature(const Hypergraph::Edge& edge, SparseVector<double>* features, const vector<const void*>& ant_states, void* state) {
- if (parsed_tree_ == NULL) return;
-
- short int mapped_begin, mapped_end;
- index_map_->MapIndex_Input_2_Parse(edge.i_, edge.j_ - 1, mapped_begin, mapped_end);
-
- typedef TargetTranslation* PtrTargetTranslation;
- PtrTargetTranslation* remnant = reinterpret_cast<PtrTargetTranslation*>(state);
-
- vector<const TargetTranslation*> vec_node;
- vec_node.reserve(edge.tail_nodes_.size());
- for (size_t i = 0; i < edge.tail_nodes_.size(); i++) {
- const PtrTargetTranslation* astate = reinterpret_cast<const PtrTargetTranslation*>(ant_states[i]);
- vec_node.push_back(astate[0]);
- }
-
- int e_num_word = edge.rule_->e_.size();
- for (size_t i = 0; i < vec_node.size(); i++) {
- e_num_word += vec_node[i]->e_num_words_;
- e_num_word--;
- }
-
- remnant[0] = new TargetTranslation(mapped_begin, mapped_end, edge.i_, edge.j_ - 1, e_num_word);
- vec_target_tran_.push_back(remnant[0]);
-
- //reset the alignment
- //for the source side, we know its position in source sentence
- //for the target side, we always assume its starting position is 0
- unsigned vc = 0;
- const TRulePtr rule = edge.rule_;
- std::vector<int> f_index(rule->f_.size());
- int index = edge.i_;
- for (unsigned i = 0; i < rule->f_.size(); i++) {
- f_index[i] = index;
- const WordID& c = rule->f_[i];
- if (c < 1)
- index = vec_node[vc++]->input_end_pos_ + 1;
- else
- index++;
- }
- assert(vc == vec_node.size());
- assert(index == edge.j_);
-
- std::vector<int> e_index(rule->e_.size());
- index = 0;
- vc = 0;
- for (unsigned i = 0; i < rule->e_.size(); i++) {
- e_index[i] = index;
- const WordID& c = rule->e_[i];
- if (c < 1) {
- index += vec_node[-c]->e_num_words_;
- vc++;
- }
- else
- index++;
- }
- assert(vc == vec_node.size());
-
- size_t nt_pos = 0;
- for (size_t i = 0; i < edge.rule_->f_.size(); i++) {
- if (edge.rule_->f_[i] > 0) continue;
-
- //it's an NT
- size_t j;
- for (j = 0; j < edge.rule_->e_.size(); j++)
- if (edge.rule_->e_[j] * -1 == nt_pos)
- break;
- assert(j != edge.rule_->e_.size());
- nt_pos++;
-
- //i aligns j
- int eindex = e_index[j];
- const vector<AlignmentPoint*>& align = vec_node[-1 * edge.rule_->e_[j]]->align_;
- for (size_t k = 0; k < align.size(); k++) {
- remnant[0]->InsertAlignmentPoint(align[k]->s_, eindex + align[k]->t_);
- }
- }
- for (size_t i = 0; i < edge.rule_->a_.size(); i++) {
- short int parse_index_begin, parse_index_end;
- index_map_->MapIndex_Input_2_Parse(f_index[edge.rule_->a_[i].s_], parse_index_begin, parse_index_end);
- int findex = parse_index_begin;
- int eindex = e_index[edge.rule_->a_[i].t_];
- int word_count = index_map_->MapIndexWordCount(f_index[edge.rule_->a_[i].s_]);
- assert(word_count == parse_index_end - parse_index_begin + 1);
- for (int i = 0; i < word_count; i++)
- remnant[0]->InsertAlignmentPoint(findex + i, eindex);
- }
-
-
- //till now, we finished setting state values
- //next, use the state values to calculate constituent reorder feature
- SetConstReorderFeature(mapped_begin, mapped_end, features, remnant[0], vec_node, f_index);
- }
-
- void SetConstReorderFeature(short int mapped_begin, short int mapped_end, SparseVector<double>* features, const TargetTranslation* target_translation, const vector<const TargetTranslation*>& vec_node, std::vector<int>& findex) {
- if (b_srl_block_feature_ || b_srl_order_feature_){
- double logprob_srl_reorder_left = 0.0, logprob_srl_reorder_right = 0.0;
- for (size_t i = 0; i < focused_srl_->focus_predicates_.size(); i++) {
- const FocusedPredicate* pred = focused_srl_->focus_predicates_[i];
- if (!is_overlap(mapped_begin, mapped_end, pred->begin_, pred->end_)) continue; //have no overlap between this predicate (with its argument) and the current edge
-
- size_t j;
- for (j = 0; j < vec_node.size(); j++) {
- if (is_inside(pred->begin_, pred->end_, vec_node[j]->begin_pos_, vec_node[j]->end_pos_))
- break;
- }
- if (j < vec_node.size()) continue;
-
- vector<int> vecBlockStatus;
- vecBlockStatus.reserve(pred->vec_items_.size());
- for (j = 0; j < pred->vec_items_.size(); j++) {
- const STreeItem *con1 = pred->vec_items_[j]->tree_item_;
- if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {vecBlockStatus.push_back(0); continue;} //the node is partially outside the current edge
-
- string type = target_translation->IsTargetConstinousSpan2(con1->m_iBegin, con1->m_iEnd);
- vecBlockStatus.push_back(dict_block_status_->Convert(type, false));
-
- if (!b_srl_block_feature_) continue;
- //see if the node is covered by an NT
- size_t k;
- for (k = 0; k < vec_node.size(); k++) {
- if (is_inside(con1->m_iBegin, con1->m_iEnd, vec_node[k]->begin_pos_, vec_node[k]->end_pos_))
- break;
- }
- if (k < vec_node.size()) continue;
- int f_id = FD::Convert(string(pred->vec_items_[j]->role_) + type);
- if (f_id)
- features->add_value(f_id, 1);
- }
-
- if (!b_srl_order_feature_) continue;
-
- vector<int> vecPosition, vecRelativePosition;
- vector<int> vecRightPosition, vecRelativeRightPosition;
- vecPosition.reserve(pred->vec_items_.size());
- vecRelativePosition.reserve(pred->vec_items_.size());
- vecRightPosition.reserve(pred->vec_items_.size());
- vecRelativeRightPosition.reserve(pred->vec_items_.size());
- for (j = 0; j < pred->vec_items_.size(); j++) {
- const STreeItem *con1 = pred->vec_items_[j]->tree_item_;
- if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {vecPosition.push_back(-1); vecRightPosition.push_back(-1);continue;} //the node is partially outside the current edge
- int left1 = -1, right1 = -1;
- target_translation->FindLeftRightMostTargetSpan(con1->m_iBegin, con1->m_iEnd, left1, right1);
- vecPosition.push_back(left1);
- vecRightPosition.push_back(right1);
- }
- fnGetRelativePosition(vecPosition, vecRelativePosition);
- fnGetRelativePosition(vecRightPosition, vecRelativeRightPosition);
-
- for (j = 1; j < pred->vec_items_.size(); j++) {
- const STreeItem *con1 = pred->vec_items_[j - 1]->tree_item_;
- const STreeItem *con2 = pred->vec_items_[j]->tree_item_;
-
- if (con1->m_iBegin < mapped_begin || con2->m_iEnd > mapped_end) continue; //one of the two nodes is partially outside the current edge
-
- //both con1 and con2 are covered, need to check if they are covered by the same NT
- size_t k;
- for (k = 0; k < vec_node.size(); k++) {
- if (is_inside(con1->m_iBegin, con2->m_iEnd, vec_node[k]->begin_pos_, vec_node[k]->end_pos_))
- break;
- }
- if (k < vec_node.size()) continue;
-
- //they are not covered bye the same NT
- string outcome;
- string key;
- GenerateKey(pred->vec_items_[j-1]->tree_item_, pred->vec_items_[j]->tree_item_, vecBlockStatus[j-1], vecBlockStatus[j], key);
-
- fnGetOutcome(vecRelativePosition[j - 1], vecRelativePosition[j], outcome);
- double prob = CalculateConstReorderProb(srl_reorder_classifier_left_, map_srl_left_, key, outcome);
- //printf("%s %s %f\n", ostr.str().c_str(), outcome.c_str(), prob);
- logprob_srl_reorder_left += log10(prob);
-
-
- fnGetOutcome(vecRelativeRightPosition[j - 1], vecRelativeRightPosition[j], outcome);
- prob = CalculateConstReorderProb(srl_reorder_classifier_right_, map_srl_right_, key, outcome);
- logprob_srl_reorder_right += log10(prob);
- }
- }
-
- if (b_srl_order_feature_) {
- int f_id = FD::Convert("SRLReorderFeatureLeft");
- if (f_id && logprob_srl_reorder_left != 0.0)
- features->set_value(f_id, logprob_srl_reorder_left);
- f_id = FD::Convert("SRLReorderFeatureRight");
- if (f_id && logprob_srl_reorder_right != 0.0)
- features->set_value(f_id, logprob_srl_reorder_right);
- }
- }
-
- if (b_block_feature_ || b_order_feature_){
- double logprob_const_reorder_left = 0.0, logprob_const_reorder_right = 0.0;
-
- for (size_t i = 0; i < focused_consts_->focus_parents_.size(); i++) {
- STreeItem* parent = focused_consts_->focus_parents_[i];
- if (!is_overlap(mapped_begin, mapped_end, parent->m_iBegin, parent->m_iEnd)) continue; //have no overlap between this parent node and the current edge
-
- size_t j;
- for (j = 0; j < vec_node.size(); j++) {
- if (is_inside(parent->m_iBegin, parent->m_iEnd, vec_node[j]->begin_pos_, vec_node[j]->end_pos_))
- break;
- }
- if (j < vec_node.size()) continue;
-
-
- if (b_block_feature_) {
- if (parent->m_iBegin >= mapped_begin && parent->m_iEnd <= mapped_end) {
- string type = target_translation->IsTargetConstinousSpan2(parent->m_iBegin, parent->m_iEnd);
- int f_id = FD::Convert(string(parent->m_pszTerm) + type);
- if (f_id)
- features->add_value(f_id, 1);
- }
- }
-
- if (parent->m_vecChildren.size() == 1 || !b_order_feature_) continue;
-
- vector<int> vecChunkBlock;
- vecChunkBlock.reserve(parent->m_vecChildren.size());
-
- for (j = 0; j < parent->m_vecChildren.size(); j++) {
- STreeItem *con1 = parent->m_vecChildren[j];
- if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {vecChunkBlock.push_back(0); continue;} //the node is partially outside the current edge
-
- string type = target_translation->IsTargetConstinousSpan2(con1->m_iBegin, con1->m_iEnd);
- vecChunkBlock.push_back(dict_block_status_->Convert(type, false));
-
- /*if (!b_block_feature_) continue;
- //see if the node is covered by an NT
- size_t k;
- for (k = 0; k < vec_node.size(); k++) {
- if (is_inside(con1->m_iBegin, con1->m_iEnd, vec_node[k]->begin_pos_, vec_node[k]->end_pos_))
- break;
- }
- if (k < vec_node.size()) continue;
- int f_id = FD::Convert(string(con1->m_pszTerm) + type);
- if (f_id)
- features->add_value(f_id, 1);*/
- }
-
- if (!b_order_feature_) continue;
-
- vector<int> vecPosition, vecRelativePosition;
- vector<int> vecRightPosition, vecRelativeRightPosition;
- vecPosition.reserve(parent->m_vecChildren.size());
- vecRelativePosition.reserve(parent->m_vecChildren.size());
- vecRightPosition.reserve(parent->m_vecChildren.size());
- vecRelativeRightPosition.reserve(parent->m_vecChildren.size());
- for (j = 0; j < parent->m_vecChildren.size(); j++) {
- STreeItem *con1 = parent->m_vecChildren[j];
- if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {vecPosition.push_back(-1); vecRightPosition.push_back(-1);continue;} //the node is partially outside the current edge
- int left1 = -1, right1 = -1;
- target_translation->FindLeftRightMostTargetSpan(con1->m_iBegin, con1->m_iEnd, left1, right1);
- vecPosition.push_back(left1);
- vecRightPosition.push_back(right1);
- }
- fnGetRelativePosition(vecPosition, vecRelativePosition);
- fnGetRelativePosition(vecRightPosition, vecRelativeRightPosition);
-
- for (j = 1; j < parent->m_vecChildren.size(); j++) {
- STreeItem *con1 = parent->m_vecChildren[j - 1];
- STreeItem *con2 = parent->m_vecChildren[j];
-
- if (con1->m_iBegin < mapped_begin || con2->m_iEnd > mapped_end) continue; //one of the two nodes is partially outside the current edge
-
- //both con1 and con2 are covered, need to check if they are covered by the same NT
- size_t k;
- for (k = 0; k < vec_node.size(); k++) {
- if (is_inside(con1->m_iBegin, con2->m_iEnd, vec_node[k]->begin_pos_, vec_node[k]->end_pos_))
- break;
- }
- if (k < vec_node.size()) continue;
-
- //they are not covered bye the same NT
- string outcome;
- string key;
- GenerateKey(parent->m_vecChildren[j-1], parent->m_vecChildren[j], vecChunkBlock[j-1], vecChunkBlock[j], key);
-
- fnGetOutcome(vecRelativePosition[j - 1], vecRelativePosition[j], outcome);
- double prob = CalculateConstReorderProb(const_reorder_classifier_left_, map_left_, key, outcome);
- //printf("%s %s %f\n", ostr.str().c_str(), outcome.c_str(), prob);
- logprob_const_reorder_left += log10(prob);
-
-
- fnGetOutcome(vecRelativeRightPosition[j - 1], vecRelativeRightPosition[j], outcome);
- prob = CalculateConstReorderProb(const_reorder_classifier_right_, map_right_, key, outcome);
- logprob_const_reorder_right += log10(prob);
- }
- }
-
- if (b_order_feature_) {
- int f_id = FD::Convert("ConstReorderFeatureLeft");
- if (f_id && logprob_const_reorder_left != 0.0)
- features->set_value(f_id, logprob_const_reorder_left);
- f_id = FD::Convert("ConstReorderFeatureRight");
- if (f_id && logprob_const_reorder_right != 0.0)
- features->set_value(f_id, logprob_const_reorder_right);
- }
- }
-
- }
-
-
-private:
- void Byte_to_Char(unsigned char *str, int n) {
- str[0] = (n & 255);
- str[1] = n / 256;
- }
- void GenerateKey(const STreeItem *pCon1, const STreeItem *pCon2, int iBlockStatus1, int iBlockStatus2, string& key) {
- assert(iBlockStatus1 != 0);
- assert(iBlockStatus2 != 0);
- unsigned char szTerm[ 1001 ];
- Byte_to_Char(szTerm, pCon1->m_iBegin);
- Byte_to_Char(szTerm + 2, pCon2->m_iEnd);
- szTerm[4] = (char)iBlockStatus1;
- szTerm[5] = (char)iBlockStatus2;
- szTerm[6] = '\0';
- //sprintf(szTerm, "%d|%d|%d|%d|%s|%s", pCon1->m_iBegin, pCon1->m_iEnd, pCon2->m_iBegin, pCon2->m_iEnd, strBlockStatus1.c_str(), strBlockStatus2.c_str());
- key = string(szTerm, szTerm + 6);
-
- }
- void InitializeConstReorderClassifierOutput( ) {
- if (!b_order_feature_) return;
- int size_block_status = dict_block_status_->max();
-
- for (size_t i = 0; i < focused_consts_->focus_parents_.size(); i++) {
- STreeItem* parent = focused_consts_->focus_parents_[i];
-
- for (size_t j = 1; j < parent->m_vecChildren.size(); j++) {
- for (size_t k = 1; k <= size_block_status; k++) {
- for (size_t l = 1; l <= size_block_status; l++) {
- ostringstream ostr;
- GenerateFeature(parsed_tree_, parent, j, dict_block_status_->Convert(k), dict_block_status_->Convert(l), ostr);
-
- string strKey;
- GenerateKey(parent->m_vecChildren[j-1], parent->m_vecChildren[j], k, l, strKey);
-
- vector<double> vecOutput;
- const_reorder_classifier_left_->fnEval(ostr.str().c_str(), vecOutput);
- (*map_left_)[strKey] = vecOutput;
-
- const_reorder_classifier_right_->fnEval(ostr.str().c_str(), vecOutput);
- (*map_right_)[strKey] = vecOutput;
- }
- }
- }
- }
-
- }
-
- void InitializeSRLReorderClassifierOutput() {
- if (!b_srl_order_feature_) return;
- int size_block_status = dict_block_status_->max();
-
- for (size_t i = 0; i < focused_srl_->focus_predicates_.size(); i++) {
- const FocusedPredicate *pred = focused_srl_->focus_predicates_[i];
-
- for (size_t j = 1; j < pred->vec_items_.size(); j++) {
- for (size_t k = 1; k <= size_block_status; k++) {
- for (size_t l = 1; l <= size_block_status; l++) {
- ostringstream ostr;
-
- SArgumentReorderModel::fnGenerateFeature(parsed_tree_, pred->pred_, pred, j, dict_block_status_->Convert(k), dict_block_status_->Convert(l), ostr);
-
- string strKey;
- GenerateKey(pred->vec_items_[j - 1]->tree_item_, pred->vec_items_[j]->tree_item_, k, l, strKey);
-
- vector<double> vecOutput;
- srl_reorder_classifier_left_->fnEval(ostr.str().c_str(), vecOutput);
- (*map_srl_left_)[strKey] = vecOutput;
-
- srl_reorder_classifier_right_->fnEval(ostr.str().c_str(), vecOutput);
- (*map_srl_right_)[strKey] = vecOutput;
- }
- }
- }
- }
- }
-
- double CalculateConstReorderProb(const Tsuruoka_Maxent *const_reorder_classifier, const MapClassifier *map, const string& key, const string& outcome) {
- MapClassifier::const_iterator iter = (*map).find(key);
- assert(iter != map->end());
- int id = const_reorder_classifier->fnGetClassId(outcome);
- return iter->second[id];
- }
-
- void FreeSentenceVariables( ) {
- if (srl_sentence_ != NULL) {
- delete srl_sentence_;
- srl_sentence_ = NULL;
- }
- else {
- if (parsed_tree_ != NULL)
- delete parsed_tree_;
- parsed_tree_ = NULL;
- }
-
- if (focused_consts_ != NULL)
- delete focused_consts_;
- focused_consts_ = NULL;
-
- for (size_t i = 0; i < vec_target_tran_.size(); i++)
- delete vec_target_tran_[i];
- vec_target_tran_.clear();
-
- if (index_map_ != NULL)
- delete index_map_;
- index_map_ = NULL;
-
- if (map_left_ != NULL)
- delete map_left_;
- map_left_ = NULL;
- if (map_right_ != NULL)
- delete map_right_;
- map_right_ = NULL;
-
- if (map_srl_left_ != NULL)
- delete map_srl_left_;
- map_srl_left_ = NULL;
- if (map_srl_right_ != NULL)
- delete map_srl_right_;
- map_srl_right_ = NULL;
- }
-
- void InitializeClassifier(const char* pszFname, Tsuruoka_Maxent **ppClassifier) {
- (*ppClassifier) = new Tsuruoka_Maxent(pszFname);
- }
-
- void GenerateOutcome(const vector<int>& vecPos, vector<string>& vecOutcome) {
- vecOutcome.clear();
-
- for (size_t i = 1; i < vecPos.size(); i++) {
- if (vecPos[i] == -1 || vecPos[i] == vecPos[i - 1]) {
- vecOutcome.push_back("M"); //monotone
- continue;
- }
-
-
- if (vecPos[i - 1] == -1) {
- //vecPos[i] is not -1
- size_t j = i - 2;
- while (j > -1 && vecPos[j] == -1)
- j--;
-
- size_t k;
- for (k = 0; k < j; k++) {
- if (vecPos[k] > vecPos[j] || vecPos[k] <= vecPos[i])
- break;
- }
- if (k < j) {
- vecOutcome.push_back("DM");
- continue;
- }
-
- for (k = i + 1; k < vecPos.size(); k++)
- if (vecPos[k] < vecPos[i] && (j == -1 && vecPos[k] >= vecPos[j]))
- break;
- if (k < vecPos.size()) {
- vecOutcome.push_back("DM");
- continue;
- }
- vecOutcome.push_back("M");
- } else {
- //neither of vecPos[i-1] and vecPos[i] is -1
- if (vecPos[i - 1] < vecPos[i]) {
- //monotone or discon't monotone
- size_t j;
- for (j = 0; j < i - 1; j++)
- if (vecPos[j] > vecPos[i - 1] && vecPos[j] <= vecPos[i])
- break;
- if (j < i - 1) {
- vecOutcome.push_back("DM");
- continue;
- }
- for (j = i + 1; j < vecPos.size(); j++)
- if (vecPos[j] >= vecPos[i - 1] && vecPos[j] < vecPos[i])
- break;
- if (j < vecPos.size()) {
- vecOutcome.push_back("DM");
- continue;
- }
- vecOutcome.push_back("M");
- } else {
- //swap or discon't swap
- size_t j;
- for (j = 0; j < i - 1; j++)
- if (vecPos[j] > vecPos[i] && vecPos[j] <= vecPos[i - 1])
- break;
- if (j < i - 1) {
- vecOutcome.push_back("DS");
- continue;
- }
- for (j = i + 1; j < vecPos.size(); j++)
- if ( vecPos[j] >= vecPos[i] && vecPos[j] < vecPos[i - 1])
- break;
- if (j < vecPos.size()) {
- vecOutcome.push_back("DS");
- continue;
- }
- vecOutcome.push_back("S");
- }
- }
- }
-
- assert(vecOutcome.size() == vecPos.size() - 1);
- }
-
- void fnGetRelativePosition(const vector<int>& vecLeft, vector<int>& vecPosition) {
- vecPosition.clear();
-
- vector<float> vec;
- vec.reserve(vecLeft.size());
- for (size_t i = 0; i < vecLeft.size(); i++) {
- if (vecLeft[i] == -1) {
- if (i == 0)
- vec.push_back(-1);
- else
- vec.push_back(vecLeft[i-1] + 0.1);
- } else
- vec.push_back(vecLeft[i]);
- }
-
- for (size_t i = 0; i < vecLeft.size(); i++) {
- int count = 0;
-
- for (size_t j = 0; j < vecLeft.size(); j++) {
- if ( j == i) continue;
- if (vec[j] < vec[i]) {
- count++;
- } else if (vec[j] == vec[i] && j < i) {
- count++;
- }
- }
- vecPosition.push_back(count);
- }
-
- for (size_t i = 1; i < vecPosition.size(); i++) {
- if (vecPosition[i - 1] == vecPosition[i]) {
- for (size_t j = 0; j < vecLeft.size(); j++)
- cout << vecLeft[j] << " ";
- cout << "\n";
- assert(false);
- }
- }
- }
-
- inline void fnGetOutcome(int i1, int i2, string& strOutcome) {
- assert(i1 != i2);
- if (i1 < i2) {
- if (i2 > i1 + 1) strOutcome = string("DM");
- else strOutcome = string("M");
- } else {
- if (i1 > i2 + 1) strOutcome = string("DS");
- else strOutcome = string("S");
- }
- }
-
- //features in constituent_reorder_model.cc
- void GenerateFeature(const SParsedTree *pTree, const STreeItem *pParent, int iPos, const string& strBlockStatus1, const string& strBlockStatus2, ostringstream& ostr) {
- STreeItem *pCon1, *pCon2;
- pCon1 = pParent->m_vecChildren[iPos - 1];
- pCon2 = pParent->m_vecChildren[iPos];
-
- string left_label = string(pCon1->m_pszTerm);
- string right_label = string(pCon2->m_pszTerm);
- string parent_label = string(pParent->m_pszTerm);
-
- vector<string> vec_other_right_sibling;
- for (int i = iPos + 1; i < pParent->m_vecChildren.size(); i++)
- vec_other_right_sibling.push_back(string(pParent->m_vecChildren[i]->m_pszTerm));
- if (vec_other_right_sibling.size() == 0)
- vec_other_right_sibling.push_back(string("NULL"));
- vector<string> vec_other_left_sibling;
- for (int i = 0; i < iPos - 1; i++)
- vec_other_left_sibling.push_back(string(pParent->m_vecChildren[i]->m_pszTerm));
- if (vec_other_left_sibling.size() == 0)
- vec_other_left_sibling.push_back(string("NULL"));
-
- //generate features
- //f1
- ostr << "f1=" << left_label << "_" << right_label << "_" << parent_label;
- //f2
- for (int i = 0; i < vec_other_right_sibling.size(); i++)
- ostr << " f2=" << left_label << "_" << right_label << "_" << parent_label << "_" << vec_other_right_sibling[i];
- //f3
- for (int i = 0; i < vec_other_left_sibling.size(); i++)
- ostr << " f3=" << left_label << "_" << right_label << "_" << parent_label << "_" << vec_other_left_sibling[i];
- //f4
- ostr << " f4=" << left_label << "_" << right_label << "_" << pTree->m_vecTerminals[pCon1->m_iHeadWord]->m_ptParent->m_pszTerm;
- //f5
- ostr << " f5=" << left_label << "_" << right_label << "_" << pTree->m_vecTerminals[pCon1->m_iHeadWord]->m_pszTerm;
- //f6
- ostr << " f6=" << left_label << "_" << right_label << "_" << pTree->m_vecTerminals[pCon2->m_iHeadWord]->m_ptParent->m_pszTerm;
- //f7
- ostr << " f7=" << left_label << "_" << right_label << "_" << pTree->m_vecTerminals[pCon2->m_iHeadWord]->m_pszTerm;
- //f8
- ostr << " f8=" << left_label << "_" << right_label << "_" << strBlockStatus1;
- //f9
- ostr << " f9=" << left_label << "_" << right_label << "_" << strBlockStatus2;
-
- //f10
- ostr << " f10=" << left_label << "_" << parent_label;
- //f11
- ostr << " f11=" << right_label << "_" << parent_label;
- }
-
- SParsedTree* ReadParseTree(const std::string& parse_file) {
- SParseReader *reader = new SParseReader(parse_file.c_str(), false);
- SParsedTree *tree = reader->fnReadNextParseTree();
- //assert(tree != NULL);
- delete reader;
- return tree;
+struct ConstReorderFeatureImpl {
+ ConstReorderFeatureImpl(const std::string& param) {
+
+ b_block_feature_ = false;
+ b_order_feature_ = false;
+ b_srl_block_feature_ = false;
+ b_srl_order_feature_ = false;
+
+ vector<string> terms;
+ SplitOnWhitespace(param, &terms);
+ if (terms.size() == 1) {
+ b_block_feature_ = true;
+ b_order_feature_ = true;
+ } else if (terms.size() >= 3) {
+ if (terms[1].compare("1") == 0) b_block_feature_ = true;
+ if (terms[2].compare("1") == 0) b_order_feature_ = true;
+ if (terms.size() == 6) {
+ if (terms[4].compare("1") == 0) b_srl_block_feature_ = true;
+ if (terms[5].compare("1") == 0) b_srl_order_feature_ = true;
+
+ assert(b_srl_block_feature_ || b_srl_order_feature_);
+ }
+
+ } else {
+ assert("ERROR");
}
- SSrlSentence* ReadSRLSentence(const std::string& srl_file) {
- SSrlSentenceReader *reader = new SSrlSentenceReader(srl_file.c_str());
- SSrlSentence *srl = reader->fnReadNextSrlSentence();
- //assert(srl != NULL);
- delete reader;
- return srl;
+ const_reorder_classifier_left_ = NULL;
+ const_reorder_classifier_right_ = NULL;
+
+ srl_reorder_classifier_left_ = NULL;
+ srl_reorder_classifier_right_ = NULL;
+
+ if (b_order_feature_) {
+ InitializeClassifier((terms[0] + string(".left")).c_str(),
+ &const_reorder_classifier_left_);
+ InitializeClassifier((terms[0] + string(".right")).c_str(),
+ &const_reorder_classifier_right_);
}
-private:
- Tsuruoka_Maxent *const_reorder_classifier_left_;
- Tsuruoka_Maxent *const_reorder_classifier_right_;
+ if (b_srl_order_feature_) {
+ InitializeClassifier((terms[3] + string(".left")).c_str(),
+ &srl_reorder_classifier_left_);
+ InitializeClassifier((terms[3] + string(".right")).c_str(),
+ &srl_reorder_classifier_right_);
+ }
- Tsuruoka_Maxent *srl_reorder_classifier_left_;
- Tsuruoka_Maxent *srl_reorder_classifier_right_;
+ parsed_tree_ = NULL;
+ focused_consts_ = NULL;
+ index_map_ = NULL;
+
+ srl_sentence_ = NULL;
+ focused_srl_ = NULL;
+
+ map_left_ = NULL;
+ map_right_ = NULL;
+
+ map_srl_left_ = NULL;
+ map_srl_right_ = NULL;
+
+ dict_block_status_ = new Dict();
+ dict_block_status_->Convert("Unaligned", false);
+ dict_block_status_->Convert("Discon't", false);
+ dict_block_status_->Convert("Con't", false);
+ }
+ ~ConstReorderFeatureImpl() {
+ if (const_reorder_classifier_left_) delete const_reorder_classifier_left_;
+ if (const_reorder_classifier_right_) delete const_reorder_classifier_right_;
+ if (srl_reorder_classifier_left_) delete srl_reorder_classifier_left_;
+ if (srl_reorder_classifier_right_) delete srl_reorder_classifier_right_;
+ FreeSentenceVariables();
+
+ delete dict_block_status_;
+ }
+
+ static int ReserveStateSize() { return 1 * sizeof(TargetTranslation*); }
+
+ void InitializeInputSentence(const std::string& parse_file,
+ const std::string& srl_file,
+ const std::string& index_map_file) {
+ FreeSentenceVariables();
+ if (b_srl_block_feature_ || b_srl_order_feature_) {
+ assert(srl_file != "");
+ srl_sentence_ = ReadSRLSentence(srl_file);
+ parsed_tree_ = srl_sentence_->m_pTree;
+ } else {
+ assert(parse_file != "");
+ srl_sentence_ = NULL;
+ parsed_tree_ = ReadParseTree(parse_file);
+ }
- MapClassifier *map_left_;
- MapClassifier *map_right_;
+ if (b_block_feature_ || b_order_feature_) {
+ focused_consts_ = new FocusedConstituent(parsed_tree_);
- MapClassifier *map_srl_left_;
- MapClassifier *map_srl_right_;
+ if (b_order_feature_) {
+ // we can do the classifier "off-line"
+ map_left_ = new MapClassifier();
+ map_right_ = new MapClassifier();
+ InitializeConstReorderClassifierOutput();
+ }
+ }
- SParsedTree *parsed_tree_;
- FocusedConstituent *focused_consts_;
- vector<TargetTranslation*> vec_target_tran_;
+ if (b_srl_block_feature_ || b_srl_order_feature_) {
+ focused_srl_ = new FocusedSRL(srl_sentence_);
- bool b_order_feature_;
- bool b_block_feature_;
+ if (b_srl_order_feature_) {
+ map_srl_left_ = new MapClassifier();
+ map_srl_right_ = new MapClassifier();
+ InitializeSRLReorderClassifierOutput();
+ }
+ }
- bool b_srl_block_feature_;
- bool b_srl_order_feature_;
- SSrlSentence *srl_sentence_;
- FocusedSRL *focused_srl_;
+ index_map_ = new SIndexMap(index_map_file);
+
+ if (parsed_tree_ != NULL) {
+ size_t i = parsed_tree_->m_vecTerminals.size();
+ vec_target_tran_.reserve(20 * i * i * i);
+ } else
+ vec_target_tran_.reserve(1000000);
+ }
+
+ void SetConstReorderFeature(const Hypergraph::Edge& edge,
+ SparseVector<double>* features,
+ const vector<const void*>& ant_states,
+ void* state) {
+ if (parsed_tree_ == NULL) return;
+
+ short int mapped_begin, mapped_end;
+ index_map_->MapIndex_Input_2_Parse(edge.i_, edge.j_ - 1, mapped_begin,
+ mapped_end);
+
+ typedef TargetTranslation* PtrTargetTranslation;
+ PtrTargetTranslation* remnant =
+ reinterpret_cast<PtrTargetTranslation*>(state);
+
+ vector<const TargetTranslation*> vec_node;
+ vec_node.reserve(edge.tail_nodes_.size());
+ for (size_t i = 0; i < edge.tail_nodes_.size(); i++) {
+ const PtrTargetTranslation* astate =
+ reinterpret_cast<const PtrTargetTranslation*>(ant_states[i]);
+ vec_node.push_back(astate[0]);
+ }
- SIndexMap *index_map_;
+ int e_num_word = edge.rule_->e_.size();
+ for (size_t i = 0; i < vec_node.size(); i++) {
+ e_num_word += vec_node[i]->e_num_words_;
+ e_num_word--;
+ }
- Dict *dict_block_status_;
+ remnant[0] = new TargetTranslation(mapped_begin, mapped_end, edge.i_,
+ edge.j_ - 1, e_num_word);
+ vec_target_tran_.push_back(remnant[0]);
+
+ // reset the alignment
+ // for the source side, we know its position in source sentence
+ // for the target side, we always assume its starting position is 0
+ unsigned vc = 0;
+ const TRulePtr rule = edge.rule_;
+ std::vector<int> f_index(rule->f_.size());
+ int index = edge.i_;
+ for (unsigned i = 0; i < rule->f_.size(); i++) {
+ f_index[i] = index;
+ const WordID& c = rule->f_[i];
+ if (c < 1)
+ index = vec_node[vc++]->input_end_pos_ + 1;
+ else
+ index++;
+ }
+ assert(vc == vec_node.size());
+ assert(index == edge.j_);
+
+ std::vector<int> e_index(rule->e_.size());
+ index = 0;
+ vc = 0;
+ for (unsigned i = 0; i < rule->e_.size(); i++) {
+ e_index[i] = index;
+ const WordID& c = rule->e_[i];
+ if (c < 1) {
+ index += vec_node[-c]->e_num_words_;
+ vc++;
+ } else
+ index++;
+ }
+ assert(vc == vec_node.size());
+
+ size_t nt_pos = 0;
+ for (size_t i = 0; i < edge.rule_->f_.size(); i++) {
+ if (edge.rule_->f_[i] > 0) continue;
+
+ // it's an NT
+ size_t j;
+ for (j = 0; j < edge.rule_->e_.size(); j++)
+ if (edge.rule_->e_[j] * -1 == nt_pos) break;
+ assert(j != edge.rule_->e_.size());
+ nt_pos++;
+
+ // i aligns j
+ int eindex = e_index[j];
+ const vector<AlignmentPoint*>& align =
+ vec_node[-1 * edge.rule_->e_[j]]->align_;
+ for (size_t k = 0; k < align.size(); k++) {
+ remnant[0]->InsertAlignmentPoint(align[k]->s_, eindex + align[k]->t_);
+ }
+ }
+ for (size_t i = 0; i < edge.rule_->a_.size(); i++) {
+ short int parse_index_begin, parse_index_end;
+ index_map_->MapIndex_Input_2_Parse(f_index[edge.rule_->a_[i].s_],
+ parse_index_begin, parse_index_end);
+ int findex = parse_index_begin;
+ int eindex = e_index[edge.rule_->a_[i].t_];
+ int word_count =
+ index_map_->MapIndexWordCount(f_index[edge.rule_->a_[i].s_]);
+ assert(word_count == parse_index_end - parse_index_begin + 1);
+ for (int i = 0; i < word_count; i++)
+ remnant[0]->InsertAlignmentPoint(findex + i, eindex);
+ }
+
+ // till now, we finished setting state values
+ // next, use the state values to calculate constituent reorder feature
+ SetConstReorderFeature(mapped_begin, mapped_end, features, remnant[0],
+ vec_node, f_index);
+ }
+
+ void SetConstReorderFeature(short int mapped_begin, short int mapped_end,
+ SparseVector<double>* features,
+ const TargetTranslation* target_translation,
+ const vector<const TargetTranslation*>& vec_node,
+ std::vector<int>& findex) {
+ if (b_srl_block_feature_ || b_srl_order_feature_) {
+ double logprob_srl_reorder_left = 0.0, logprob_srl_reorder_right = 0.0;
+ for (size_t i = 0; i < focused_srl_->focus_predicates_.size(); i++) {
+ const FocusedPredicate* pred = focused_srl_->focus_predicates_[i];
+ if (!is_overlap(mapped_begin, mapped_end, pred->begin_, pred->end_))
+ continue; // have no overlap between this predicate (with its
+ // argument) and the current edge
+
+ size_t j;
+ for (j = 0; j < vec_node.size(); j++) {
+ if (is_inside(pred->begin_, pred->end_, vec_node[j]->begin_pos_,
+ vec_node[j]->end_pos_))
+ break;
+ }
+ if (j < vec_node.size()) continue;
+
+ vector<int> vecBlockStatus;
+ vecBlockStatus.reserve(pred->vec_items_.size());
+ for (j = 0; j < pred->vec_items_.size(); j++) {
+ const STreeItem* con1 = pred->vec_items_[j]->tree_item_;
+ if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {
+ vecBlockStatus.push_back(0);
+ continue;
+ } // the node is partially outside the current edge
+
+ string type = target_translation->IsTargetConstinousSpan2(
+ con1->m_iBegin, con1->m_iEnd);
+ vecBlockStatus.push_back(dict_block_status_->Convert(type, false));
+
+ if (!b_srl_block_feature_) continue;
+ // see if the node is covered by an NT
+ size_t k;
+ for (k = 0; k < vec_node.size(); k++) {
+ if (is_inside(con1->m_iBegin, con1->m_iEnd, vec_node[k]->begin_pos_,
+ vec_node[k]->end_pos_))
+ break;
+ }
+ if (k < vec_node.size()) continue;
+ int f_id = FD::Convert(string(pred->vec_items_[j]->role_) + type);
+ if (f_id) features->add_value(f_id, 1);
+ }
+
+ if (!b_srl_order_feature_) continue;
+
+ vector<int> vecPosition, vecRelativePosition;
+ vector<int> vecRightPosition, vecRelativeRightPosition;
+ vecPosition.reserve(pred->vec_items_.size());
+ vecRelativePosition.reserve(pred->vec_items_.size());
+ vecRightPosition.reserve(pred->vec_items_.size());
+ vecRelativeRightPosition.reserve(pred->vec_items_.size());
+ for (j = 0; j < pred->vec_items_.size(); j++) {
+ const STreeItem* con1 = pred->vec_items_[j]->tree_item_;
+ if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {
+ vecPosition.push_back(-1);
+ vecRightPosition.push_back(-1);
+ continue;
+ } // the node is partially outside the current edge
+ int left1 = -1, right1 = -1;
+ target_translation->FindLeftRightMostTargetSpan(
+ con1->m_iBegin, con1->m_iEnd, left1, right1);
+ vecPosition.push_back(left1);
+ vecRightPosition.push_back(right1);
+ }
+ fnGetRelativePosition(vecPosition, vecRelativePosition);
+ fnGetRelativePosition(vecRightPosition, vecRelativeRightPosition);
+
+ for (j = 1; j < pred->vec_items_.size(); j++) {
+ const STreeItem* con1 = pred->vec_items_[j - 1]->tree_item_;
+ const STreeItem* con2 = pred->vec_items_[j]->tree_item_;
+
+ if (con1->m_iBegin < mapped_begin || con2->m_iEnd > mapped_end)
+ continue; // one of the two nodes is partially outside the current
+ // edge
+
+ // both con1 and con2 are covered, need to check if they are covered
+ // by the same NT
+ size_t k;
+ for (k = 0; k < vec_node.size(); k++) {
+ if (is_inside(con1->m_iBegin, con2->m_iEnd, vec_node[k]->begin_pos_,
+ vec_node[k]->end_pos_))
+ break;
+ }
+ if (k < vec_node.size()) continue;
+
+ // they are not covered bye the same NT
+ string outcome;
+ string key;
+ GenerateKey(pred->vec_items_[j - 1]->tree_item_,
+ pred->vec_items_[j]->tree_item_, vecBlockStatus[j - 1],
+ vecBlockStatus[j], key);
+
+ fnGetOutcome(vecRelativePosition[j - 1], vecRelativePosition[j],
+ outcome);
+ double prob = CalculateConstReorderProb(srl_reorder_classifier_left_,
+ map_srl_left_, key, outcome);
+ // printf("%s %s %f\n", ostr.str().c_str(), outcome.c_str(), prob);
+ logprob_srl_reorder_left += log10(prob);
+
+ fnGetOutcome(vecRelativeRightPosition[j - 1],
+ vecRelativeRightPosition[j], outcome);
+ prob = CalculateConstReorderProb(srl_reorder_classifier_right_,
+ map_srl_right_, key, outcome);
+ logprob_srl_reorder_right += log10(prob);
+ }
+ }
+
+ if (b_srl_order_feature_) {
+ int f_id = FD::Convert("SRLReorderFeatureLeft");
+ if (f_id && logprob_srl_reorder_left != 0.0)
+ features->set_value(f_id, logprob_srl_reorder_left);
+ f_id = FD::Convert("SRLReorderFeatureRight");
+ if (f_id && logprob_srl_reorder_right != 0.0)
+ features->set_value(f_id, logprob_srl_reorder_right);
+ }
+ }
+
+ if (b_block_feature_ || b_order_feature_) {
+ double logprob_const_reorder_left = 0.0,
+ logprob_const_reorder_right = 0.0;
+
+ for (size_t i = 0; i < focused_consts_->focus_parents_.size(); i++) {
+ STreeItem* parent = focused_consts_->focus_parents_[i];
+ if (!is_overlap(mapped_begin, mapped_end, parent->m_iBegin,
+ parent->m_iEnd))
+ continue; // have no overlap between this parent node and the current
+ // edge
+
+ size_t j;
+ for (j = 0; j < vec_node.size(); j++) {
+ if (is_inside(parent->m_iBegin, parent->m_iEnd,
+ vec_node[j]->begin_pos_, vec_node[j]->end_pos_))
+ break;
+ }
+ if (j < vec_node.size()) continue;
+
+ if (b_block_feature_) {
+ if (parent->m_iBegin >= mapped_begin &&
+ parent->m_iEnd <= mapped_end) {
+ string type = target_translation->IsTargetConstinousSpan2(
+ parent->m_iBegin, parent->m_iEnd);
+ int f_id = FD::Convert(string(parent->m_pszTerm) + type);
+ if (f_id) features->add_value(f_id, 1);
+ }
+ }
+
+ if (parent->m_vecChildren.size() == 1 || !b_order_feature_) continue;
+
+ vector<int> vecChunkBlock;
+ vecChunkBlock.reserve(parent->m_vecChildren.size());
+
+ for (j = 0; j < parent->m_vecChildren.size(); j++) {
+ STreeItem* con1 = parent->m_vecChildren[j];
+ if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {
+ vecChunkBlock.push_back(0);
+ continue;
+ } // the node is partially outside the current edge
+
+ string type = target_translation->IsTargetConstinousSpan2(
+ con1->m_iBegin, con1->m_iEnd);
+ vecChunkBlock.push_back(dict_block_status_->Convert(type, false));
+
+ /*if (!b_block_feature_) continue;
+ //see if the node is covered by an NT
+ size_t k;
+ for (k = 0; k < vec_node.size(); k++) {
+ if (is_inside(con1->m_iBegin, con1->m_iEnd,
+ vec_node[k]->begin_pos_, vec_node[k]->end_pos_))
+ break;
+ }
+ if (k < vec_node.size()) continue;
+ int f_id = FD::Convert(string(con1->m_pszTerm) + type);
+ if (f_id)
+ features->add_value(f_id, 1);*/
+ }
+
+ if (!b_order_feature_) continue;
+
+ vector<int> vecPosition, vecRelativePosition;
+ vector<int> vecRightPosition, vecRelativeRightPosition;
+ vecPosition.reserve(parent->m_vecChildren.size());
+ vecRelativePosition.reserve(parent->m_vecChildren.size());
+ vecRightPosition.reserve(parent->m_vecChildren.size());
+ vecRelativeRightPosition.reserve(parent->m_vecChildren.size());
+ for (j = 0; j < parent->m_vecChildren.size(); j++) {
+ STreeItem* con1 = parent->m_vecChildren[j];
+ if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) {
+ vecPosition.push_back(-1);
+ vecRightPosition.push_back(-1);
+ continue;
+ } // the node is partially outside the current edge
+ int left1 = -1, right1 = -1;
+ target_translation->FindLeftRightMostTargetSpan(
+ con1->m_iBegin, con1->m_iEnd, left1, right1);
+ vecPosition.push_back(left1);
+ vecRightPosition.push_back(right1);
+ }
+ fnGetRelativePosition(vecPosition, vecRelativePosition);
+ fnGetRelativePosition(vecRightPosition, vecRelativeRightPosition);
+
+ for (j = 1; j < parent->m_vecChildren.size(); j++) {
+ STreeItem* con1 = parent->m_vecChildren[j - 1];
+ STreeItem* con2 = parent->m_vecChildren[j];
+
+ if (con1->m_iBegin < mapped_begin || con2->m_iEnd > mapped_end)
+ continue; // one of the two nodes is partially outside the current
+ // edge
+
+ // both con1 and con2 are covered, need to check if they are covered
+ // by the same NT
+ size_t k;
+ for (k = 0; k < vec_node.size(); k++) {
+ if (is_inside(con1->m_iBegin, con2->m_iEnd, vec_node[k]->begin_pos_,
+ vec_node[k]->end_pos_))
+ break;
+ }
+ if (k < vec_node.size()) continue;
+
+ // they are not covered bye the same NT
+ string outcome;
+ string key;
+ GenerateKey(parent->m_vecChildren[j - 1], parent->m_vecChildren[j],
+ vecChunkBlock[j - 1], vecChunkBlock[j], key);
+
+ fnGetOutcome(vecRelativePosition[j - 1], vecRelativePosition[j],
+ outcome);
+ double prob = CalculateConstReorderProb(
+ const_reorder_classifier_left_, map_left_, key, outcome);
+ // printf("%s %s %f\n", ostr.str().c_str(), outcome.c_str(), prob);
+ logprob_const_reorder_left += log10(prob);
+
+ fnGetOutcome(vecRelativeRightPosition[j - 1],
+ vecRelativeRightPosition[j], outcome);
+ prob = CalculateConstReorderProb(const_reorder_classifier_right_,
+ map_right_, key, outcome);
+ logprob_const_reorder_right += log10(prob);
+ }
+ }
+
+ if (b_order_feature_) {
+ int f_id = FD::Convert("ConstReorderFeatureLeft");
+ if (f_id && logprob_const_reorder_left != 0.0)
+ features->set_value(f_id, logprob_const_reorder_left);
+ f_id = FD::Convert("ConstReorderFeatureRight");
+ if (f_id && logprob_const_reorder_right != 0.0)
+ features->set_value(f_id, logprob_const_reorder_right);
+ }
+ }
+ }
+
+ private:
+ void Byte_to_Char(unsigned char* str, int n) {
+ str[0] = (n & 255);
+ str[1] = n / 256;
+ }
+ void GenerateKey(const STreeItem* pCon1, const STreeItem* pCon2,
+ int iBlockStatus1, int iBlockStatus2, string& key) {
+ assert(iBlockStatus1 != 0);
+ assert(iBlockStatus2 != 0);
+ unsigned char szTerm[1001];
+ Byte_to_Char(szTerm, pCon1->m_iBegin);
+ Byte_to_Char(szTerm + 2, pCon2->m_iEnd);
+ szTerm[4] = (char)iBlockStatus1;
+ szTerm[5] = (char)iBlockStatus2;
+ szTerm[6] = '\0';
+ // sprintf(szTerm, "%d|%d|%d|%d|%s|%s", pCon1->m_iBegin, pCon1->m_iEnd,
+ // pCon2->m_iBegin, pCon2->m_iEnd, strBlockStatus1.c_str(),
+ // strBlockStatus2.c_str());
+ key = string(szTerm, szTerm + 6);
+ }
+ void InitializeConstReorderClassifierOutput() {
+ if (!b_order_feature_) return;
+ int size_block_status = dict_block_status_->max();
+
+ for (size_t i = 0; i < focused_consts_->focus_parents_.size(); i++) {
+ STreeItem* parent = focused_consts_->focus_parents_[i];
+
+ for (size_t j = 1; j < parent->m_vecChildren.size(); j++) {
+ for (size_t k = 1; k <= size_block_status; k++) {
+ for (size_t l = 1; l <= size_block_status; l++) {
+ ostringstream ostr;
+ GenerateFeature(parsed_tree_, parent, j,
+ dict_block_status_->Convert(k),
+ dict_block_status_->Convert(l), ostr);
+
+ string strKey;
+ GenerateKey(parent->m_vecChildren[j - 1], parent->m_vecChildren[j],
+ k, l, strKey);
+
+ vector<double> vecOutput;
+ const_reorder_classifier_left_->fnEval(ostr.str().c_str(),
+ vecOutput);
+ (*map_left_)[strKey] = vecOutput;
+
+ const_reorder_classifier_right_->fnEval(ostr.str().c_str(),
+ vecOutput);
+ (*map_right_)[strKey] = vecOutput;
+ }
+ }
+ }
+ }
+ }
+
+ void InitializeSRLReorderClassifierOutput() {
+ if (!b_srl_order_feature_) return;
+ int size_block_status = dict_block_status_->max();
+
+ for (size_t i = 0; i < focused_srl_->focus_predicates_.size(); i++) {
+ const FocusedPredicate* pred = focused_srl_->focus_predicates_[i];
+
+ for (size_t j = 1; j < pred->vec_items_.size(); j++) {
+ for (size_t k = 1; k <= size_block_status; k++) {
+ for (size_t l = 1; l <= size_block_status; l++) {
+ ostringstream ostr;
+
+ SArgumentReorderModel::fnGenerateFeature(
+ parsed_tree_, pred->pred_, pred, j,
+ dict_block_status_->Convert(k), dict_block_status_->Convert(l),
+ ostr);
+
+ string strKey;
+ GenerateKey(pred->vec_items_[j - 1]->tree_item_,
+ pred->vec_items_[j]->tree_item_, k, l, strKey);
+
+ vector<double> vecOutput;
+ srl_reorder_classifier_left_->fnEval(ostr.str().c_str(), vecOutput);
+ (*map_srl_left_)[strKey] = vecOutput;
+
+ srl_reorder_classifier_right_->fnEval(ostr.str().c_str(),
+ vecOutput);
+ (*map_srl_right_)[strKey] = vecOutput;
+ }
+ }
+ }
+ }
+ }
+
+ double CalculateConstReorderProb(
+ const Tsuruoka_Maxent* const_reorder_classifier, const MapClassifier* map,
+ const string& key, const string& outcome) {
+ MapClassifier::const_iterator iter = (*map).find(key);
+ assert(iter != map->end());
+ int id = const_reorder_classifier->fnGetClassId(outcome);
+ return iter->second[id];
+ }
+
+ void FreeSentenceVariables() {
+ if (srl_sentence_ != NULL) {
+ delete srl_sentence_;
+ srl_sentence_ = NULL;
+ } else {
+ if (parsed_tree_ != NULL) delete parsed_tree_;
+ parsed_tree_ = NULL;
+ }
+
+ if (focused_consts_ != NULL) delete focused_consts_;
+ focused_consts_ = NULL;
+
+ for (size_t i = 0; i < vec_target_tran_.size(); i++)
+ delete vec_target_tran_[i];
+ vec_target_tran_.clear();
+
+ if (index_map_ != NULL) delete index_map_;
+ index_map_ = NULL;
+
+ if (map_left_ != NULL) delete map_left_;
+ map_left_ = NULL;
+ if (map_right_ != NULL) delete map_right_;
+ map_right_ = NULL;
+
+ if (map_srl_left_ != NULL) delete map_srl_left_;
+ map_srl_left_ = NULL;
+ if (map_srl_right_ != NULL) delete map_srl_right_;
+ map_srl_right_ = NULL;
+ }
+
+ void InitializeClassifier(const char* pszFname,
+ Tsuruoka_Maxent** ppClassifier) {
+ (*ppClassifier) = new Tsuruoka_Maxent(pszFname);
+ }
+
+ void GenerateOutcome(const vector<int>& vecPos, vector<string>& vecOutcome) {
+ vecOutcome.clear();
+
+ for (size_t i = 1; i < vecPos.size(); i++) {
+ if (vecPos[i] == -1 || vecPos[i] == vecPos[i - 1]) {
+ vecOutcome.push_back("M"); // monotone
+ continue;
+ }
+
+ if (vecPos[i - 1] == -1) {
+ // vecPos[i] is not -1
+ size_t j = i - 2;
+ while (j > -1 && vecPos[j] == -1) j--;
+
+ size_t k;
+ for (k = 0; k < j; k++) {
+ if (vecPos[k] > vecPos[j] || vecPos[k] <= vecPos[i]) break;
+ }
+ if (k < j) {
+ vecOutcome.push_back("DM");
+ continue;
+ }
+
+ for (k = i + 1; k < vecPos.size(); k++)
+ if (vecPos[k] < vecPos[i] && (j == -1 && vecPos[k] >= vecPos[j]))
+ break;
+ if (k < vecPos.size()) {
+ vecOutcome.push_back("DM");
+ continue;
+ }
+ vecOutcome.push_back("M");
+ } else {
+ // neither of vecPos[i-1] and vecPos[i] is -1
+ if (vecPos[i - 1] < vecPos[i]) {
+ // monotone or discon't monotone
+ size_t j;
+ for (j = 0; j < i - 1; j++)
+ if (vecPos[j] > vecPos[i - 1] && vecPos[j] <= vecPos[i]) break;
+ if (j < i - 1) {
+ vecOutcome.push_back("DM");
+ continue;
+ }
+ for (j = i + 1; j < vecPos.size(); j++)
+ if (vecPos[j] >= vecPos[i - 1] && vecPos[j] < vecPos[i]) break;
+ if (j < vecPos.size()) {
+ vecOutcome.push_back("DM");
+ continue;
+ }
+ vecOutcome.push_back("M");
+ } else {
+ // swap or discon't swap
+ size_t j;
+ for (j = 0; j < i - 1; j++)
+ if (vecPos[j] > vecPos[i] && vecPos[j] <= vecPos[i - 1]) break;
+ if (j < i - 1) {
+ vecOutcome.push_back("DS");
+ continue;
+ }
+ for (j = i + 1; j < vecPos.size(); j++)
+ if (vecPos[j] >= vecPos[i] && vecPos[j] < vecPos[i - 1]) break;
+ if (j < vecPos.size()) {
+ vecOutcome.push_back("DS");
+ continue;
+ }
+ vecOutcome.push_back("S");
+ }
+ }
+ }
+
+ assert(vecOutcome.size() == vecPos.size() - 1);
+ }
+
+ void fnGetRelativePosition(const vector<int>& vecLeft,
+ vector<int>& vecPosition) {
+ vecPosition.clear();
+
+ vector<float> vec;
+ vec.reserve(vecLeft.size());
+ for (size_t i = 0; i < vecLeft.size(); i++) {
+ if (vecLeft[i] == -1) {
+ if (i == 0)
+ vec.push_back(-1);
+ else
+ vec.push_back(vecLeft[i - 1] + 0.1);
+ } else
+ vec.push_back(vecLeft[i]);
+ }
+
+ for (size_t i = 0; i < vecLeft.size(); i++) {
+ int count = 0;
+
+ for (size_t j = 0; j < vecLeft.size(); j++) {
+ if (j == i) continue;
+ if (vec[j] < vec[i]) {
+ count++;
+ } else if (vec[j] == vec[i] && j < i) {
+ count++;
+ }
+ }
+ vecPosition.push_back(count);
+ }
+
+ for (size_t i = 1; i < vecPosition.size(); i++) {
+ if (vecPosition[i - 1] == vecPosition[i]) {
+ for (size_t j = 0; j < vecLeft.size(); j++) cout << vecLeft[j] << " ";
+ cout << "\n";
+ assert(false);
+ }
+ }
+ }
+
+ inline void fnGetOutcome(int i1, int i2, string& strOutcome) {
+ assert(i1 != i2);
+ if (i1 < i2) {
+ if (i2 > i1 + 1)
+ strOutcome = string("DM");
+ else
+ strOutcome = string("M");
+ } else {
+ if (i1 > i2 + 1)
+ strOutcome = string("DS");
+ else
+ strOutcome = string("S");
+ }
+ }
+
+ // features in constituent_reorder_model.cc
+ void GenerateFeature(const SParsedTree* pTree, const STreeItem* pParent,
+ int iPos, const string& strBlockStatus1,
+ const string& strBlockStatus2, ostringstream& ostr) {
+ STreeItem* pCon1, *pCon2;
+ pCon1 = pParent->m_vecChildren[iPos - 1];
+ pCon2 = pParent->m_vecChildren[iPos];
+
+ string left_label = string(pCon1->m_pszTerm);
+ string right_label = string(pCon2->m_pszTerm);
+ string parent_label = string(pParent->m_pszTerm);
+
+ vector<string> vec_other_right_sibling;
+ for (int i = iPos + 1; i < pParent->m_vecChildren.size(); i++)
+ vec_other_right_sibling.push_back(
+ string(pParent->m_vecChildren[i]->m_pszTerm));
+ if (vec_other_right_sibling.size() == 0)
+ vec_other_right_sibling.push_back(string("NULL"));
+ vector<string> vec_other_left_sibling;
+ for (int i = 0; i < iPos - 1; i++)
+ vec_other_left_sibling.push_back(
+ string(pParent->m_vecChildren[i]->m_pszTerm));
+ if (vec_other_left_sibling.size() == 0)
+ vec_other_left_sibling.push_back(string("NULL"));
+
+ // generate features
+ // f1
+ ostr << "f1=" << left_label << "_" << right_label << "_" << parent_label;
+ // f2
+ for (int i = 0; i < vec_other_right_sibling.size(); i++)
+ ostr << " f2=" << left_label << "_" << right_label << "_" << parent_label
+ << "_" << vec_other_right_sibling[i];
+ // f3
+ for (int i = 0; i < vec_other_left_sibling.size(); i++)
+ ostr << " f3=" << left_label << "_" << right_label << "_" << parent_label
+ << "_" << vec_other_left_sibling[i];
+ // f4
+ ostr << " f4=" << left_label << "_" << right_label << "_"
+ << pTree->m_vecTerminals[pCon1->m_iHeadWord]->m_ptParent->m_pszTerm;
+ // f5
+ ostr << " f5=" << left_label << "_" << right_label << "_"
+ << pTree->m_vecTerminals[pCon1->m_iHeadWord]->m_pszTerm;
+ // f6
+ ostr << " f6=" << left_label << "_" << right_label << "_"
+ << pTree->m_vecTerminals[pCon2->m_iHeadWord]->m_ptParent->m_pszTerm;
+ // f7
+ ostr << " f7=" << left_label << "_" << right_label << "_"
+ << pTree->m_vecTerminals[pCon2->m_iHeadWord]->m_pszTerm;
+ // f8
+ ostr << " f8=" << left_label << "_" << right_label << "_"
+ << strBlockStatus1;
+ // f9
+ ostr << " f9=" << left_label << "_" << right_label << "_"
+ << strBlockStatus2;
+
+ // f10
+ ostr << " f10=" << left_label << "_" << parent_label;
+ // f11
+ ostr << " f11=" << right_label << "_" << parent_label;
+ }
+
+ SParsedTree* ReadParseTree(const std::string& parse_file) {
+ SParseReader* reader = new SParseReader(parse_file.c_str(), false);
+ SParsedTree* tree = reader->fnReadNextParseTree();
+ // assert(tree != NULL);
+ delete reader;
+ return tree;
+ }
+
+ SSrlSentence* ReadSRLSentence(const std::string& srl_file) {
+ SSrlSentenceReader* reader = new SSrlSentenceReader(srl_file.c_str());
+ SSrlSentence* srl = reader->fnReadNextSrlSentence();
+ // assert(srl != NULL);
+ delete reader;
+ return srl;
+ }
+
+ private:
+ Tsuruoka_Maxent* const_reorder_classifier_left_;
+ Tsuruoka_Maxent* const_reorder_classifier_right_;
+
+ Tsuruoka_Maxent* srl_reorder_classifier_left_;
+ Tsuruoka_Maxent* srl_reorder_classifier_right_;
+
+ MapClassifier* map_left_;
+ MapClassifier* map_right_;
+
+ MapClassifier* map_srl_left_;
+ MapClassifier* map_srl_right_;
+
+ SParsedTree* parsed_tree_;
+ FocusedConstituent* focused_consts_;
+ vector<TargetTranslation*> vec_target_tran_;
+
+ bool b_order_feature_;
+ bool b_block_feature_;
+
+ bool b_srl_block_feature_;
+ bool b_srl_order_feature_;
+ SSrlSentence* srl_sentence_;
+ FocusedSRL* focused_srl_;
+
+ SIndexMap* index_map_;
+
+ Dict* dict_block_status_;
};
ConstReorderFeature::ConstReorderFeature(const std::string& param) {
- pimpl_ = new ConstReorderFeatureImpl(param);
- SetStateSize(ConstReorderFeatureImpl::ReserveStateSize());
- name_ = "ConstReorderFeature";
+ pimpl_ = new ConstReorderFeatureImpl(param);
+ SetStateSize(ConstReorderFeatureImpl::ReserveStateSize());
+ name_ = "ConstReorderFeature";
}
-ConstReorderFeature::~ConstReorderFeature( ) { //TODO
- delete pimpl_;
+ConstReorderFeature::~ConstReorderFeature() { // TODO
+ delete pimpl_;
}
void ConstReorderFeature::PrepareForInput(const SentenceMetadata& smeta) {
- string parse_file = smeta.GetSGMLValue("parse");
- string srl_file = smeta.GetSGMLValue("srl");
- assert(!(parse_file == "" && srl_file == ""));
+ string parse_file = smeta.GetSGMLValue("parse");
+ string srl_file = smeta.GetSGMLValue("srl");
+ assert(!(parse_file == "" && srl_file == ""));
- string indexmap_file = smeta.GetSGMLValue("index-map");
- pimpl_->InitializeInputSentence(parse_file, srl_file, indexmap_file);
+ string indexmap_file = smeta.GetSGMLValue("index-map");
+ pimpl_->InitializeInputSentence(parse_file, srl_file, indexmap_file);
}
-void ConstReorderFeature::TraversalFeaturesImpl(const SentenceMetadata& /* smeta */,
- const Hypergraph::Edge& edge,
- const vector<const void*>& ant_states,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* state) const {
- pimpl_->SetConstReorderFeature(edge, features, ant_states, state);
+void ConstReorderFeature::TraversalFeaturesImpl(
+ const SentenceMetadata& /* smeta */, const Hypergraph::Edge& edge,
+ const vector<const void*>& ant_states, SparseVector<double>* features,
+ SparseVector<double>* estimated_features, void* state) const {
+ pimpl_->SetConstReorderFeature(edge, features, ant_states, state);
}
-string ConstReorderFeature::usage(bool /*param*/,bool /*verbose*/) {
+string ConstReorderFeature::usage(bool /*param*/, bool /*verbose*/) {
return "ConstReorderFeature";
}
-boost::shared_ptr<FeatureFunction> CreateConstReorderModel(const std::string &param) {
- ConstReorderFeature *ret = new ConstReorderFeature(param);
- return boost::shared_ptr<FeatureFunction>(ret);
+boost::shared_ptr<FeatureFunction> CreateConstReorderModel(
+ const std::string& param) {
+ ConstReorderFeature* ret = new ConstReorderFeature(param);
+ return boost::shared_ptr<FeatureFunction>(ret);
}
-boost::shared_ptr<FeatureFunction> ConstReorderFeatureFactory::Create(std::string param) const {
- return CreateConstReorderModel(param);
+boost::shared_ptr<FeatureFunction> ConstReorderFeatureFactory::Create(
+ std::string param) const {
+ return CreateConstReorderModel(param);
}
-std::string ConstReorderFeatureFactory::usage(bool params,bool verbose) const {
+std::string ConstReorderFeatureFactory::usage(bool params, bool verbose) const {
return ConstReorderFeature::usage(params, verbose);
}
diff --git a/decoder/ff_const_reorder.h b/decoder/ff_const_reorder.h
index 1aed2584..6ed35768 100644
--- a/decoder/ff_const_reorder.h
+++ b/decoder/ff_const_reorder.h
@@ -16,26 +16,26 @@ struct ConstReorderFeatureImpl;
class ConstReorderFeature : public FeatureFunction {
public:
// param = "filename n"
- ConstReorderFeature(const std::string& param);
- ~ConstReorderFeature();
- static std::string usage(bool param,bool verbose);
+ ConstReorderFeature(const std::string& param);
+ ~ConstReorderFeature();
+ static std::string usage(bool param, bool verbose);
+
protected:
virtual void PrepareForInput(const SentenceMetadata& smeta);
- virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
- const HG::Edge& edge,
- const std::vector<const void*>& ant_contexts,
- SparseVector<double>* features,
- SparseVector<double>* estimated_features,
- void* out_context) const;
+ virtual void TraversalFeaturesImpl(
+ const SentenceMetadata& smeta, const HG::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features, SparseVector<double>* estimated_features,
+ void* out_context) const;
+
private:
ConstReorderFeatureImpl* pimpl_;
};
-
struct ConstReorderFeatureFactory : public FactoryBase<FeatureFunction> {
FP Create(std::string param) const;
- std::string usage(bool params,bool verbose) const;
+ std::string usage(bool params, bool verbose) const;
};
#endif /* FF_CONST_REORDER_H_ */