From df2709bd1b5326f6e1f1292cbcd6987d83134762 Mon Sep 17 00:00:00 2001 From: "Wu, Ke" Date: Sat, 6 Dec 2014 17:15:25 -0500 Subject: Remove SBitArray and use vector instead --- decoder/ff_const_reorder.cc | 161 ++++++-------------------------------------- 1 file changed, 22 insertions(+), 139 deletions(-) diff --git a/decoder/ff_const_reorder.cc b/decoder/ff_const_reorder.cc index e4fb4725..695c4de9 100644 --- a/decoder/ff_const_reorder.cc +++ b/decoder/ff_const_reorder.cc @@ -17,114 +17,6 @@ using namespace const_reorder; typedef HASH_MAP > MapClassifier; -struct SBitArray { - SBitArray(int size) : size_(size) { - int bit_size = size / 8; - if (size % 8 > 0) bit_size++; - - char_ = new unsigned char[bit_size]; - memset(char_, 0, bit_size); - } - ~SBitArray() { delete[] char_; } - - int Get(int index) const { - int i; - - i = index; - if (i < 0) i = size_ + i; - assert(i > -1 && i < size_); - - int byte_index, bit_index; - byte_index = i / 8; - bit_index = i % 8; - unsigned char res; - if (bit_index == 0) - res = char_[byte_index] & 0x01; - else if (bit_index == 1) - res = char_[byte_index] & 0x02; - else if (bit_index == 2) - res = char_[byte_index] & 0x04; - else if (bit_index == 3) - res = char_[byte_index] & 0x08; - else if (bit_index == 4) - res = char_[byte_index] & 0x10; - else if (bit_index == 5) - res = char_[byte_index] & 0x20; - else if (bit_index == 6) - res = char_[byte_index] & 0x40; - else if (bit_index == 7) - res = char_[byte_index] & 0x80; - else - assert(false); - if (res != 0) - return 1; - else - return 0; - } - - void Set(int index, int val) { - assert(val == 0 || val == 1); - int i; - - i = index; - if (i < 0) i = size_ + i; - assert(i > -1 && i < size_); - - int byte_index, bit_index; - byte_index = i / 8; - bit_index = i % 8; - unsigned char res; - - if (bit_index == 0) { - if (val == 0) - res = char_[byte_index] & 0xFE; - else - res = char_[byte_index] | 0x01; - } else if (bit_index == 1) { - if (val == 0) - res = char_[byte_index] & 0xFD; - else - res = char_[byte_index] | 0x02; - } else if (bit_index == 2) { - if (val == 0) - res = char_[byte_index] & 0xFB; - else - res = char_[byte_index] | 0x04; - } else if (bit_index == 3) { - if (val == 0) - res = char_[byte_index] & 0xF7; - else - res = char_[byte_index] | 0x08; - } else if (bit_index == 4) { - if (val == 0) - res = char_[byte_index] & 0xEF; - else - res = char_[byte_index] | 0x10; - } else if (bit_index == 5) { - if (val == 0) - res = char_[byte_index] & 0xDF; - else - res = char_[byte_index] | 0x20; - } else if (bit_index == 6) { - if (val == 0) - res = char_[byte_index] & 0xBF; - else - res = char_[byte_index] | 0x40; - } else if (bit_index == 7) { - if (val == 0) - res = char_[byte_index] & 0x7F; - else - res = char_[byte_index] | 0x80; - } else - assert(false); - char_[byte_index] = res; - } - - private: - const int size_; - unsigned char* char_; -}; - inline bool is_inside(int i, int left, int right) { if (i < left || i > right) return false; return true; @@ -184,33 +76,24 @@ struct TargetTranslation { e_num_words_(e_num_word), vec_left_most_(end_pos - begin_pos + 1, e_num_word), vec_right_most_(end_pos - begin_pos + 1, -1), - vec_f_align_bit_array_(end_pos - begin_pos + 1, NULL), - vec_e_align_bit_array_(e_num_word, NULL) { + vec_f_align_bit_array_(end_pos - begin_pos + 1), + vec_e_align_bit_array_(e_num_word) { int len = end_pos - begin_pos + 1; align_.reserve(1.5 * len); } - ~TargetTranslation() { - for (size_t i = 0; i < vec_f_align_bit_array_.size(); i++) - if (vec_f_align_bit_array_[i] != NULL) delete vec_f_align_bit_array_[i]; - - for (size_t i = 0; i < vec_e_align_bit_array_.size(); i++) - if (vec_e_align_bit_array_[i] != NULL) delete vec_e_align_bit_array_[i]; - - for (size_t i = 0; i < align_.size(); i++) delete align_[i]; - } void InsertAlignmentPoint(int s, int t) { int i = s - begin_pos_; - SBitArray*& b = vec_f_align_bit_array_[i]; - if (b == NULL) b = new SBitArray(e_num_words_); - b->Set(t, 1); + vector& b = vec_f_align_bit_array_[i]; + if (b.empty()) b.resize(e_num_words_); + b[t] = 1; - SBitArray*& a = vec_e_align_bit_array_[t]; - if (a == NULL) a = new SBitArray(end_pos_ - begin_pos_ + 1); - a->Set(i, 1); + vector& a = vec_e_align_bit_array_[t]; + if (a.empty()) a.resize(end_pos_ - begin_pos_ + 1); + a[i] = 1; - align_.push_back(new AlignmentPoint(s, t)); + align_.push_back({s, t}); if (t > vec_right_most_[i]) vec_right_most_[i] = t; if (t < vec_left_most_[i]) vec_left_most_[i] = t; @@ -229,10 +112,10 @@ struct TargetTranslation { if (target_begin == -1) return "0"; for (int i = target_begin; i <= target_end; i++) { - if (vec_e_align_bit_array_[i] == NULL) continue; + if (vec_e_align_bit_array_[i].empty()) continue; int j = begin; for (; j <= end; j++) { - if (vec_e_align_bit_array_[i]->Get(j - begin_pos_)) break; + if (vec_e_align_bit_array_[i][j - begin_pos_]) break; } if (j == end + 1) // e[i] is aligned, but e[i] doesn't align to any // source word in [begin_pos, end_pos] @@ -247,10 +130,10 @@ struct TargetTranslation { if (target_begin == -1) return "Unaligned"; for (int i = target_begin; i <= target_end; i++) { - if (vec_e_align_bit_array_[i] == NULL) continue; + if (vec_e_align_bit_array_[i].empty()) continue; int j = begin; for (; j <= end; j++) { - if (vec_e_align_bit_array_[i]->Get(j - begin_pos_)) break; + if (vec_e_align_bit_array_[i][j - begin_pos_]) break; } if (j == end + 1) // e[i] is aligned, but e[i] doesn't align to any // source word in [begin_pos, end_pos] @@ -277,17 +160,17 @@ struct TargetTranslation { target_end = -1; for (int i = begin - begin_pos_; i < end - begin_pos_ + 1; i++) { - if (vec_f_align_bit_array_[i] == NULL) continue; + if (vec_f_align_bit_array_[i].empty()) continue; for (int j = 0; j < target_begin; j++) - if (vec_f_align_bit_array_[i]->Get(j)) { + if (vec_f_align_bit_array_[i][j]) { target_begin = j; break; } } for (int i = end - begin_pos_; i > begin - begin_pos_ - 1; i--) { - if (vec_f_align_bit_array_[i] == NULL) continue; + if (vec_f_align_bit_array_[i].empty()) continue; for (int j = e_num_words_ - 1; j > target_end; j--) - if (vec_f_align_bit_array_[i]->Get(j)) { + if (vec_f_align_bit_array_[i][j]) { target_end = j; break; } @@ -299,13 +182,13 @@ struct TargetTranslation { const uint16_t begin_pos_, end_pos_; // the position in parse const uint16_t input_begin_pos_, input_end_pos_; // the position in input const uint16_t e_num_words_; - vector align_; + vector align_; private: vector vec_left_most_; vector vec_right_most_; - vector vec_f_align_bit_array_; - vector vec_e_align_bit_array_; + vector > vec_f_align_bit_array_; + vector > vec_e_align_bit_array_; }; struct FocusedConstituent { @@ -672,10 +555,10 @@ struct ConstReorderFeatureImpl { // i aligns j int eindex = e_index[j]; - const vector& align = + const vector& align = vec_node[-1 * edge.rule_->e_[j]]->align_; for (size_t k = 0; k < align.size(); k++) { - remnant[0]->InsertAlignmentPoint(align[k]->s_, eindex + align[k]->t_); + remnant[0]->InsertAlignmentPoint(align[k].s_, eindex + align[k].t_); } } for (size_t i = 0; i < edge.rule_->a_.size(); i++) { -- cgit v1.2.3 From ada5492cb0a1e866d85f8107520e139e06e0b70c Mon Sep 17 00:00:00 2001 From: "Wu, Ke" Date: Sat, 6 Dec 2014 17:24:50 -0500 Subject: Remove index mapping --- decoder/ff_const_reorder.cc | 146 ++------------------------------------------ 1 file changed, 5 insertions(+), 141 deletions(-) diff --git a/decoder/ff_const_reorder.cc b/decoder/ff_const_reorder.cc index 695c4de9..8874353d 100644 --- a/decoder/ff_const_reorder.cc +++ b/decoder/ff_const_reorder.cc @@ -240,123 +240,6 @@ struct FocusedSRL { vector focus_predicates_; }; -/* - * Note: - * In BOLT experiments, we need to merged some sequence words into one term - *(like from "1999 nian 1 yue 10 ri" to "1999_nian_1_yue_10_ri") due to some - *reasons; - * but in the parse file, we still use the parse tree before merging any - *words; - * therefore, the words in source sentence and parse tree diverse and we - *need to map a word in merged sentence into its original index; - * a word in source sentence maps 1 or more words in parse tree - * the index map info is stored at variable index_map_; - * if the index_map_ is NULL, indicating the word index in source sentence - *and parse tree is always same. - * - * In ConstReorderFeatureImpl, as to store alignment info, we use the word - *index of the parse tree - */ - -struct SIndexMap { - SIndexMap(const string& index_map_file) { - if (index_map_file == "") { - index_map_input_2_parse = NULL; - index_map_parse_2_input = NULL; - return; - } - vector terms; - { - ReadFile file(index_map_file); - string line; - assert(getline(*file.stream(), line)); - SplitOnWhitespace(line, &terms); - } - - index_map_input_2_parse = new short int[terms.size() + 1]; - int ix = 0; - size_t i; - for (i = 0; i < terms.size(); i++) { - index_map_input_2_parse[i] = ix; - ix += atoi(terms[i].c_str()); - } - index_map_input_2_parse[i] = ix; - // assert(ix == parsed_tree_->m_vecTerminals.size()); - - index_map_parse_2_input = new short int[ix + 1]; - int jx = 0; - for (i = 0; i < terms.size(); i++) { - int num_word = atoi(terms[i].c_str()); - for (int j = 0; j < num_word; j++) index_map_parse_2_input[jx++] = i; - } - index_map_parse_2_input[jx] = i; - assert(jx == ix); - } - - ~SIndexMap() { - if (index_map_input_2_parse != NULL) delete index_map_input_2_parse; - if (index_map_parse_2_input != NULL) delete index_map_parse_2_input; - } - - /* - * an input word maps to 1 or more words in parse - */ - void MapIndex_Input_2_Parse(short int ix, short int& mapped_begin, - short int& mapped_end) { - MapIndex_Input_2_Parse(ix, ix, mapped_begin, mapped_end); - } - - /* - * given the indices in input, - * return the indices in parse tree - */ - void MapIndex_Input_2_Parse(short int begin, short int end, - short int& mapped_begin, short int& mapped_end) { - if (index_map_input_2_parse == NULL) { - mapped_begin = begin; - mapped_end = end; - return; - } - - mapped_begin = index_map_input_2_parse[begin]; - mapped_end = index_map_input_2_parse[end + 1] - 1; - } - - /* - * given the indices in input, - * return the indices in parse tree - */ - void MapIndex_Parse_2_Input(short int mapped_begin, short int mapped_end, - short int& begin, short int& end) { - if (index_map_parse_2_input == NULL) { - begin = mapped_begin; - end = mapped_end; - return; - } - - begin = index_map_parse_2_input[mapped_begin]; - end = index_map_parse_2_input[mapped_end]; - - assert(mapped_begin == 0 || index_map_parse_2_input[mapped_begin - 1] != - index_map_parse_2_input[mapped_begin]); - assert(index_map_parse_2_input[mapped_end + 1] != - index_map_parse_2_input[mapped_end]); - } - - /* - * given a index in input - * return the number of its corresponding words in parse tree - */ - int MapIndexWordCount(int ix) { - if (index_map_input_2_parse == NULL) return 1; - return index_map_input_2_parse[ix + 1] - index_map_input_2_parse[ix]; - } - - private: - short int* index_map_input_2_parse; - short int* index_map_parse_2_input; -}; - struct ConstReorderFeatureImpl { ConstReorderFeatureImpl(const std::string& param) { @@ -406,7 +289,6 @@ struct ConstReorderFeatureImpl { parsed_tree_ = NULL; focused_consts_ = NULL; - index_map_ = NULL; srl_sentence_ = NULL; focused_srl_ = NULL; @@ -435,8 +317,7 @@ struct ConstReorderFeatureImpl { static int ReserveStateSize() { return 1 * sizeof(TargetTranslation*); } void InitializeInputSentence(const std::string& parse_file, - const std::string& srl_file, - const std::string& index_map_file) { + const std::string& srl_file) { FreeSentenceVariables(); if (b_srl_block_feature_ || b_srl_order_feature_) { assert(srl_file != ""); @@ -469,8 +350,6 @@ struct ConstReorderFeatureImpl { } } - index_map_ = new SIndexMap(index_map_file); - if (parsed_tree_ != NULL) { size_t i = parsed_tree_->m_vecTerminals.size(); vec_target_tran_.reserve(20 * i * i * i); @@ -484,9 +363,7 @@ struct ConstReorderFeatureImpl { void* state) { if (parsed_tree_ == NULL) return; - short int mapped_begin, mapped_end; - index_map_->MapIndex_Input_2_Parse(edge.i_, edge.j_ - 1, mapped_begin, - mapped_end); + short int mapped_begin = edge.i_, mapped_end = edge.j_ - 1; typedef TargetTranslation* PtrTargetTranslation; PtrTargetTranslation* remnant = @@ -562,16 +439,9 @@ struct ConstReorderFeatureImpl { } } for (size_t i = 0; i < edge.rule_->a_.size(); i++) { - short int parse_index_begin, parse_index_end; - index_map_->MapIndex_Input_2_Parse(f_index[edge.rule_->a_[i].s_], - parse_index_begin, parse_index_end); - int findex = parse_index_begin; + int findex = f_index[edge.rule_->a_[i].s_]; int eindex = e_index[edge.rule_->a_[i].t_]; - int word_count = - index_map_->MapIndexWordCount(f_index[edge.rule_->a_[i].s_]); - assert(word_count == parse_index_end - parse_index_begin + 1); - for (int i = 0; i < word_count; i++) - remnant[0]->InsertAlignmentPoint(findex + i, eindex); + remnant[0]->InsertAlignmentPoint(findex, eindex); } // till now, we finished setting state values @@ -946,9 +816,6 @@ struct ConstReorderFeatureImpl { delete vec_target_tran_[i]; vec_target_tran_.clear(); - if (index_map_ != NULL) delete index_map_; - index_map_ = NULL; - if (map_left_ != NULL) delete map_left_; map_left_ = NULL; if (map_right_ != NULL) delete map_right_; @@ -1193,8 +1060,6 @@ struct ConstReorderFeatureImpl { SSrlSentence* srl_sentence_; FocusedSRL* focused_srl_; - SIndexMap* index_map_; - Dict* dict_block_status_; }; @@ -1214,8 +1079,7 @@ void ConstReorderFeature::PrepareForInput(const SentenceMetadata& smeta) { string srl_file = smeta.GetSGMLValue("srl"); assert(!(parse_file == "" && srl_file == "")); - string indexmap_file = smeta.GetSGMLValue("index-map"); - pimpl_->InitializeInputSentence(parse_file, srl_file, indexmap_file); + pimpl_->InitializeInputSentence(parse_file, srl_file); } void ConstReorderFeature::TraversalFeaturesImpl( -- cgit v1.2.3 From b1e5513ab3012fa58713a64710a641f42161f9d5 Mon Sep 17 00:00:00 2001 From: "Wu, Ke" Date: Sun, 7 Dec 2014 11:01:04 -0500 Subject: Remove unused field in TargetTranslation --- decoder/ff_const_reorder.cc | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/decoder/ff_const_reorder.cc b/decoder/ff_const_reorder.cc index 8874353d..95546793 100644 --- a/decoder/ff_const_reorder.cc +++ b/decoder/ff_const_reorder.cc @@ -66,13 +66,9 @@ inline void NewAndCopyCharArray(char** p, const char* q) { // TODO:to make the alignment more efficient struct TargetTranslation { - typedef vector SingleWordAlign; - TargetTranslation(int begin_pos, int end_pos, int input_begin_pos, - int input_end_pos, int e_num_word) + TargetTranslation(int begin_pos, int end_pos,int e_num_word) : begin_pos_(begin_pos), end_pos_(end_pos), - input_begin_pos_(input_begin_pos), - input_end_pos_(input_end_pos), e_num_words_(e_num_word), vec_left_most_(end_pos - begin_pos + 1, e_num_word), vec_right_most_(end_pos - begin_pos + 1, -1), @@ -179,8 +175,7 @@ struct TargetTranslation { if (target_end == -1) target_begin = -1; } - const uint16_t begin_pos_, end_pos_; // the position in parse - const uint16_t input_begin_pos_, input_end_pos_; // the position in input + const uint16_t begin_pos_, end_pos_; // the position in input const uint16_t e_num_words_; vector align_; @@ -304,6 +299,7 @@ struct ConstReorderFeatureImpl { dict_block_status_->Convert("Discon't", false); dict_block_status_->Convert("Con't", false); } + ~ConstReorderFeatureImpl() { if (const_reorder_classifier_left_) delete const_reorder_classifier_left_; if (const_reorder_classifier_right_) delete const_reorder_classifier_right_; @@ -363,7 +359,7 @@ struct ConstReorderFeatureImpl { void* state) { if (parsed_tree_ == NULL) return; - short int mapped_begin = edge.i_, mapped_end = edge.j_ - 1; + short int begin = edge.i_, end = edge.j_ - 1; typedef TargetTranslation* PtrTargetTranslation; PtrTargetTranslation* remnant = @@ -383,8 +379,7 @@ struct ConstReorderFeatureImpl { e_num_word--; } - remnant[0] = new TargetTranslation(mapped_begin, mapped_end, edge.i_, - edge.j_ - 1, e_num_word); + remnant[0] = new TargetTranslation(begin, end, e_num_word); vec_target_tran_.push_back(remnant[0]); // reset the alignment @@ -398,7 +393,7 @@ struct ConstReorderFeatureImpl { f_index[i] = index; const WordID& c = rule->f_[i]; if (c < 1) - index = vec_node[vc++]->input_end_pos_ + 1; + index = vec_node[vc++]->end_pos_ + 1; else index++; } @@ -446,11 +441,11 @@ struct ConstReorderFeatureImpl { // till now, we finished setting state values // next, use the state values to calculate constituent reorder feature - SetConstReorderFeature(mapped_begin, mapped_end, features, remnant[0], + SetConstReorderFeature(begin, end, features, remnant[0], vec_node, f_index); } - void SetConstReorderFeature(short int mapped_begin, short int mapped_end, + void SetConstReorderFeature(short int begin, short int end, SparseVector* features, const TargetTranslation* target_translation, const vector& vec_node, @@ -459,7 +454,7 @@ struct ConstReorderFeatureImpl { double logprob_srl_reorder_left = 0.0, logprob_srl_reorder_right = 0.0; for (size_t i = 0; i < focused_srl_->focus_predicates_.size(); i++) { const FocusedPredicate* pred = focused_srl_->focus_predicates_[i]; - if (!is_overlap(mapped_begin, mapped_end, pred->begin_, pred->end_)) + if (!is_overlap(begin, end, pred->begin_, pred->end_)) continue; // have no overlap between this predicate (with its // argument) and the current edge @@ -475,7 +470,7 @@ struct ConstReorderFeatureImpl { vecBlockStatus.reserve(pred->vec_items_.size()); for (j = 0; j < pred->vec_items_.size(); j++) { const STreeItem* con1 = pred->vec_items_[j]->tree_item_; - if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) { + if (con1->m_iBegin < begin || con1->m_iEnd > end) { vecBlockStatus.push_back(0); continue; } // the node is partially outside the current edge @@ -507,7 +502,7 @@ struct ConstReorderFeatureImpl { vecRelativeRightPosition.reserve(pred->vec_items_.size()); for (j = 0; j < pred->vec_items_.size(); j++) { const STreeItem* con1 = pred->vec_items_[j]->tree_item_; - if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) { + if (con1->m_iBegin < begin || con1->m_iEnd > end) { vecPosition.push_back(-1); vecRightPosition.push_back(-1); continue; @@ -525,7 +520,7 @@ struct ConstReorderFeatureImpl { const STreeItem* con1 = pred->vec_items_[j - 1]->tree_item_; const STreeItem* con2 = pred->vec_items_[j]->tree_item_; - if (con1->m_iBegin < mapped_begin || con2->m_iEnd > mapped_end) + if (con1->m_iBegin < begin || con2->m_iEnd > end) continue; // one of the two nodes is partially outside the current // edge @@ -577,7 +572,7 @@ struct ConstReorderFeatureImpl { for (size_t i = 0; i < focused_consts_->focus_parents_.size(); i++) { STreeItem* parent = focused_consts_->focus_parents_[i]; - if (!is_overlap(mapped_begin, mapped_end, parent->m_iBegin, + if (!is_overlap(begin, end, parent->m_iBegin, parent->m_iEnd)) continue; // have no overlap between this parent node and the current // edge @@ -591,8 +586,8 @@ struct ConstReorderFeatureImpl { if (j < vec_node.size()) continue; if (b_block_feature_) { - if (parent->m_iBegin >= mapped_begin && - parent->m_iEnd <= mapped_end) { + if (parent->m_iBegin >= begin && + parent->m_iEnd <= end) { string type = target_translation->IsTargetConstinousSpan2( parent->m_iBegin, parent->m_iEnd); int f_id = FD::Convert(string(parent->m_pszTerm) + type); @@ -607,7 +602,7 @@ struct ConstReorderFeatureImpl { for (j = 0; j < parent->m_vecChildren.size(); j++) { STreeItem* con1 = parent->m_vecChildren[j]; - if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) { + if (con1->m_iBegin < begin || con1->m_iEnd > end) { vecChunkBlock.push_back(0); continue; } // the node is partially outside the current edge @@ -640,7 +635,7 @@ struct ConstReorderFeatureImpl { vecRelativeRightPosition.reserve(parent->m_vecChildren.size()); for (j = 0; j < parent->m_vecChildren.size(); j++) { STreeItem* con1 = parent->m_vecChildren[j]; - if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) { + if (con1->m_iBegin < begin || con1->m_iEnd > end) { vecPosition.push_back(-1); vecRightPosition.push_back(-1); continue; @@ -658,7 +653,7 @@ struct ConstReorderFeatureImpl { STreeItem* con1 = parent->m_vecChildren[j - 1]; STreeItem* con2 = parent->m_vecChildren[j]; - if (con1->m_iBegin < mapped_begin || con2->m_iEnd > mapped_end) + if (con1->m_iBegin < begin || con2->m_iEnd > end) continue; // one of the two nodes is partially outside the current // edge -- cgit v1.2.3