diff options
| author | Wu, Ke <wuke@cs.umd.edu> | 2014-12-09 10:38:37 -0500 | 
|---|---|---|
| committer | Wu, Ke <wuke@cs.umd.edu> | 2014-12-09 10:38:37 -0500 | 
| commit | 99a1107282cfb6e81e1781e0b0af35379615e306 (patch) | |
| tree | 113e4c80a4cda2952d28e30c1388223fd59f53d7 | |
| parent | 9dff48152f8139c944ef15c658d939060e52b43c (diff) | |
| parent | 8fb14af5dfb8032e2fb4fe6e94ed983baf39de0b (diff) | |
Merge branch 'const_reorder_no_state' into softsyn
| -rw-r--r-- | decoder/ff_const_reorder.cc | 346 | 
1 files changed, 44 insertions, 302 deletions
| diff --git a/decoder/ff_const_reorder.cc b/decoder/ff_const_reorder.cc index e4fb4725..95546793 100644 --- a/decoder/ff_const_reorder.cc +++ b/decoder/ff_const_reorder.cc @@ -17,114 +17,6 @@ using namespace const_reorder;  typedef HASH_MAP<std::string, vector<double> > MapClassifier; -struct SBitArray { -  SBitArray(int size) : size_(size) { -    int bit_size = size / 8; -    if (size % 8 > 0) bit_size++; - -    char_ = new unsigned char[bit_size]; -    memset(char_, 0, bit_size); -  } -  ~SBitArray() { delete[] char_; } - -  int Get(int index) const { -    int i; - -    i = index; -    if (i < 0) i = size_ + i; -    assert(i > -1 && i < size_); - -    int byte_index, bit_index; -    byte_index = i / 8; -    bit_index = i % 8; -    unsigned char res; -    if (bit_index == 0) -      res = char_[byte_index] & 0x01; -    else if (bit_index == 1) -      res = char_[byte_index] & 0x02; -    else if (bit_index == 2) -      res = char_[byte_index] & 0x04; -    else if (bit_index == 3) -      res = char_[byte_index] & 0x08; -    else if (bit_index == 4) -      res = char_[byte_index] & 0x10; -    else if (bit_index == 5) -      res = char_[byte_index] & 0x20; -    else if (bit_index == 6) -      res = char_[byte_index] & 0x40; -    else if (bit_index == 7) -      res = char_[byte_index] & 0x80; -    else -      assert(false); -    if (res != 0) -      return 1; -    else -      return 0; -  } - -  void Set(int index, int val) { -    assert(val == 0 || val == 1); -    int i; - -    i = index; -    if (i < 0) i = size_ + i; -    assert(i > -1 && i < size_); - -    int byte_index, bit_index; -    byte_index = i / 8; -    bit_index = i % 8; -    unsigned char res; - -    if (bit_index == 0) { -      if (val == 0) -        res = char_[byte_index] & 0xFE; -      else -        res = char_[byte_index] | 0x01; -    } else if (bit_index == 1) { -      if (val == 0) -        res = char_[byte_index] & 0xFD; -      else -        res = char_[byte_index] | 0x02; -    } else if (bit_index == 2) { -      if (val == 0) -        res = char_[byte_index] & 0xFB; -      else -        res = char_[byte_index] | 0x04; -    } else if (bit_index == 3) { -      if (val == 0) -        res = char_[byte_index] & 0xF7; -      else -        res = char_[byte_index] | 0x08; -    } else if (bit_index == 4) { -      if (val == 0) -        res = char_[byte_index] & 0xEF; -      else -        res = char_[byte_index] | 0x10; -    } else if (bit_index == 5) { -      if (val == 0) -        res = char_[byte_index] & 0xDF; -      else -        res = char_[byte_index] | 0x20; -    } else if (bit_index == 6) { -      if (val == 0) -        res = char_[byte_index] & 0xBF; -      else -        res = char_[byte_index] | 0x40; -    } else if (bit_index == 7) { -      if (val == 0) -        res = char_[byte_index] & 0x7F; -      else -        res = char_[byte_index] | 0x80; -    } else -      assert(false); -    char_[byte_index] = res; -  } - - private: -  const int size_; -  unsigned char* char_; -}; -  inline bool is_inside(int i, int left, int right) {    if (i < left || i > right) return false;    return true; @@ -174,43 +66,30 @@ inline void NewAndCopyCharArray(char** p, const char* q) {  // TODO:to make the alignment more efficient  struct TargetTranslation { -  typedef vector<int> SingleWordAlign; -  TargetTranslation(int begin_pos, int end_pos, int input_begin_pos, -                    int input_end_pos, int e_num_word) +  TargetTranslation(int begin_pos, int end_pos,int e_num_word)        : begin_pos_(begin_pos),          end_pos_(end_pos), -        input_begin_pos_(input_begin_pos), -        input_end_pos_(input_end_pos),          e_num_words_(e_num_word),          vec_left_most_(end_pos - begin_pos + 1, e_num_word),          vec_right_most_(end_pos - begin_pos + 1, -1), -        vec_f_align_bit_array_(end_pos - begin_pos + 1, NULL), -        vec_e_align_bit_array_(e_num_word, NULL) { +        vec_f_align_bit_array_(end_pos - begin_pos + 1), +        vec_e_align_bit_array_(e_num_word) {      int len = end_pos - begin_pos + 1;      align_.reserve(1.5 * len);    } -  ~TargetTranslation() { -    for (size_t i = 0; i < vec_f_align_bit_array_.size(); i++) -      if (vec_f_align_bit_array_[i] != NULL) delete vec_f_align_bit_array_[i]; - -    for (size_t i = 0; i < vec_e_align_bit_array_.size(); i++) -      if (vec_e_align_bit_array_[i] != NULL) delete vec_e_align_bit_array_[i]; - -    for (size_t i = 0; i < align_.size(); i++) delete align_[i]; -  }    void InsertAlignmentPoint(int s, int t) {      int i = s - begin_pos_; -    SBitArray*& b = vec_f_align_bit_array_[i]; -    if (b == NULL) b = new SBitArray(e_num_words_); -    b->Set(t, 1); +    vector<bool>& b = vec_f_align_bit_array_[i]; +    if (b.empty()) b.resize(e_num_words_); +    b[t] = 1; -    SBitArray*& a = vec_e_align_bit_array_[t]; -    if (a == NULL) a = new SBitArray(end_pos_ - begin_pos_ + 1); -    a->Set(i, 1); +    vector<bool>& a = vec_e_align_bit_array_[t]; +    if (a.empty()) a.resize(end_pos_ - begin_pos_ + 1); +    a[i] = 1; -    align_.push_back(new AlignmentPoint(s, t)); +    align_.push_back({s, t});      if (t > vec_right_most_[i]) vec_right_most_[i] = t;      if (t < vec_left_most_[i]) vec_left_most_[i] = t; @@ -229,10 +108,10 @@ struct TargetTranslation {      if (target_begin == -1) return "0";      for (int i = target_begin; i <= target_end; i++) { -      if (vec_e_align_bit_array_[i] == NULL) continue; +      if (vec_e_align_bit_array_[i].empty()) continue;        int j = begin;        for (; j <= end; j++) { -        if (vec_e_align_bit_array_[i]->Get(j - begin_pos_)) break; +        if (vec_e_align_bit_array_[i][j - begin_pos_]) break;        }        if (j == end + 1)  // e[i] is aligned, but e[i] doesn't align to any                           // source word in [begin_pos, end_pos] @@ -247,10 +126,10 @@ struct TargetTranslation {      if (target_begin == -1) return "Unaligned";      for (int i = target_begin; i <= target_end; i++) { -      if (vec_e_align_bit_array_[i] == NULL) continue; +      if (vec_e_align_bit_array_[i].empty()) continue;        int j = begin;        for (; j <= end; j++) { -        if (vec_e_align_bit_array_[i]->Get(j - begin_pos_)) break; +        if (vec_e_align_bit_array_[i][j - begin_pos_]) break;        }        if (j == end + 1)  // e[i] is aligned, but e[i] doesn't align to any                           // source word in [begin_pos, end_pos] @@ -277,17 +156,17 @@ struct TargetTranslation {      target_end = -1;      for (int i = begin - begin_pos_; i < end - begin_pos_ + 1; i++) { -      if (vec_f_align_bit_array_[i] == NULL) continue; +      if (vec_f_align_bit_array_[i].empty()) continue;        for (int j = 0; j < target_begin; j++) -        if (vec_f_align_bit_array_[i]->Get(j)) { +        if (vec_f_align_bit_array_[i][j]) {            target_begin = j;            break;          }      }      for (int i = end - begin_pos_; i > begin - begin_pos_ - 1; i--) { -      if (vec_f_align_bit_array_[i] == NULL) continue; +      if (vec_f_align_bit_array_[i].empty()) continue;        for (int j = e_num_words_ - 1; j > target_end; j--) -        if (vec_f_align_bit_array_[i]->Get(j)) { +        if (vec_f_align_bit_array_[i][j]) {            target_end = j;            break;          } @@ -296,16 +175,15 @@ struct TargetTranslation {      if (target_end == -1) target_begin = -1;    } -  const uint16_t begin_pos_, end_pos_;              // the position in parse -  const uint16_t input_begin_pos_, input_end_pos_;  // the position in input +  const uint16_t begin_pos_, end_pos_;              // the position in input    const uint16_t e_num_words_; -  vector<AlignmentPoint*> align_; +  vector<AlignmentPoint> align_;   private:    vector<short> vec_left_most_;    vector<short> vec_right_most_; -  vector<SBitArray*> vec_f_align_bit_array_; -  vector<SBitArray*> vec_e_align_bit_array_; +  vector<vector<bool> > vec_f_align_bit_array_; +  vector<vector<bool> > vec_e_align_bit_array_;  };  struct FocusedConstituent { @@ -357,123 +235,6 @@ struct FocusedSRL {    vector<const FocusedPredicate*> focus_predicates_;  }; -/* - * Note: - *      In BOLT experiments, we need to merged some sequence words into one term - *(like from "1999 nian 1 yue 10 ri" to "1999_nian_1_yue_10_ri") due to some - *reasons; - *      but in the parse file, we still use the parse tree before merging any - *words; - *      therefore, the words in source sentence and parse tree diverse and we - *need to map a word in merged sentence into its original index; - *      a word in source sentence maps 1 or more words in parse tree - *      the index map info is stored at variable index_map_; - *      if the index_map_ is NULL, indicating the word index in source sentence - *and parse tree is always same. - * - *      In ConstReorderFeatureImpl, as to store alignment info, we use the word - *index of the parse tree - */ - -struct SIndexMap { -  SIndexMap(const string& index_map_file) { -    if (index_map_file == "") { -      index_map_input_2_parse = NULL; -      index_map_parse_2_input = NULL; -      return; -    } -    vector<string> terms; -    { -      ReadFile file(index_map_file); -      string line; -      assert(getline(*file.stream(), line)); -      SplitOnWhitespace(line, &terms); -    } - -    index_map_input_2_parse = new short int[terms.size() + 1]; -    int ix = 0; -    size_t i; -    for (i = 0; i < terms.size(); i++) { -      index_map_input_2_parse[i] = ix; -      ix += atoi(terms[i].c_str()); -    } -    index_map_input_2_parse[i] = ix; -    // assert(ix == parsed_tree_->m_vecTerminals.size()); - -    index_map_parse_2_input = new short int[ix + 1]; -    int jx = 0; -    for (i = 0; i < terms.size(); i++) { -      int num_word = atoi(terms[i].c_str()); -      for (int j = 0; j < num_word; j++) index_map_parse_2_input[jx++] = i; -    } -    index_map_parse_2_input[jx] = i; -    assert(jx == ix); -  } - -  ~SIndexMap() { -    if (index_map_input_2_parse != NULL) delete index_map_input_2_parse; -    if (index_map_parse_2_input != NULL) delete index_map_parse_2_input; -  } - -  /* -   * an input word maps to 1 or more words in parse -   */ -  void MapIndex_Input_2_Parse(short int ix, short int& mapped_begin, -                              short int& mapped_end) { -    MapIndex_Input_2_Parse(ix, ix, mapped_begin, mapped_end); -  } - -  /* -   * given the indices in input, -   * return the indices in parse tree -   */ -  void MapIndex_Input_2_Parse(short int begin, short int end, -                              short int& mapped_begin, short int& mapped_end) { -    if (index_map_input_2_parse == NULL) { -      mapped_begin = begin; -      mapped_end = end; -      return; -    } - -    mapped_begin = index_map_input_2_parse[begin]; -    mapped_end = index_map_input_2_parse[end + 1] - 1; -  } - -  /* -   * given the indices in input, -   * return the indices in parse tree -   */ -  void MapIndex_Parse_2_Input(short int mapped_begin, short int mapped_end, -                              short int& begin, short int& end) { -    if (index_map_parse_2_input == NULL) { -      begin = mapped_begin; -      end = mapped_end; -      return; -    } - -    begin = index_map_parse_2_input[mapped_begin]; -    end = index_map_parse_2_input[mapped_end]; - -    assert(mapped_begin == 0 || index_map_parse_2_input[mapped_begin - 1] != -                                    index_map_parse_2_input[mapped_begin]); -    assert(index_map_parse_2_input[mapped_end + 1] != -           index_map_parse_2_input[mapped_end]); -  } - -  /* -   * given a index in input -   * return the number of its corresponding words in parse tree -   */ -  int MapIndexWordCount(int ix) { -    if (index_map_input_2_parse == NULL) return 1; -    return index_map_input_2_parse[ix + 1] - index_map_input_2_parse[ix]; -  } - - private: -  short int* index_map_input_2_parse; -  short int* index_map_parse_2_input; -}; -  struct ConstReorderFeatureImpl {    ConstReorderFeatureImpl(const std::string& param) { @@ -523,7 +284,6 @@ struct ConstReorderFeatureImpl {      parsed_tree_ = NULL;      focused_consts_ = NULL; -    index_map_ = NULL;      srl_sentence_ = NULL;      focused_srl_ = NULL; @@ -539,6 +299,7 @@ struct ConstReorderFeatureImpl {      dict_block_status_->Convert("Discon't", false);      dict_block_status_->Convert("Con't", false);    } +    ~ConstReorderFeatureImpl() {      if (const_reorder_classifier_left_) delete const_reorder_classifier_left_;      if (const_reorder_classifier_right_) delete const_reorder_classifier_right_; @@ -552,8 +313,7 @@ struct ConstReorderFeatureImpl {    static int ReserveStateSize() { return 1 * sizeof(TargetTranslation*); }    void InitializeInputSentence(const std::string& parse_file, -                               const std::string& srl_file, -                               const std::string& index_map_file) { +                               const std::string& srl_file) {      FreeSentenceVariables();      if (b_srl_block_feature_ || b_srl_order_feature_) {        assert(srl_file != ""); @@ -586,8 +346,6 @@ struct ConstReorderFeatureImpl {        }      } -    index_map_ = new SIndexMap(index_map_file); -      if (parsed_tree_ != NULL) {        size_t i = parsed_tree_->m_vecTerminals.size();        vec_target_tran_.reserve(20 * i * i * i); @@ -601,9 +359,7 @@ struct ConstReorderFeatureImpl {                                void* state) {      if (parsed_tree_ == NULL) return; -    short int mapped_begin, mapped_end; -    index_map_->MapIndex_Input_2_Parse(edge.i_, edge.j_ - 1, mapped_begin, -                                       mapped_end); +    short int begin = edge.i_, end = edge.j_ - 1;      typedef TargetTranslation* PtrTargetTranslation;      PtrTargetTranslation* remnant = @@ -623,8 +379,7 @@ struct ConstReorderFeatureImpl {        e_num_word--;      } -    remnant[0] = new TargetTranslation(mapped_begin, mapped_end, edge.i_, -                                       edge.j_ - 1, e_num_word); +    remnant[0] = new TargetTranslation(begin, end, e_num_word);      vec_target_tran_.push_back(remnant[0]);      // reset the alignment @@ -638,7 +393,7 @@ struct ConstReorderFeatureImpl {        f_index[i] = index;        const WordID& c = rule->f_[i];        if (c < 1) -        index = vec_node[vc++]->input_end_pos_ + 1; +        index = vec_node[vc++]->end_pos_ + 1;        else          index++;      } @@ -672,32 +427,25 @@ struct ConstReorderFeatureImpl {        // i aligns j        int eindex = e_index[j]; -      const vector<AlignmentPoint*>& align = +      const vector<AlignmentPoint>& align =            vec_node[-1 * edge.rule_->e_[j]]->align_;        for (size_t k = 0; k < align.size(); k++) { -        remnant[0]->InsertAlignmentPoint(align[k]->s_, eindex + align[k]->t_); +        remnant[0]->InsertAlignmentPoint(align[k].s_, eindex + align[k].t_);        }      }      for (size_t i = 0; i < edge.rule_->a_.size(); i++) { -      short int parse_index_begin, parse_index_end; -      index_map_->MapIndex_Input_2_Parse(f_index[edge.rule_->a_[i].s_], -                                         parse_index_begin, parse_index_end); -      int findex = parse_index_begin; +      int findex = f_index[edge.rule_->a_[i].s_];        int eindex = e_index[edge.rule_->a_[i].t_]; -      int word_count = -          index_map_->MapIndexWordCount(f_index[edge.rule_->a_[i].s_]); -      assert(word_count == parse_index_end - parse_index_begin + 1); -      for (int i = 0; i < word_count; i++) -        remnant[0]->InsertAlignmentPoint(findex + i, eindex); +      remnant[0]->InsertAlignmentPoint(findex, eindex);      }      // till now, we finished setting state values      // next, use the state values to calculate constituent reorder feature -    SetConstReorderFeature(mapped_begin, mapped_end, features, remnant[0], +    SetConstReorderFeature(begin, end, features, remnant[0],                             vec_node, f_index);    } -  void SetConstReorderFeature(short int mapped_begin, short int mapped_end, +  void SetConstReorderFeature(short int begin, short int end,                                SparseVector<double>* features,                                const TargetTranslation* target_translation,                                const vector<const TargetTranslation*>& vec_node, @@ -706,7 +454,7 @@ struct ConstReorderFeatureImpl {        double logprob_srl_reorder_left = 0.0, logprob_srl_reorder_right = 0.0;        for (size_t i = 0; i < focused_srl_->focus_predicates_.size(); i++) {          const FocusedPredicate* pred = focused_srl_->focus_predicates_[i]; -        if (!is_overlap(mapped_begin, mapped_end, pred->begin_, pred->end_)) +        if (!is_overlap(begin, end, pred->begin_, pred->end_))            continue;  // have no overlap between this predicate (with its                       // argument) and the current edge @@ -722,7 +470,7 @@ struct ConstReorderFeatureImpl {          vecBlockStatus.reserve(pred->vec_items_.size());          for (j = 0; j < pred->vec_items_.size(); j++) {            const STreeItem* con1 = pred->vec_items_[j]->tree_item_; -          if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) { +          if (con1->m_iBegin < begin || con1->m_iEnd > end) {              vecBlockStatus.push_back(0);              continue;            }  // the node is partially outside the current edge @@ -754,7 +502,7 @@ struct ConstReorderFeatureImpl {          vecRelativeRightPosition.reserve(pred->vec_items_.size());          for (j = 0; j < pred->vec_items_.size(); j++) {            const STreeItem* con1 = pred->vec_items_[j]->tree_item_; -          if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) { +          if (con1->m_iBegin < begin || con1->m_iEnd > end) {              vecPosition.push_back(-1);              vecRightPosition.push_back(-1);              continue; @@ -772,7 +520,7 @@ struct ConstReorderFeatureImpl {            const STreeItem* con1 = pred->vec_items_[j - 1]->tree_item_;            const STreeItem* con2 = pred->vec_items_[j]->tree_item_; -          if (con1->m_iBegin < mapped_begin || con2->m_iEnd > mapped_end) +          if (con1->m_iBegin < begin || con2->m_iEnd > end)              continue;  // one of the two nodes is partially outside the current                         // edge @@ -824,7 +572,7 @@ struct ConstReorderFeatureImpl {        for (size_t i = 0; i < focused_consts_->focus_parents_.size(); i++) {          STreeItem* parent = focused_consts_->focus_parents_[i]; -        if (!is_overlap(mapped_begin, mapped_end, parent->m_iBegin, +        if (!is_overlap(begin, end, parent->m_iBegin,                          parent->m_iEnd))            continue;  // have no overlap between this parent node and the current                       // edge @@ -838,8 +586,8 @@ struct ConstReorderFeatureImpl {          if (j < vec_node.size()) continue;          if (b_block_feature_) { -          if (parent->m_iBegin >= mapped_begin && -              parent->m_iEnd <= mapped_end) { +          if (parent->m_iBegin >= begin && +              parent->m_iEnd <= end) {              string type = target_translation->IsTargetConstinousSpan2(                  parent->m_iBegin, parent->m_iEnd);              int f_id = FD::Convert(string(parent->m_pszTerm) + type); @@ -854,7 +602,7 @@ struct ConstReorderFeatureImpl {          for (j = 0; j < parent->m_vecChildren.size(); j++) {            STreeItem* con1 = parent->m_vecChildren[j]; -          if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) { +          if (con1->m_iBegin < begin || con1->m_iEnd > end) {              vecChunkBlock.push_back(0);              continue;            }  // the node is partially outside the current edge @@ -887,7 +635,7 @@ struct ConstReorderFeatureImpl {          vecRelativeRightPosition.reserve(parent->m_vecChildren.size());          for (j = 0; j < parent->m_vecChildren.size(); j++) {            STreeItem* con1 = parent->m_vecChildren[j]; -          if (con1->m_iBegin < mapped_begin || con1->m_iEnd > mapped_end) { +          if (con1->m_iBegin < begin || con1->m_iEnd > end) {              vecPosition.push_back(-1);              vecRightPosition.push_back(-1);              continue; @@ -905,7 +653,7 @@ struct ConstReorderFeatureImpl {            STreeItem* con1 = parent->m_vecChildren[j - 1];            STreeItem* con2 = parent->m_vecChildren[j]; -          if (con1->m_iBegin < mapped_begin || con2->m_iEnd > mapped_end) +          if (con1->m_iBegin < begin || con2->m_iEnd > end)              continue;  // one of the two nodes is partially outside the current                         // edge @@ -1063,9 +811,6 @@ struct ConstReorderFeatureImpl {        delete vec_target_tran_[i];      vec_target_tran_.clear(); -    if (index_map_ != NULL) delete index_map_; -    index_map_ = NULL; -      if (map_left_ != NULL) delete map_left_;      map_left_ = NULL;      if (map_right_ != NULL) delete map_right_; @@ -1310,8 +1055,6 @@ struct ConstReorderFeatureImpl {    SSrlSentence* srl_sentence_;    FocusedSRL* focused_srl_; -  SIndexMap* index_map_; -    Dict* dict_block_status_;  }; @@ -1331,8 +1074,7 @@ void ConstReorderFeature::PrepareForInput(const SentenceMetadata& smeta) {    string srl_file = smeta.GetSGMLValue("srl");    assert(!(parse_file == "" && srl_file == "")); -  string indexmap_file = smeta.GetSGMLValue("index-map"); -  pimpl_->InitializeInputSentence(parse_file, srl_file, indexmap_file); +  pimpl_->InitializeInputSentence(parse_file, srl_file);  }  void ConstReorderFeature::TraversalFeaturesImpl( | 
