diff options
| author | Wu, Ke <wuke@cs.umd.edu> | 2014-12-06 17:24:50 -0500 | 
|---|---|---|
| committer | Wu, Ke <wuke@cs.umd.edu> | 2014-12-06 17:24:50 -0500 | 
| commit | 83a7d742c24f7401d7ff29ff362903cce98d8cb0 (patch) | |
| tree | b77849e56b66e286688d0f2450754aa4d9294422 | |
| parent | 6dbd2ee6635ec3d46349b04fe2ff74bd8a60beba (diff) | |
Remove index mapping
| -rw-r--r-- | decoder/ff_const_reorder.cc | 146 | 
1 files changed, 5 insertions, 141 deletions
| diff --git a/decoder/ff_const_reorder.cc b/decoder/ff_const_reorder.cc index 695c4de9..8874353d 100644 --- a/decoder/ff_const_reorder.cc +++ b/decoder/ff_const_reorder.cc @@ -240,123 +240,6 @@ struct FocusedSRL {    vector<const FocusedPredicate*> focus_predicates_;  }; -/* - * Note: - *      In BOLT experiments, we need to merged some sequence words into one term - *(like from "1999 nian 1 yue 10 ri" to "1999_nian_1_yue_10_ri") due to some - *reasons; - *      but in the parse file, we still use the parse tree before merging any - *words; - *      therefore, the words in source sentence and parse tree diverse and we - *need to map a word in merged sentence into its original index; - *      a word in source sentence maps 1 or more words in parse tree - *      the index map info is stored at variable index_map_; - *      if the index_map_ is NULL, indicating the word index in source sentence - *and parse tree is always same. - * - *      In ConstReorderFeatureImpl, as to store alignment info, we use the word - *index of the parse tree - */ - -struct SIndexMap { -  SIndexMap(const string& index_map_file) { -    if (index_map_file == "") { -      index_map_input_2_parse = NULL; -      index_map_parse_2_input = NULL; -      return; -    } -    vector<string> terms; -    { -      ReadFile file(index_map_file); -      string line; -      assert(getline(*file.stream(), line)); -      SplitOnWhitespace(line, &terms); -    } - -    index_map_input_2_parse = new short int[terms.size() + 1]; -    int ix = 0; -    size_t i; -    for (i = 0; i < terms.size(); i++) { -      index_map_input_2_parse[i] = ix; -      ix += atoi(terms[i].c_str()); -    } -    index_map_input_2_parse[i] = ix; -    // assert(ix == parsed_tree_->m_vecTerminals.size()); - -    index_map_parse_2_input = new short int[ix + 1]; -    int jx = 0; -    for (i = 0; i < terms.size(); i++) { -      int num_word = atoi(terms[i].c_str()); -      for (int j = 0; j < num_word; j++) index_map_parse_2_input[jx++] = i; -    } -    index_map_parse_2_input[jx] = i; -    assert(jx == ix); -  } - -  ~SIndexMap() { -    if (index_map_input_2_parse != NULL) delete index_map_input_2_parse; -    if (index_map_parse_2_input != NULL) delete index_map_parse_2_input; -  } - -  /* -   * an input word maps to 1 or more words in parse -   */ -  void MapIndex_Input_2_Parse(short int ix, short int& mapped_begin, -                              short int& mapped_end) { -    MapIndex_Input_2_Parse(ix, ix, mapped_begin, mapped_end); -  } - -  /* -   * given the indices in input, -   * return the indices in parse tree -   */ -  void MapIndex_Input_2_Parse(short int begin, short int end, -                              short int& mapped_begin, short int& mapped_end) { -    if (index_map_input_2_parse == NULL) { -      mapped_begin = begin; -      mapped_end = end; -      return; -    } - -    mapped_begin = index_map_input_2_parse[begin]; -    mapped_end = index_map_input_2_parse[end + 1] - 1; -  } - -  /* -   * given the indices in input, -   * return the indices in parse tree -   */ -  void MapIndex_Parse_2_Input(short int mapped_begin, short int mapped_end, -                              short int& begin, short int& end) { -    if (index_map_parse_2_input == NULL) { -      begin = mapped_begin; -      end = mapped_end; -      return; -    } - -    begin = index_map_parse_2_input[mapped_begin]; -    end = index_map_parse_2_input[mapped_end]; - -    assert(mapped_begin == 0 || index_map_parse_2_input[mapped_begin - 1] != -                                    index_map_parse_2_input[mapped_begin]); -    assert(index_map_parse_2_input[mapped_end + 1] != -           index_map_parse_2_input[mapped_end]); -  } - -  /* -   * given a index in input -   * return the number of its corresponding words in parse tree -   */ -  int MapIndexWordCount(int ix) { -    if (index_map_input_2_parse == NULL) return 1; -    return index_map_input_2_parse[ix + 1] - index_map_input_2_parse[ix]; -  } - - private: -  short int* index_map_input_2_parse; -  short int* index_map_parse_2_input; -}; -  struct ConstReorderFeatureImpl {    ConstReorderFeatureImpl(const std::string& param) { @@ -406,7 +289,6 @@ struct ConstReorderFeatureImpl {      parsed_tree_ = NULL;      focused_consts_ = NULL; -    index_map_ = NULL;      srl_sentence_ = NULL;      focused_srl_ = NULL; @@ -435,8 +317,7 @@ struct ConstReorderFeatureImpl {    static int ReserveStateSize() { return 1 * sizeof(TargetTranslation*); }    void InitializeInputSentence(const std::string& parse_file, -                               const std::string& srl_file, -                               const std::string& index_map_file) { +                               const std::string& srl_file) {      FreeSentenceVariables();      if (b_srl_block_feature_ || b_srl_order_feature_) {        assert(srl_file != ""); @@ -469,8 +350,6 @@ struct ConstReorderFeatureImpl {        }      } -    index_map_ = new SIndexMap(index_map_file); -      if (parsed_tree_ != NULL) {        size_t i = parsed_tree_->m_vecTerminals.size();        vec_target_tran_.reserve(20 * i * i * i); @@ -484,9 +363,7 @@ struct ConstReorderFeatureImpl {                                void* state) {      if (parsed_tree_ == NULL) return; -    short int mapped_begin, mapped_end; -    index_map_->MapIndex_Input_2_Parse(edge.i_, edge.j_ - 1, mapped_begin, -                                       mapped_end); +    short int mapped_begin = edge.i_, mapped_end = edge.j_ - 1;      typedef TargetTranslation* PtrTargetTranslation;      PtrTargetTranslation* remnant = @@ -562,16 +439,9 @@ struct ConstReorderFeatureImpl {        }      }      for (size_t i = 0; i < edge.rule_->a_.size(); i++) { -      short int parse_index_begin, parse_index_end; -      index_map_->MapIndex_Input_2_Parse(f_index[edge.rule_->a_[i].s_], -                                         parse_index_begin, parse_index_end); -      int findex = parse_index_begin; +      int findex = f_index[edge.rule_->a_[i].s_];        int eindex = e_index[edge.rule_->a_[i].t_]; -      int word_count = -          index_map_->MapIndexWordCount(f_index[edge.rule_->a_[i].s_]); -      assert(word_count == parse_index_end - parse_index_begin + 1); -      for (int i = 0; i < word_count; i++) -        remnant[0]->InsertAlignmentPoint(findex + i, eindex); +      remnant[0]->InsertAlignmentPoint(findex, eindex);      }      // till now, we finished setting state values @@ -946,9 +816,6 @@ struct ConstReorderFeatureImpl {        delete vec_target_tran_[i];      vec_target_tran_.clear(); -    if (index_map_ != NULL) delete index_map_; -    index_map_ = NULL; -      if (map_left_ != NULL) delete map_left_;      map_left_ = NULL;      if (map_right_ != NULL) delete map_right_; @@ -1193,8 +1060,6 @@ struct ConstReorderFeatureImpl {    SSrlSentence* srl_sentence_;    FocusedSRL* focused_srl_; -  SIndexMap* index_map_; -    Dict* dict_block_status_;  }; @@ -1214,8 +1079,7 @@ void ConstReorderFeature::PrepareForInput(const SentenceMetadata& smeta) {    string srl_file = smeta.GetSGMLValue("srl");    assert(!(parse_file == "" && srl_file == "")); -  string indexmap_file = smeta.GetSGMLValue("index-map"); -  pimpl_->InitializeInputSentence(parse_file, srl_file, indexmap_file); +  pimpl_->InitializeInputSentence(parse_file, srl_file);  }  void ConstReorderFeature::TraversalFeaturesImpl( | 
