diff options
author | Wu, Ke <wuke@cs.umd.edu> | 2014-12-06 16:32:35 -0500 |
---|---|---|
committer | Wu, Ke <wuke@cs.umd.edu> | 2014-12-06 16:32:35 -0500 |
commit | 9dff48152f8139c944ef15c658d939060e52b43c (patch) | |
tree | 28e931474d767cef82e87f7e28de5829011134f4 /decoder/ff_soft_syn.cc | |
parent | cf7cf1e63f034b703adf55ffe72e534854903aee (diff) |
Remove unused code
Diffstat (limited to 'decoder/ff_soft_syn.cc')
-rw-r--r-- | decoder/ff_soft_syn.cc | 329 |
1 files changed, 0 insertions, 329 deletions
diff --git a/decoder/ff_soft_syn.cc b/decoder/ff_soft_syn.cc index c108e58c..970e532b 100644 --- a/decoder/ff_soft_syn.cc +++ b/decoder/ff_soft_syn.cc @@ -324,332 +324,3 @@ boost::shared_ptr<FeatureFunction> SoftSynFeatureFactory::Create( std::string SoftSynFeatureFactory::usage(bool params, bool verbose) const { return SoftSynFeature::usage(params, verbose); } - -typedef HASH_MAP<std::string, double> MapDouble; -typedef HASH_MAP<std::string, MapDouble*> MapDoubleFeatures; - -/* - * Note: - * In BOLT experiments, we need to merged some sequence words into one term - *(like from "1999 nian 1 yue 10 ri" to "1999_nian_1_yue_10_ri") due to some - *reasons; - * but in the parse file, we still use the parse tree before merging any - *words; - * therefore, the words in source sentence and parse tree diverse and we - *need to map a word in merged sentence into its original index; - * a word in source sentence maps 1 or more words in parse tree - * the index map info is stored at variable index_map_; - * if the index_map_ is NULL, indicating the word index in source sentence - *and parse tree is always same. - * - */ - -struct SoftKBestSynFeatureImpl { - SoftKBestSynFeatureImpl(const string& /*params*/) { - index_map_ = NULL; - - map_features_ = NULL; - } - - ~SoftKBestSynFeatureImpl() { FreeSentenceVariables(); } - - void InitializeInputSentence(const std::string& parse_file, - const std::string& index_map_file) { - FreeSentenceVariables(); - ReadParseTree(parse_file, vec_parsed_tree_, vec_tree_prob_); - - if (index_map_file != "") ReadIndexMap(index_map_file); - - // we can do the features "off-line" - map_features_ = new MapDoubleFeatures(); - InitializeFeatures(map_features_); - } - - void SetSoftKBestSynFeature(const Hypergraph::Edge& edge, - SparseVector<double>* features) { - if (vec_parsed_tree_.size() == 0) return; - - short int mapped_begin, mapped_end; - MapIndex(edge.i_, edge.j_ - 1, mapped_begin, mapped_end); - - // soft feature for the whole span - const MapDouble* pMapFeature = - GenerateSoftFeature(mapped_begin, mapped_end, map_features_); - for (MapDouble::const_iterator iter = pMapFeature->begin(); - iter != pMapFeature->end(); iter++) { - int f_id = FD::Convert(iter->first); - if (f_id) features->set_value(f_id, iter->second); - } - } - - private: - void ReadIndexMap(const std::string& index_map_file) { - vector<string> terms; - { - ReadFile file(index_map_file); - string line; - assert(getline(*file.stream(), line)); - SplitOnWhitespace(line, &terms); - } - - index_map_ = new short int[terms.size() + 1]; - int ix = 0; - size_t i; - for (i = 0; i < terms.size(); i++) { - index_map_[i] = ix; - ix += atoi(terms[i].c_str()); - } - index_map_[i] = ix; - assert(vec_parsed_tree_.size() == 0 || - ix == vec_parsed_tree_[0]->m_vecTerminals.size()); - } - - void MapIndex(short int begin, short int end, short int& mapped_begin, - short int& mapped_end) { - if (index_map_ == NULL) { - mapped_begin = begin; - mapped_end = end; - return; - } - - mapped_begin = index_map_[begin]; - mapped_end = index_map_[end + 1] - 1; - } - - /* - * ff_const_reorder.cc::ConstReorderFeatureImpl also defines this function - */ - void FindConsts(const SParsedTree* tree, int begin, int end, - vector<STreeItem*>& consts) { - STreeItem* item; - item = tree->m_vecTerminals[begin]->m_ptParent; - while (true) { - while (item->m_ptParent != NULL && - item->m_ptParent->m_iBegin == item->m_iBegin && - item->m_ptParent->m_iEnd <= end) - item = item->m_ptParent; - - if (item->m_ptParent == NULL && item->m_vecChildren.size() == 1 && - strcmp(item->m_pszTerm, "ROOT") == 0) - item = item->m_vecChildren[0]; // we automatically add a "ROOT" node at - // the top, skip it if necessary. - - consts.push_back(item); - if (item->m_iEnd < end) - item = tree->m_vecTerminals[item->m_iEnd + 1]->m_ptParent; - else - break; - } - } - - /* - * according to Marton & Resnik (2008) - * a span cann't have both X+ style and X= style features - * a constituent XP is crossed only if the span not only covers parts of XP's - *content, but also covers one or more words outside XP - * a span may have X+, Y+ - * - * (note, we refer X* features to X= features in Marton & Resnik (2008)) - */ - void GenerateSoftFeature(int begin, int end, - const vector<SParsedTree*>& vec_tree, - const vector<double>& vec_prob, - MapDouble* pMapFeature) { - - for (size_t i = 0; i < vec_tree.size(); i++) { - const SParsedTree* tree = vec_tree[i]; - vector<STreeItem*> vecNode; - FindConsts(tree, begin, end, vecNode); - - if (vecNode.size() == 1) { - // match to one constituent - string feature_name = string(vecNode[0]->m_pszTerm) + string("*"); - MapDouble::iterator iter = pMapFeature->find(feature_name); - if (iter != pMapFeature->end()) { - iter->second += vec_prob[i]; - } else - (*pMapFeature)[feature_name] = vec_prob[i]; - } else { - // match to multiple constituents, find the lowest common parent (lcp) - STreeItem* lcp = vecNode[0]; - while (lcp->m_iEnd < end) lcp = lcp->m_ptParent; - - for (size_t j = 0; j < vecNode.size(); j++) { - STreeItem* item = vecNode[j]; - - while (item != lcp) { - if (item->m_iBegin < begin || item->m_iEnd > end) { - // item is crossed - string feature_name = string(item->m_pszTerm) + string("+"); - MapDouble::iterator iter = pMapFeature->find(feature_name); - if (iter != pMapFeature->end()) { - iter->second += vec_prob[i]; - } else - (*pMapFeature)[feature_name] = vec_prob[i]; - } - if (item->m_iBrotherIndex > 0 && - item->m_ptParent->m_vecChildren[item->m_iBrotherIndex - 1] - ->m_iBegin >= begin && - item->m_ptParent->m_vecChildren[item->m_iBrotherIndex - 1] - ->m_iEnd <= end) - break; // we don't want to collect crossed constituents twice - item = item->m_ptParent; - } - } - } - } - } - - const MapDouble* GenerateSoftFeature(int begin, int end, - MapDoubleFeatures* map_features) { - string key; - GenerateKey(begin, end, key); - MapDoubleFeatures::const_iterator iter = (*map_features).find(key); - assert(iter != map_features->end()); - return iter->second; - } - - void Byte_to_Char(unsigned char* str, int n) { - str[0] = (n & 255); - str[1] = n / 256; - } - - void GenerateKey(int begin, int end, string& key) { - unsigned char szTerm[1001]; - Byte_to_Char(szTerm, begin); - Byte_to_Char(szTerm + 2, end); - szTerm[4] = '\0'; - key = string(szTerm, szTerm + 4); - } - - void InitializeFeatures(MapDoubleFeatures* map_features) { - if (vec_parsed_tree_.size() == 0) return; - - const SParsedTree* pTree = vec_parsed_tree_[0]; - - vector<double> vec_prob; - vec_prob.reserve(vec_tree_prob_.size()); - double tmp = 0.0; - for (size_t i = 0; i < vec_tree_prob_.size(); i++) { - vec_prob.push_back(pow(10, vec_tree_prob_[i] - vec_tree_prob_[0])); - tmp += vec_prob[i]; - } - for (size_t i = 0; i < vec_prob.size(); i++) vec_prob[i] /= tmp; - - for (size_t i = 0; i < pTree->m_vecTerminals.size(); i++) - for (size_t j = i; j < pTree->m_vecTerminals.size(); j++) { - MapDouble* pMap = new MapDouble(); - GenerateSoftFeature(i, j, vec_parsed_tree_, vec_prob, pMap); - string key; - GenerateKey(i, j, key); - (*map_features)[key] = pMap; - } - } - - void FreeSentenceVariables() { - for (size_t i = 0; i < vec_parsed_tree_.size(); i++) { - if (vec_parsed_tree_[i] != NULL) delete vec_parsed_tree_[i]; - } - vec_parsed_tree_.clear(); - vec_tree_prob_.clear(); - if (index_map_ != NULL) delete[] index_map_; - index_map_ = NULL; - - if (map_features_ != NULL) { - for (MapDoubleFeatures::iterator iter = map_features_->begin(); - iter != map_features_->end(); iter++) - delete iter->second; - delete map_features_; - } - } - - void ReadParseTree(const std::string& parse_file, - vector<SParsedTree*>& vec_tree, vector<double>& vec_prob) { - ReadFile in(parse_file); - SParsedTree* tree; - string line; - while (getline(*in.stream(), line)) { - const char* p = strchr(line.c_str(), ' '); - assert(p != NULL); - string strProb = line.substr(0, line.find(' ')); - tree = SParsedTree::fnConvertFromString(p + 1); - tree->fnSetSpanInfo(); - tree->fnSetHeadWord(); - vec_tree.push_back(tree); - if (strProb == string("-Infinity")) { - vec_prob.push_back(-99.0); - break; - } else { - vec_prob.push_back(atof(strProb.c_str())); - } - } - } - - void ReadParseTree2(const std::string& parse_file, - vector<SParsedTree*>& vec_tree, - vector<double>& vec_prob) { - SParseReader* reader = new SParseReader(parse_file.c_str(), false); - double prob; - SParsedTree* tree; - while ((tree = reader->fnReadNextParseTreeWithProb(&prob)) != NULL) { - vec_tree.push_back(tree); - if (std::isinf(prob)) { - vec_prob.push_back(-99); - break; - } else - vec_prob.push_back(prob); - } - // assert(tree != NULL); - delete reader; - } - - private: - vector<SParsedTree*> vec_parsed_tree_; - vector<double> vec_tree_prob_; - - short int* index_map_; - - MapDoubleFeatures* map_features_; -}; - -SoftKBestSynFeature::SoftKBestSynFeature(std::string param) { - pimpl_ = new SoftKBestSynFeatureImpl(param); - name_ = "SoftKBestSynFeature"; -} - -SoftKBestSynFeature::~SoftKBestSynFeature() { delete pimpl_; } - -void SoftKBestSynFeature::PrepareForInput(const SentenceMetadata& smeta) { - string parse_file = smeta.GetSGMLValue("kbestparse"); - assert(parse_file != ""); - - string indexmap_file = smeta.GetSGMLValue("index-map"); - - pimpl_->InitializeInputSentence(parse_file, indexmap_file); -} - -void SoftKBestSynFeature::TraversalFeaturesImpl( - const SentenceMetadata& /* smeta */, const Hypergraph::Edge& edge, - const vector<const void*>& /*ant_states*/, SparseVector<double>* features, - SparseVector<double>* /*estimated_features*/, void* /*state*/) const { - pimpl_->SetSoftKBestSynFeature(edge, features); -} - -string SoftKBestSynFeature::usage(bool /*param*/, bool /*verbose*/) { - return "SoftKBestSynFeature"; -} - -boost::shared_ptr<FeatureFunction> CreateSoftKBestSynFeatureModel( - std::string param) { - SoftKBestSynFeature* ret = new SoftKBestSynFeature(param); - return boost::shared_ptr<FeatureFunction>(ret); -} - -boost::shared_ptr<FeatureFunction> SoftKBestSynFeatureFactory::Create( - std::string param) const { - return CreateSoftKBestSynFeatureModel(param); -} - -std::string SoftKBestSynFeatureFactory::usage(bool params, bool verbose) const { - return SoftKBestSynFeature::usage(params, verbose); -} |