summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--decoder/ff_soft_syn.cc329
-rw-r--r--decoder/ff_soft_syn.h52
2 files changed, 0 insertions, 381 deletions
diff --git a/decoder/ff_soft_syn.cc b/decoder/ff_soft_syn.cc
index c108e58c..970e532b 100644
--- a/decoder/ff_soft_syn.cc
+++ b/decoder/ff_soft_syn.cc
@@ -324,332 +324,3 @@ boost::shared_ptr<FeatureFunction> SoftSynFeatureFactory::Create(
std::string SoftSynFeatureFactory::usage(bool params, bool verbose) const {
return SoftSynFeature::usage(params, verbose);
}
-
-typedef HASH_MAP<std::string, double> MapDouble;
-typedef HASH_MAP<std::string, MapDouble*> MapDoubleFeatures;
-
-/*
- * Note:
- * In BOLT experiments, we need to merged some sequence words into one term
- *(like from "1999 nian 1 yue 10 ri" to "1999_nian_1_yue_10_ri") due to some
- *reasons;
- * but in the parse file, we still use the parse tree before merging any
- *words;
- * therefore, the words in source sentence and parse tree diverse and we
- *need to map a word in merged sentence into its original index;
- * a word in source sentence maps 1 or more words in parse tree
- * the index map info is stored at variable index_map_;
- * if the index_map_ is NULL, indicating the word index in source sentence
- *and parse tree is always same.
- *
- */
-
-struct SoftKBestSynFeatureImpl {
- SoftKBestSynFeatureImpl(const string& /*params*/) {
- index_map_ = NULL;
-
- map_features_ = NULL;
- }
-
- ~SoftKBestSynFeatureImpl() { FreeSentenceVariables(); }
-
- void InitializeInputSentence(const std::string& parse_file,
- const std::string& index_map_file) {
- FreeSentenceVariables();
- ReadParseTree(parse_file, vec_parsed_tree_, vec_tree_prob_);
-
- if (index_map_file != "") ReadIndexMap(index_map_file);
-
- // we can do the features "off-line"
- map_features_ = new MapDoubleFeatures();
- InitializeFeatures(map_features_);
- }
-
- void SetSoftKBestSynFeature(const Hypergraph::Edge& edge,
- SparseVector<double>* features) {
- if (vec_parsed_tree_.size() == 0) return;
-
- short int mapped_begin, mapped_end;
- MapIndex(edge.i_, edge.j_ - 1, mapped_begin, mapped_end);
-
- // soft feature for the whole span
- const MapDouble* pMapFeature =
- GenerateSoftFeature(mapped_begin, mapped_end, map_features_);
- for (MapDouble::const_iterator iter = pMapFeature->begin();
- iter != pMapFeature->end(); iter++) {
- int f_id = FD::Convert(iter->first);
- if (f_id) features->set_value(f_id, iter->second);
- }
- }
-
- private:
- void ReadIndexMap(const std::string& index_map_file) {
- vector<string> terms;
- {
- ReadFile file(index_map_file);
- string line;
- assert(getline(*file.stream(), line));
- SplitOnWhitespace(line, &terms);
- }
-
- index_map_ = new short int[terms.size() + 1];
- int ix = 0;
- size_t i;
- for (i = 0; i < terms.size(); i++) {
- index_map_[i] = ix;
- ix += atoi(terms[i].c_str());
- }
- index_map_[i] = ix;
- assert(vec_parsed_tree_.size() == 0 ||
- ix == vec_parsed_tree_[0]->m_vecTerminals.size());
- }
-
- void MapIndex(short int begin, short int end, short int& mapped_begin,
- short int& mapped_end) {
- if (index_map_ == NULL) {
- mapped_begin = begin;
- mapped_end = end;
- return;
- }
-
- mapped_begin = index_map_[begin];
- mapped_end = index_map_[end + 1] - 1;
- }
-
- /*
- * ff_const_reorder.cc::ConstReorderFeatureImpl also defines this function
- */
- void FindConsts(const SParsedTree* tree, int begin, int end,
- vector<STreeItem*>& consts) {
- STreeItem* item;
- item = tree->m_vecTerminals[begin]->m_ptParent;
- while (true) {
- while (item->m_ptParent != NULL &&
- item->m_ptParent->m_iBegin == item->m_iBegin &&
- item->m_ptParent->m_iEnd <= end)
- item = item->m_ptParent;
-
- if (item->m_ptParent == NULL && item->m_vecChildren.size() == 1 &&
- strcmp(item->m_pszTerm, "ROOT") == 0)
- item = item->m_vecChildren[0]; // we automatically add a "ROOT" node at
- // the top, skip it if necessary.
-
- consts.push_back(item);
- if (item->m_iEnd < end)
- item = tree->m_vecTerminals[item->m_iEnd + 1]->m_ptParent;
- else
- break;
- }
- }
-
- /*
- * according to Marton & Resnik (2008)
- * a span cann't have both X+ style and X= style features
- * a constituent XP is crossed only if the span not only covers parts of XP's
- *content, but also covers one or more words outside XP
- * a span may have X+, Y+
- *
- * (note, we refer X* features to X= features in Marton & Resnik (2008))
- */
- void GenerateSoftFeature(int begin, int end,
- const vector<SParsedTree*>& vec_tree,
- const vector<double>& vec_prob,
- MapDouble* pMapFeature) {
-
- for (size_t i = 0; i < vec_tree.size(); i++) {
- const SParsedTree* tree = vec_tree[i];
- vector<STreeItem*> vecNode;
- FindConsts(tree, begin, end, vecNode);
-
- if (vecNode.size() == 1) {
- // match to one constituent
- string feature_name = string(vecNode[0]->m_pszTerm) + string("*");
- MapDouble::iterator iter = pMapFeature->find(feature_name);
- if (iter != pMapFeature->end()) {
- iter->second += vec_prob[i];
- } else
- (*pMapFeature)[feature_name] = vec_prob[i];
- } else {
- // match to multiple constituents, find the lowest common parent (lcp)
- STreeItem* lcp = vecNode[0];
- while (lcp->m_iEnd < end) lcp = lcp->m_ptParent;
-
- for (size_t j = 0; j < vecNode.size(); j++) {
- STreeItem* item = vecNode[j];
-
- while (item != lcp) {
- if (item->m_iBegin < begin || item->m_iEnd > end) {
- // item is crossed
- string feature_name = string(item->m_pszTerm) + string("+");
- MapDouble::iterator iter = pMapFeature->find(feature_name);
- if (iter != pMapFeature->end()) {
- iter->second += vec_prob[i];
- } else
- (*pMapFeature)[feature_name] = vec_prob[i];
- }
- if (item->m_iBrotherIndex > 0 &&
- item->m_ptParent->m_vecChildren[item->m_iBrotherIndex - 1]
- ->m_iBegin >= begin &&
- item->m_ptParent->m_vecChildren[item->m_iBrotherIndex - 1]
- ->m_iEnd <= end)
- break; // we don't want to collect crossed constituents twice
- item = item->m_ptParent;
- }
- }
- }
- }
- }
-
- const MapDouble* GenerateSoftFeature(int begin, int end,
- MapDoubleFeatures* map_features) {
- string key;
- GenerateKey(begin, end, key);
- MapDoubleFeatures::const_iterator iter = (*map_features).find(key);
- assert(iter != map_features->end());
- return iter->second;
- }
-
- void Byte_to_Char(unsigned char* str, int n) {
- str[0] = (n & 255);
- str[1] = n / 256;
- }
-
- void GenerateKey(int begin, int end, string& key) {
- unsigned char szTerm[1001];
- Byte_to_Char(szTerm, begin);
- Byte_to_Char(szTerm + 2, end);
- szTerm[4] = '\0';
- key = string(szTerm, szTerm + 4);
- }
-
- void InitializeFeatures(MapDoubleFeatures* map_features) {
- if (vec_parsed_tree_.size() == 0) return;
-
- const SParsedTree* pTree = vec_parsed_tree_[0];
-
- vector<double> vec_prob;
- vec_prob.reserve(vec_tree_prob_.size());
- double tmp = 0.0;
- for (size_t i = 0; i < vec_tree_prob_.size(); i++) {
- vec_prob.push_back(pow(10, vec_tree_prob_[i] - vec_tree_prob_[0]));
- tmp += vec_prob[i];
- }
- for (size_t i = 0; i < vec_prob.size(); i++) vec_prob[i] /= tmp;
-
- for (size_t i = 0; i < pTree->m_vecTerminals.size(); i++)
- for (size_t j = i; j < pTree->m_vecTerminals.size(); j++) {
- MapDouble* pMap = new MapDouble();
- GenerateSoftFeature(i, j, vec_parsed_tree_, vec_prob, pMap);
- string key;
- GenerateKey(i, j, key);
- (*map_features)[key] = pMap;
- }
- }
-
- void FreeSentenceVariables() {
- for (size_t i = 0; i < vec_parsed_tree_.size(); i++) {
- if (vec_parsed_tree_[i] != NULL) delete vec_parsed_tree_[i];
- }
- vec_parsed_tree_.clear();
- vec_tree_prob_.clear();
- if (index_map_ != NULL) delete[] index_map_;
- index_map_ = NULL;
-
- if (map_features_ != NULL) {
- for (MapDoubleFeatures::iterator iter = map_features_->begin();
- iter != map_features_->end(); iter++)
- delete iter->second;
- delete map_features_;
- }
- }
-
- void ReadParseTree(const std::string& parse_file,
- vector<SParsedTree*>& vec_tree, vector<double>& vec_prob) {
- ReadFile in(parse_file);
- SParsedTree* tree;
- string line;
- while (getline(*in.stream(), line)) {
- const char* p = strchr(line.c_str(), ' ');
- assert(p != NULL);
- string strProb = line.substr(0, line.find(' '));
- tree = SParsedTree::fnConvertFromString(p + 1);
- tree->fnSetSpanInfo();
- tree->fnSetHeadWord();
- vec_tree.push_back(tree);
- if (strProb == string("-Infinity")) {
- vec_prob.push_back(-99.0);
- break;
- } else {
- vec_prob.push_back(atof(strProb.c_str()));
- }
- }
- }
-
- void ReadParseTree2(const std::string& parse_file,
- vector<SParsedTree*>& vec_tree,
- vector<double>& vec_prob) {
- SParseReader* reader = new SParseReader(parse_file.c_str(), false);
- double prob;
- SParsedTree* tree;
- while ((tree = reader->fnReadNextParseTreeWithProb(&prob)) != NULL) {
- vec_tree.push_back(tree);
- if (std::isinf(prob)) {
- vec_prob.push_back(-99);
- break;
- } else
- vec_prob.push_back(prob);
- }
- // assert(tree != NULL);
- delete reader;
- }
-
- private:
- vector<SParsedTree*> vec_parsed_tree_;
- vector<double> vec_tree_prob_;
-
- short int* index_map_;
-
- MapDoubleFeatures* map_features_;
-};
-
-SoftKBestSynFeature::SoftKBestSynFeature(std::string param) {
- pimpl_ = new SoftKBestSynFeatureImpl(param);
- name_ = "SoftKBestSynFeature";
-}
-
-SoftKBestSynFeature::~SoftKBestSynFeature() { delete pimpl_; }
-
-void SoftKBestSynFeature::PrepareForInput(const SentenceMetadata& smeta) {
- string parse_file = smeta.GetSGMLValue("kbestparse");
- assert(parse_file != "");
-
- string indexmap_file = smeta.GetSGMLValue("index-map");
-
- pimpl_->InitializeInputSentence(parse_file, indexmap_file);
-}
-
-void SoftKBestSynFeature::TraversalFeaturesImpl(
- const SentenceMetadata& /* smeta */, const Hypergraph::Edge& edge,
- const vector<const void*>& /*ant_states*/, SparseVector<double>* features,
- SparseVector<double>* /*estimated_features*/, void* /*state*/) const {
- pimpl_->SetSoftKBestSynFeature(edge, features);
-}
-
-string SoftKBestSynFeature::usage(bool /*param*/, bool /*verbose*/) {
- return "SoftKBestSynFeature";
-}
-
-boost::shared_ptr<FeatureFunction> CreateSoftKBestSynFeatureModel(
- std::string param) {
- SoftKBestSynFeature* ret = new SoftKBestSynFeature(param);
- return boost::shared_ptr<FeatureFunction>(ret);
-}
-
-boost::shared_ptr<FeatureFunction> SoftKBestSynFeatureFactory::Create(
- std::string param) const {
- return CreateSoftKBestSynFeatureModel(param);
-}
-
-std::string SoftKBestSynFeatureFactory::usage(bool params, bool verbose) const {
- return SoftKBestSynFeature::usage(params, verbose);
-}
diff --git a/decoder/ff_soft_syn.h b/decoder/ff_soft_syn.h
index 21618bc0..df9a6cc8 100644
--- a/decoder/ff_soft_syn.h
+++ b/decoder/ff_soft_syn.h
@@ -35,56 +35,4 @@ struct SoftSynFeatureFactory : public FactoryBase<FeatureFunction> {
std::string usage(bool params, bool verbose) const;
};
-struct SoftKBestSynFeatureImpl;
-
-class SoftKBestSynFeature : public FeatureFunction {
- public:
- SoftKBestSynFeature(std::string param);
- ~SoftKBestSynFeature();
- static std::string usage(bool param, bool verbose);
-
- protected:
- virtual void PrepareForInput(const SentenceMetadata& smeta);
-
- virtual void TraversalFeaturesImpl(
- const SentenceMetadata& smeta, const HG::Edge& edge,
- const std::vector<const void*>& ant_contexts,
- SparseVector<double>* features, SparseVector<double>* estimated_features,
- void* out_context) const;
-
- private:
- SoftKBestSynFeatureImpl* pimpl_;
-};
-
-struct SoftKBestSynFeatureFactory : public FactoryBase<FeatureFunction> {
- FP Create(std::string param) const;
- std::string usage(bool params, bool verbose) const;
-};
-
-struct SoftForestSynFeatureImpl;
-
-class SoftForestSynFeature : public FeatureFunction {
- public:
- SoftForestSynFeature(std::string param);
- ~SoftForestSynFeature();
- static std::string usage(bool param, bool verbose);
-
- protected:
- virtual void PrepareForInput(const SentenceMetadata& smeta);
-
- virtual void TraversalFeaturesImpl(
- const SentenceMetadata& smeta, const HG::Edge& edge,
- const std::vector<const void*>& ant_contexts,
- SparseVector<double>* features, SparseVector<double>* estimated_features,
- void* out_context) const;
-
- private:
- SoftForestSynFeatureImpl* pimpl_;
-};
-
-struct SoftForestSynFeatureFactory : public FactoryBase<FeatureFunction> {
- FP Create(std::string param) const;
- std::string usage(bool params, bool verbose) const;
-};
-
#endif /* FF_SOFT_SYN_H_ */