diff options
author | philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-29 19:58:03 +0000 |
---|---|---|
committer | philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-29 19:58:03 +0000 |
commit | 793f4dd670fafc2ba66dbc00487830eb892264b3 (patch) | |
tree | d3cea8dcd400d531b3d6dec6c1257ec541555ebc | |
parent | 081e85ef7b33c0dd6ec7d40f9ce2fcab72519072 (diff) |
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@68 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- | decoder/csplit.cc | 134 | ||||
-rwxr-xr-x | gi/pyp-topics/scripts/spans2labels.py | 2 |
2 files changed, 68 insertions, 68 deletions
diff --git a/decoder/csplit.cc b/decoder/csplit.cc index b1a30fb0..7d50e3af 100644 --- a/decoder/csplit.cc +++ b/decoder/csplit.cc @@ -40,73 +40,73 @@ struct CompoundSplitImpl { void BuildTrellis(const vector<string>& chars, Hypergraph* forest) { - vector<int> nodes(chars.size()+1, -1); - nodes[0] = forest->AddNode(kXCAT)->id_; // source - const int left_rule = forest->AddEdge(kWORDBREAK_RULE, Hypergraph::TailNodeVector())->id_; - forest->ConnectEdgeToHeadNode(left_rule, nodes[0]); - - const int max_split_ = max(static_cast<int>(chars.size()) - min_size_ + 1, 1); - cerr << "max: " << max_split_ << " " << " min: " << min_size_ << endl; - for (int i = min_size_; i < max_split_; ++i) - nodes[i] = forest->AddNode(kXCAT)->id_; - assert(nodes.back() == -1); - nodes.back() = forest->AddNode(kXCAT)->id_; // sink - - for (int i = 0; i < max_split_; ++i) { - if (nodes[i] < 0) continue; - const int start = min(i + min_size_, static_cast<int>(chars.size())); - for (int j = start; j <= chars.size(); ++j) { - if (nodes[j] < 0) continue; - string yield; - PasteTogetherStrings(chars, i, j, &yield); - // cerr << "[" << i << "," << j << "] " << yield << endl; - TRulePtr rule = TRulePtr(new TRule(*kTEMPLATE_RULE)); - rule->e_[1] = rule->f_[1] = TD::Convert(yield); - // cerr << rule->AsString() << endl; - int edge = forest->AddEdge( - rule, - Hypergraph::TailNodeVector(1, nodes[i]))->id_; - forest->ConnectEdgeToHeadNode(edge, nodes[j]); - forest->edges_[edge].i_ = i; - forest->edges_[edge].j_ = j; - - // handle "fugenelemente" here - // don't delete "fugenelemente" at the end of words - if (fugen_elements_ && j != chars.size()) { - const int len = yield.size(); - string alt; - int fid = 0; - if (len > (min_size_ + 2) && yield[len-1] == 's' && yield[len-2] == 'e') { - alt = yield.substr(0, len - 2); - fid = kFUGEN_S; - } else if (len > (min_size_ + 1) && yield[len-1] == 's') { - alt = yield.substr(0, len - 1); - fid = kFUGEN_S; - } else if (len > (min_size_ + 2) && yield[len-2] == 'e' && yield[len-1] == 'n') { - alt = yield.substr(0, len - 1); - fid = kFUGEN_N; - } - if (alt.size()) { - TRulePtr altrule = TRulePtr(new TRule(*rule)); - altrule->e_[1] = TD::Convert(alt); - // cerr << altrule->AsString() << endl; - int edge = forest->AddEdge( - altrule, - Hypergraph::TailNodeVector(1, nodes[i]))->id_; - forest->ConnectEdgeToHeadNode(edge, nodes[j]); - forest->edges_[edge].feature_values_.set_value(fid, 1.0); - forest->edges_[edge].i_ = i; - forest->edges_[edge].j_ = j; - } - } - } - } - - // add goal rule - Hypergraph::TailNodeVector tail(1, forest->nodes_.size() - 1); - Hypergraph::Node* goal = forest->AddNode(TD::Convert("Goal")*-1); - Hypergraph::Edge* hg_edge = forest->AddEdge(kGOAL_RULE, tail); - forest->ConnectEdgeToHeadNode(hg_edge, goal); +// vector<int> nodes(chars.size()+1, -1); +// nodes[0] = forest->AddNode(kXCAT)->id_; // source +// const int left_rule = forest->AddEdge(kWORDBREAK_RULE, Hypergraph::TailNodeVector())->id_; +// forest->ConnectEdgeToHeadNode(left_rule, nodes[0]); +// +// const int max_split_ = max(static_cast<int>(chars.size()) - min_size_ + 1, 1); +// cerr << "max: " << max_split_ << " " << " min: " << min_size_ << endl; +// for (int i = min_size_; i < max_split_; ++i) +// nodes[i] = forest->AddNode(kXCAT)->id_; +// assert(nodes.back() == -1); +// nodes.back() = forest->AddNode(kXCAT)->id_; // sink +// +// for (int i = 0; i < max_split_; ++i) { +// if (nodes[i] < 0) continue; +// const int start = min(i + min_size_, static_cast<int>(chars.size())); +// for (int j = start; j <= chars.size(); ++j) { +// if (nodes[j] < 0) continue; +// string yield; +// PasteTogetherStrings(chars, i, j, &yield); +// // cerr << "[" << i << "," << j << "] " << yield << endl; +// TRulePtr rule = TRulePtr(new TRule(*kTEMPLATE_RULE)); +// rule->e_[1] = rule->f_[1] = TD::Convert(yield); +// // cerr << rule->AsString() << endl; +// int edge = forest->AddEdge( +// rule, +// Hypergraph::TailNodeVector(1, nodes[i]))->id_; +// forest->ConnectEdgeToHeadNode(edge, nodes[j]); +// forest->edges_[edge].i_ = i; +// forest->edges_[edge].j_ = j; +// +// // handle "fugenelemente" here +// // don't delete "fugenelemente" at the end of words +// if (fugen_elements_ && j != chars.size()) { +// const int len = yield.size(); +// string alt; +// int fid = 0; +// if (len > (min_size_ + 2) && yield[len-1] == 's' && yield[len-2] == 'e') { +// alt = yield.substr(0, len - 2); +// fid = kFUGEN_S; +// } else if (len > (min_size_ + 1) && yield[len-1] == 's') { +// alt = yield.substr(0, len - 1); +// fid = kFUGEN_S; +// } else if (len > (min_size_ + 2) && yield[len-2] == 'e' && yield[len-1] == 'n') { +// alt = yield.substr(0, len - 1); +// fid = kFUGEN_N; +// } +// if (alt.size()) { +// TRulePtr altrule = TRulePtr(new TRule(*rule)); +// altrule->e_[1] = TD::Convert(alt); +// // cerr << altrule->AsString() << endl; +// int edge = forest->AddEdge( +// altrule, +// Hypergraph::TailNodeVector(1, nodes[i]))->id_; +// forest->ConnectEdgeToHeadNode(edge, nodes[j]); +// forest->edges_[edge].feature_values_.set_value(fid, 1.0); +// forest->edges_[edge].i_ = i; +// forest->edges_[edge].j_ = j; +// } +// } +// } +// } +// +// // add goal rule +// Hypergraph::TailNodeVector tail(1, forest->nodes_.size() - 1); +// Hypergraph::Node* goal = forest->AddNode(TD::Convert("Goal")*-1); +// Hypergraph::Edge* hg_edge = forest->AddEdge(kGOAL_RULE, tail); +// forest->ConnectEdgeToHeadNode(hg_edge, goal); } private: const bool fugen_elements_; diff --git a/gi/pyp-topics/scripts/spans2labels.py b/gi/pyp-topics/scripts/spans2labels.py index 7cbc521c..402bcd63 100755 --- a/gi/pyp-topics/scripts/spans2labels.py +++ b/gi/pyp-topics/scripts/spans2labels.py @@ -40,6 +40,6 @@ for line in sys.stdin: right_context = reduce(lambda x, y: x+y+" ", target[t2:t2+order], "").strip() context = "%s<PHRASE> %s" % (left_context, right_context) - label = phrase_context_index[(phrase,context)] + label = phrase_context_index.get((phrase,context, "<UNK>") print "%s-%s:X%s" % (t1-order,t2-order,label), print |