summaryrefslogtreecommitdiff
path: root/decoder/cfg.cc
diff options
context:
space:
mode:
authorgraehl@gmail.com <graehl@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-08-15 07:39:01 +0000
committergraehl@gmail.com <graehl@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-08-15 07:39:01 +0000
commit80c952989dfbc05b482a7a8265d0ca73079ee894 (patch)
treee1306f8c49006d34ab6418d8678c051e04358ef9 /decoder/cfg.cc
parent214c741eee9b01ccd05a1825ad9ed647adef41db (diff)
really fixed binarization. test
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@555 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/cfg.cc')
-rwxr-xr-xdecoder/cfg.cc70
1 files changed, 45 insertions, 25 deletions
diff --git a/decoder/cfg.cc b/decoder/cfg.cc
index c0598f16..be07c2c5 100755
--- a/decoder/cfg.cc
+++ b/decoder/cfg.cc
@@ -8,6 +8,12 @@
#include "fast_lexical_cast.hpp"
//#include "indices_after.h"
+#define CFGPRINT(x) IF_CFG_DEBUG(std::cerr<<x)
+#define CFGSHOWC(x,s) CFGPRINT(#x<<"="<<x<<s)
+#define CFGSHOW(x) CFGSHOWC(x,"\n")
+#define CFGSHOWS(x) CFGSHOWC(x," ")
+#define CFGSHOW2(x,y) CFGSHOWS(x) CFGSHOW(y)
+
using namespace std;
typedef CFG::Rule Rule;
@@ -108,7 +114,7 @@ struct prob_pos {
};
}//ns
-void CFG::UniqRules(NTHandle ni) {
+int CFG::UniqRules(NTHandle ni) {
typedef HASH_MAP<RHS,prob_pos,boost::hash<RHS> > BestRHS; // faster to use trie? maybe.
BestRHS bestp; // once inserted, the position part (output index) never changes. but the prob may be improved (overwrite ruleid at that position).
HASH_MAP_EMPTY(bestp,null_rhs);
@@ -129,6 +135,7 @@ void CFG::UniqRules(NTHandle ni) {
}
// post: oi = number of new adj
adj.resize(oi);
+ return oi;
}
void CFG::SortLocalBestFirst(NTHandle ni) {
@@ -145,10 +152,12 @@ namespace {
CFG::BinRhs null_bin_rhs(std::numeric_limits<int>::min(),std::numeric_limits<int>::min());
// index i >= N.size()? then it's in M[i-N.size()]
-WordID BinName(CFG::BinRhs const& b,CFG::NTs const& N,CFG::NTs const& M)
+//WordID first,WordID second,
+string BinStr(CFG::BinRhs const& b,CFG::NTs const& N,CFG::NTs const& M)
{
int nn=N.size();
ostringstream o;
+#undef BinNameOWORD
#define BinNameOWORD(w) \
do { \
int n=w; if (n>0) o << TD::Convert(n); \
@@ -161,8 +170,12 @@ WordID BinName(CFG::BinRhs const& b,CFG::NTs const& N,CFG::NTs const& M)
BinNameOWORD(b.first);
o<<'+';
BinNameOWORD(b.second);
-#undef BinNameOWORD
- return TD::Convert(o.str());
+ return o.str();
+}
+
+WordID BinName(CFG::BinRhs const& b,CFG::NTs const& N,CFG::NTs const& M)
+{
+ return TD::Convert(BinStr(b,N,M));
}
}//ns
@@ -177,33 +190,44 @@ void CFG::Binarize(CFGBinarize const& b) {
cerr << "Binarizing "<<b<<endl;
HASH_MAP<BinRhs,NTHandle,boost::hash<BinRhs> > bin2lhs; // we're going to hash cons rather than build an explicit trie from right to left.
HASH_MAP_EMPTY(bin2lhs,null_bin_rhs);
- int rhsmin=b.bin_unary?0:1;
// iterate using indices and not iterators because we'll be adding to both nts and rules list? we could instead pessimistically reserve space for both, but this is simpler. also: store original end of nts since we won't need to reprocess newly added ones.
+ int rhsmin=b.bin_unary?0:1;
NTs new_nts; // these will be appended at the end, so we don't have to worry about iterator invalidation
Rules new_rules;
//TODO: this could be factored easily into in-place (append to new_* like below) and functional (nondestructive copy) versions (copy orig to target and append to target)
- int newnt=-nts.size();
+ int newnt=-nts.size(); // we're going to store binary rhs with -nt to keep distinct from words (>=0)
int newruleid=rules.size();
BinRhs bin;
for (NTs::const_iterator n=nts.begin(),nn=nts.end();n!=nn;++n) {
NT const& nt=*n;
for (Ruleids::const_iterator ir=nt.ruleids.begin(),er=nt.ruleids.end();ir!=er;++ir) {
+ CFGPRINT("Rule id# ") CFGSHOWS(*ir);IF_CFG_DEBUG(PrintRule(cerr<<" '",*ir,CFGFormat());cerr<<"'\n");
RHS &rhs=rules[*ir].rhs; // we're going to binarize this while adding newly created rules to new_...
if (rhs.empty()) continue;
- bin.second=rhs.back();
- for (int r=rhs.size()-2;r>=rhsmin;--r) { // pairs from right to left (normally we leave the last pair alone)
- bin.first=rhs[r];
- if (newnt==(bin.second=(get_default(bin2lhs,bin,newnt)))) {
- new_nts.push_back(NT(newruleid));
- new_rules.push_back(Rule(-newnt,bin));
- ++newruleid;
- if (b.bin_name_nts)
- new_nts.back().from.nt=BinName(bin,nts,new_nts);
- --newnt;
+ int r=rhs.size()-2; // loop below: [r,r+1) is to be reduced into a (maybe new) binary NT
+ if (rhsmin<=r) { // means r>=0 also
+ bin.second=rhs[r+1];
+ int bin_to; // the replacement for bin
+ assert(newruleid==rules.size()+new_rules.size());assert(-newnt==nts.size()+new_nts.size());
+ // also true at start/end of loop:
+ for (;;) { // pairs from right to left (normally we leave the last pair alone)
+
+ bin.first=rhs[r];
+ bin_to=get_default(bin2lhs,bin,newnt);
+ CFGSHOWS(r) CFGSHOWS(newnt) CFGPRINT("bin="<<BinStr(bin,nts,new_nts)<<"=>") CFGSHOW(bin_to);
+ if (newnt==bin_to) { // it's new!
+ new_nts.push_back(NT(newruleid++));
+ //now -newnt is the index of the last (after new_nts is appended) nt. bin is its rhs. bin_to is its lhs
+ new_rules.push_back(Rule(-newnt,bin));
+ --newnt;
+ if (b.bin_name_nts)
+ new_nts.back().from.nt=BinName(bin,nts,new_nts);
+ }
+ bin.second=bin_to;
+ --r;
+ if (r<rhsmin) break;
}
- }
- if (rhsmin<rhs.size()) {
- rhs[rhsmin]=bin.second;
+ rhs[rhsmin]=bin_to;
rhs.resize(rhsmin+1);
}
}
@@ -246,9 +270,7 @@ void CFG::Init(Hypergraph const& hg,bool target_side,bool copy_features,bool pus
prob_t &crp=cfgr.p;
crp=e.edge_prob_;
cfgr.lhs=e.head_node_;
-#if CFG_DEBUG
- cfgr.rule=e.rule_;
-#endif
+ IF_CFG_TRULE(cfgr.rule=e.rule_;)
if (copy_features) cfgr.f=e.feature_values_;
if (push_weights) crp /=np[e.head_node_];
TRule const& er=*e.rule_;
@@ -287,9 +309,7 @@ void CFG::PrintRule(std::ostream &o,RuleHandle rulei,CFGFormat const& f) const {
f.print_lhs(o,*this,r.lhs);
f.print_rhs(o,*this,r.rhs.begin(),r.rhs.end());
f.print_features(o,r.p,r.f);
-#if CFG_DEBUG
- if (r.rule) o<<f.partsep<<*r.rule;
-#endif
+ IF_CFG_TRULE(if (r.rule) o<<f.partsep<<*r.rule;)
}
void CFG::Print(std::ostream &o,CFGFormat const& f) const {