From c142f3bde0fa673ddb3f6fc7ed3d08e71f8ff8eb Mon Sep 17 00:00:00 2001 From: "graehl@gmail.com" Date: Sun, 15 Aug 2010 05:05:19 +0000 Subject: fixed binarization. test git-svn-id: https://ws10smt.googlecode.com/svn/trunk@554 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/cfg.cc | 30 ++++++++++++++++++++++++------ decoder/cfg.h | 6 +++++- decoder/cfg_format.h | 1 + decoder/cfg_test.cc | 35 ++++++++++++++++++++++++++++------- decoder/hg_test.h | 2 +- 5 files changed, 59 insertions(+), 15 deletions(-) (limited to 'decoder') diff --git a/decoder/cfg.cc b/decoder/cfg.cc index c2d96b33..c0598f16 100755 --- a/decoder/cfg.cc +++ b/decoder/cfg.cc @@ -182,7 +182,7 @@ void CFG::Binarize(CFGBinarize const& b) { NTs new_nts; // these will be appended at the end, so we don't have to worry about iterator invalidation Rules new_rules; //TODO: this could be factored easily into in-place (append to new_* like below) and functional (nondestructive copy) versions (copy orig to target and append to target) - int newnt=nts.size(); + int newnt=-nts.size(); int newruleid=rules.size(); BinRhs bin; for (NTs::const_iterator n=nts.begin(),nn=nts.end();n!=nn;++n) { @@ -192,21 +192,29 @@ void CFG::Binarize(CFGBinarize const& b) { if (rhs.empty()) continue; bin.second=rhs.back(); for (int r=rhs.size()-2;r>=rhsmin;--r) { // pairs from right to left (normally we leave the last pair alone) - rhs.pop_back(); bin.first=rhs[r]; if (newnt==(bin.second=(get_default(bin2lhs,bin,newnt)))) { - new_nts.push_back(NT()); - new_nts.back().ruleids.push_back(newruleid); - new_rules.push_back(Rule(newnt,bin)); + new_nts.push_back(NT(newruleid)); + new_rules.push_back(Rule(-newnt,bin)); + ++newruleid; if (b.bin_name_nts) new_nts.back().from.nt=BinName(bin,nts,new_nts); - ++newnt;++newruleid; + --newnt; } } + if (rhsmin BinRhs; + typedef std::pair BinRhs; struct Rule { std::size_t hash_impl() const { @@ -144,6 +144,8 @@ struct CFG { }; struct NT { + NT() { } + explicit NT(RuleHandle r) : ruleids(1,r) { } std::size_t hash_impl() const { using namespace boost; return hash_value(ruleids); } bool operator ==(NT const &o) const { return ruleids==o.ruleids; // don't care about from @@ -181,6 +183,7 @@ struct CFG { void Init(Hypergraph const& hg,bool target_side=true,bool copy_features=false,bool push_weights=true); void Print(std::ostream &o,CFGFormat const& format) const; // see cfg_format.h void PrintRule(std::ostream &o,RuleHandle rulei,CFGFormat const& format) const; + void Print(std::ostream &o) const; // default format void Swap(CFG &o) { // make sure this includes all fields (easier to see here than in .cc) using namespace std; swap(uninit,o.uninit); @@ -302,5 +305,6 @@ inline void swap(CFG &a,CFG &b) { a.Swap(b); } +std::ostream &operator<<(std::ostream &o,CFG const &x); #endif diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h index d56d42f2..a9b3fd9f 100755 --- a/decoder/cfg_format.h +++ b/decoder/cfg_format.h @@ -111,6 +111,7 @@ struct CFGFormat { } } + //TODO: default to no nt names (nt_span=0) void set_defaults() { identity_scfg=false; features=true; diff --git a/decoder/cfg_test.cc b/decoder/cfg_test.cc index c4c37a2c..81efa768 100755 --- a/decoder/cfg_test.cc +++ b/decoder/cfg_test.cc @@ -3,18 +3,27 @@ #include "hg_test.h" #include "cfg_options.h" +#define CSHOW_V 1 +#if CSHOW_V +# define CSHOWDO(x) x +#else +# define CSHOWDO(x) +#endif +#define CSHOW(x) CSHOWDO(cerr<<#x<<'='<>v); + EXPECT_TRUE(ws>>featw); + CSHOW(featw) HGSetup::JsonTestFile(&hg,file); -// hg.Reweight(v); - cfg.Init(hg,true,false,false); + hg.Reweight(featw); + cfg.Init(hg,true,true,false); } static void SetUpTestCase() { @@ -27,10 +36,22 @@ TEST_F(CFGTest,Binarize) { Hypergraph hg; CFG cfg; JsonFN(hg,cfg,perro_json,perro_wts); + CSHOW("\nCFG Test.\n"); + CFGBinarize b; CFGFormat form; - form.features=true; - cerr<<"\nCFG Test.\n\n"; - cfg.Print(cerr,form); + form.nt_span=true; + for (int i=-1;i<16;++i) { + b.bin_l2r=i>=0; + b.bin_unary=i&1; + b.bin_name_nts=i&2; + b.bin_uniq=i&4; + b.bin_topo=i&8; + CFG cc=cfg; + EXPECT_EQ(cc,cfg); + CSHOW("\nBinarizing: "<