diff options
-rw-r--r-- | decoder/cdec.cc | 2 | ||||
-rwxr-xr-x | decoder/cfg.cc | 12 | ||||
-rwxr-xr-x | decoder/cfg.h | 3 | ||||
-rwxr-xr-x | decoder/cfg_binarize.h | 4 | ||||
-rw-r--r-- | utils/sparse_vector.h | 11 |
5 files changed, 22 insertions, 10 deletions
diff --git a/decoder/cdec.cc b/decoder/cdec.cc index 0a02801e..b156c268 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -379,7 +379,7 @@ void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,s } forest.PruneInsideOutside(beam_prune,density_prune,pm,false,1,conf["promise_power"].as<double>()); if (!forestname.empty()) forestname=" "+forestname; - forest_stats(forest," Pruned "+forestname+" forest",false,false,false); + forest_stats(forest," Pruned "+forestname+" forest",false,false,0,false); cerr << " Pruned "<<forestname<<" forest portion of edges kept: "<<forest.edges_.size()/presize<<endl; } } diff --git a/decoder/cfg.cc b/decoder/cfg.cc index f51da9bf..c02f46ec 100755 --- a/decoder/cfg.cc +++ b/decoder/cfg.cc @@ -10,7 +10,7 @@ #include "show.h" #define DUNIQ(x) x -#define DBIN(x) x +#define DBIN(x) #define DSP(x) x //SP:binarize by splitting. #define DCFG(x) IF_CFG_DEBUG(x) @@ -464,6 +464,8 @@ void CFG::BinarizeSplit(CFGBinarize const& b) { void CFG::Binarize(CFGBinarize const& b) { if (!b.Binarizing()) return; cerr << "Binarizing "<<b<<endl; + if (b.bin_thresh>0) + BinarizeThresh(b); if (b.bin_split) BinarizeSplit(b); if (b.bin_l2r) @@ -473,6 +475,14 @@ void CFG::Binarize(CFGBinarize const& b) { } +namespace { +} + +void CFG::BinarizeThresh(CFGBinarize const& b) { + throw runtime_error("TODO: some fancy linked list thing - see NOTES.partial.binarize"); +} + + void CFG::BinarizeL2R(bool bin_unary,bool name) { add_virtual_rules<BinRhs> v(*this,name); cerr << "Binarizing left->right " << (bin_unary?"real to unary":"stop at binary") <<endl; diff --git a/decoder/cfg.h b/decoder/cfg.h index 5a418234..9be0926d 100755 --- a/decoder/cfg.h +++ b/decoder/cfg.h @@ -306,7 +306,8 @@ struct CFG { void BinarizeL2R(bool bin_unary=false,bool name_nts=false); void Binarize(CFGBinarize const& binarize_options); // see cfg_binarize.h for docs - void BinarizeSplit(CFGBinarize const& binarize_options); // there may be many options affecting split. + void BinarizeSplit(CFGBinarize const& binarize_options); + void BinarizeThresh(CFGBinarize const& binarize_options); // maybe unbundle opts later typedef std::vector<NT> NTs; NTs nts; diff --git a/decoder/cfg_binarize.h b/decoder/cfg_binarize.h index 41eba11b..ae06f8bf 100755 --- a/decoder/cfg_binarize.h +++ b/decoder/cfg_binarize.h @@ -37,8 +37,8 @@ struct CFGBinarize { } void Validate() { if (bin_thresh>0&&!bin_l2r) { - std::cerr<<"\nWARNING: greedy binarization not yet supported; using l2r (right branching) instead.\n"; - bin_l2r=true; +// std::cerr<<"\nWARNING: greedy binarization not yet supported; using l2r (right branching) instead.\n"; +// bin_l2r=true; } if (false && bin_l2r && bin_split) { // actually, split may be slightly incomplete due to finite number of passes. std::cerr<<"\nWARNING: l2r and split are both complete binarization and redundant. Using split.\n"; diff --git a/utils/sparse_vector.h b/utils/sparse_vector.h index e3904403..5d0dac27 100644 --- a/utils/sparse_vector.h +++ b/utils/sparse_vector.h @@ -95,7 +95,7 @@ public: typedef char const* Str; template <class O> - void print(O &o,Str pre="",Str post="",Str kvsep="=",Str pairsep=" ") const { + void print(O &o,Str kvsep="=",Str pairsep=" ",Str pre="",Str post="") const { o << pre; bool first=true; for (const_iterator i=values_.begin(),e=values_.end();i!=e;++i) { @@ -121,7 +121,7 @@ public: // either key val alternating whitespace sep, or key=val (kvsep char is '='). end at eof or terminator (non-ws) char template <class S> - void read(S &s,DupPolicy dp=NO_DUPS,bool use_kvsep=true,char kvsep='=',bool stop_at_terminator=false,char terminator=')') { + void read(S &s,DupPolicy dp=NO_DUPS,bool use_kvsep=true,char kvsep='=',bool use_pairsep=true,char optional_pairsep=';',bool stop_at_terminator=false,char terminator=')') { values_.clear(); std::string id; WordID k; @@ -130,11 +130,12 @@ public: #define SPARSE_MUST_READ(x) if (!(x)) error(#x); int ki; while (s) { - if (stop_at_terminator) { + if (stop_at_terminator||use_pairsep) { char c; if (!(s>>c)) goto eof; - s.unget(); - if (c==terminator) return; + if (stop_at_terminator && c==terminator) return; + if (!use_pairsep || c!=optional_pairsep) + s.unget(); } if (!(s>>id)) goto eof; if (use_kvsep && (ki=id.find(kvsep))!=std::string::npos) { |