From addc7291bd44db96a01f399f8ee4efbe226245e2 Mon Sep 17 00:00:00 2001 From: graehl Date: Tue, 17 Aug 2010 22:05:59 +0000 Subject: vest generate / map vector print / read compatability git-svn-id: https://ws10smt.googlecode.com/svn/trunk@585 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/cdec.cc | 2 +- decoder/cfg.cc | 12 +++++++++++- decoder/cfg.h | 3 ++- decoder/cfg_binarize.h | 4 ++-- utils/sparse_vector.h | 11 ++++++----- 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/decoder/cdec.cc b/decoder/cdec.cc index 0a02801e..b156c268 100644 --- a/decoder/cdec.cc +++ b/decoder/cdec.cc @@ -379,7 +379,7 @@ void maybe_prune(Hypergraph &forest,po::variables_map const& conf,string nbeam,s } forest.PruneInsideOutside(beam_prune,density_prune,pm,false,1,conf["promise_power"].as()); if (!forestname.empty()) forestname=" "+forestname; - forest_stats(forest," Pruned "+forestname+" forest",false,false,false); + forest_stats(forest," Pruned "+forestname+" forest",false,false,0,false); cerr << " Pruned "<0) + BinarizeThresh(b); if (b.bin_split) BinarizeSplit(b); if (b.bin_l2r) @@ -473,6 +475,14 @@ void CFG::Binarize(CFGBinarize const& b) { } +namespace { +} + +void CFG::BinarizeThresh(CFGBinarize const& b) { + throw runtime_error("TODO: some fancy linked list thing - see NOTES.partial.binarize"); +} + + void CFG::BinarizeL2R(bool bin_unary,bool name) { add_virtual_rules v(*this,name); cerr << "Binarizing left->right " << (bin_unary?"real to unary":"stop at binary") < NTs; NTs nts; diff --git a/decoder/cfg_binarize.h b/decoder/cfg_binarize.h index 41eba11b..ae06f8bf 100755 --- a/decoder/cfg_binarize.h +++ b/decoder/cfg_binarize.h @@ -37,8 +37,8 @@ struct CFGBinarize { } void Validate() { if (bin_thresh>0&&!bin_l2r) { - std::cerr<<"\nWARNING: greedy binarization not yet supported; using l2r (right branching) instead.\n"; - bin_l2r=true; +// std::cerr<<"\nWARNING: greedy binarization not yet supported; using l2r (right branching) instead.\n"; +// bin_l2r=true; } if (false && bin_l2r && bin_split) { // actually, split may be slightly incomplete due to finite number of passes. std::cerr<<"\nWARNING: l2r and split are both complete binarization and redundant. Using split.\n"; diff --git a/utils/sparse_vector.h b/utils/sparse_vector.h index e3904403..5d0dac27 100644 --- a/utils/sparse_vector.h +++ b/utils/sparse_vector.h @@ -95,7 +95,7 @@ public: typedef char const* Str; template - void print(O &o,Str pre="",Str post="",Str kvsep="=",Str pairsep=" ") const { + void print(O &o,Str kvsep="=",Str pairsep=" ",Str pre="",Str post="") const { o << pre; bool first=true; for (const_iterator i=values_.begin(),e=values_.end();i!=e;++i) { @@ -121,7 +121,7 @@ public: // either key val alternating whitespace sep, or key=val (kvsep char is '='). end at eof or terminator (non-ws) char template - void read(S &s,DupPolicy dp=NO_DUPS,bool use_kvsep=true,char kvsep='=',bool stop_at_terminator=false,char terminator=')') { + void read(S &s,DupPolicy dp=NO_DUPS,bool use_kvsep=true,char kvsep='=',bool use_pairsep=true,char optional_pairsep=';',bool stop_at_terminator=false,char terminator=')') { values_.clear(); std::string id; WordID k; @@ -130,11 +130,12 @@ public: #define SPARSE_MUST_READ(x) if (!(x)) error(#x); int ki; while (s) { - if (stop_at_terminator) { + if (stop_at_terminator||use_pairsep) { char c; if (!(s>>c)) goto eof; - s.unget(); - if (c==terminator) return; + if (stop_at_terminator && c==terminator) return; + if (!use_pairsep || c!=optional_pairsep) + s.unget(); } if (!(s>>id)) goto eof; if (use_kvsep && (ki=id.find(kvsep))!=std::string::npos) { -- cgit v1.2.3