#ifndef CFG_FORMAT_H #define CFG_FORMAT_H #include #include #include "wordid.h" #include "feature_vector.h" #include "program_options.h" struct CFGFormat { bool identity_scfg; bool features; bool logprob_feat; bool comma_nt; bool nt_span; std::string goal_nt_name; std::string nt_prefix; std::string logprob_feat_name; std::string partsep; bool goal_nt() const { return !goal_nt_name.empty(); } template // template to support both printable_opts and boost nonprintable void AddOptions(Opts *opts) { //using namespace boost::program_options; //using namespace std; opts->add_options() ("identity_scfg",defaulted_value(&identity_scfg),"output an identity SCFG: add an identity target side - '[X12] ||| [X13,1] a ||| [1] a ||| feat= ...' - the redundant target '[1] a |||' is omitted otherwise.") ("features",defaulted_value(&features),"print the CFG feature vector") ("logprob_feat",defaulted_value(&logprob_feat),"print a LogProb=-1.5 feature irrespective of --features.") ("logprob_feat_name",defaulted_value(&logprob_feat_name),"alternate name for the LogProb feature") ("cfg_comma_nt",defaulted_value(&comma_nt),"if false, omit the usual [NP,1] ',1' variable index in the source side") ("goal_nt_name",defaulted_value(&goal_nt_name),"if nonempty, the first production will be '[goal_nt_name] ||| [x123] ||| LogProb=y' where x123 is the actual goal nt, and y is the pushed prob, if any") ("nt_prefix",defaulted_value(&nt_prefix),"NTs are [123] where 123 is the node number starting at 0, and the highest node (last in file) is the goal node in an acyclic hypergraph") ("nt_span",defaulted_value(&nt_span),"prefix A(i,j) for NT coming from hypergraph node with category A on span [i,j). this is after --nt_prefix if any") ; } void print(std::ostream &o) const { o<<"["; if (identity_scfg) o<<"Identity SCFG "; if (features) o<<"+Features "; if (logprob_feat) o< void print_source_nt(std::ostream &o,CFG const&cfg,int id,int position=1) const { o<<'['; print_nt_name(o,cfg,id); if (comma_nt) o<<','< void print_nt_name(std::ostream &o,CFG const& cfg,int id) const { o< void print_lhs(std::ostream &o,CFG const& cfg,int id) const { o<<'['; print_nt_name(o,cfg,id); o<<']'; } template void print_rhs(std::ostream &o,CFG const&cfg,Iter begin,Iter end) const { o<0) o << TD::Convert(w); else print_source_nt(o,cfg,-w,++pos); } if (identity_scfg) { o<0) o << TD::Convert(w); else o << '['<<++pos<<']'; } } } void print_features(std::ostream &o,prob_t p,SparseVector const& fv=SparseVector()) const { bool logp=(logprob_feat && p!=prob_t::One()); if (features || logp) { o << partsep; if (logp) o << logprob_feat_name<<'='<