From 32154b45828f05add1db7c89752ef4220c0fdf16 Mon Sep 17 00:00:00 2001 From: "graehl@gmail.com" Date: Tue, 10 Aug 2010 10:02:04 +0000 Subject: cdec --cfg_output=- git-svn-id: https://ws10smt.googlecode.com/svn/trunk@499 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/cfg_format.h | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) (limited to 'decoder/cfg_format.h') diff --git a/decoder/cfg_format.h b/decoder/cfg_format.h index 1bce3d06..169632a6 100755 --- a/decoder/cfg_format.h +++ b/decoder/cfg_format.h @@ -3,9 +3,19 @@ #include #include +#include "wordid.h" +#include "feature_vector.h" struct CFGFormat { - bool identity_scfg;bool features;bool logprob_feat;bool cfg_comma_nt;std::string goal_nt_name;std::string nt_prefix; + bool identity_scfg; + bool features; + bool logprob_feat; + bool cfg_comma_nt; + bool nt_span; + std::string goal_nt_name; + std::string nt_prefix; + std::string logprob_feat_name; + std::string partsep; template // template to support both printable_opts and boost nonprintable void AddOptions(Opts *opts) { using namespace boost::program_options; @@ -14,19 +24,81 @@ struct CFGFormat { ("identity_scfg",defaulted_value(&identity_scfg),"output an identity SCFG: add an identity target side - '[X12] ||| [X13,1] a ||| [1] a ||| feat= ...' - the redundant target '[1] a |||' is omitted otherwise.") ("features",defaulted_value(&features),"print the CFG feature vector") ("logprob_feat",defaulted_value(&logprob_feat),"print a LogProb=-1.5 feature irrespective of --features.") + ("logprob_feat_name",defaulted_value(&logprob_feat_name),"alternate name for the LogProb feature") ("cfg_comma_nt",defaulted_value(&cfg_comma_nt),"if false, omit the usual [NP,1] ',1' variable index in the source side") ("goal_nt_name",defaulted_value(&goal_nt_name),"if nonempty, the first production will be '[goal_nt_name] ||| [x123] ||| LogProb=y' where x123 is the actual goal nt, and y is the pushed prob, if any") ("nt_prefix",defaulted_value(&nt_prefix),"NTs are [123] where 123 is the node number starting at 0, and the highest node (last in file) is the goal node in an acyclic hypergraph") + ("nt_span",defaulted_value(&nt_span),"prefix A(i,j) for NT coming from hypergraph node with category A on span [i,j). this is after --nt_prefix if any") ; } + + template + void print_source_nt(std::ostream &o,CFG const&cfg,int id,int position=1) const { + o<<'['; + print_nt_name(o,cfg,id); + if (cfg_comma_nt) o<<','< + void print_nt_name(std::ostream &o,CFG const& cfg,int id) const { + o< + void print_lhs(std::ostream &o,CFG const& cfg,int id) const { + o<<'['; + print_nt_name(o,cfg,id); + o<<']'; + } + + template + void print_rhs(std::ostream &o,CFG const&cfg,Iter begin,Iter end) const { + o<0) o << TD::Convert(w); + else print_source_nt(o,cfg,-w,++pos); + } + if (identity_scfg) { + o<0) o << TD::Convert(w); + else o << '['<<++pos<<']'; + } + } + } + + void print_features(std::ostream &o,prob_t p,FeatureVector const& fv=FeatureVector()) const { + bool logp=(logprob_feat && p!=1); + if (features || logp) { + o << partsep; + if (logp) + o << logprob_feat_name<<'='<