From ead8845217c5e6e48f3680ead6f859ec8e110eb2 Mon Sep 17 00:00:00 2001 From: graehl Date: Fri, 13 Aug 2010 08:20:47 +0000 Subject: (NEEDS TESTING) cfg index rules->nts, sort by prob, remove duplicates keeping highest prob, topo sort (and after binarize topo sort). beginning to apply_fsa_models (PrefixTrieNode) git-svn-id: https://ws10smt.googlecode.com/svn/trunk@539 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/cfg_binarize.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'decoder/cfg_binarize.h') diff --git a/decoder/cfg_binarize.h b/decoder/cfg_binarize.h index f76619d2..c5303622 100755 --- a/decoder/cfg_binarize.h +++ b/decoder/cfg_binarize.h @@ -18,6 +18,8 @@ struct CFGBinarize { bool bin_l2r; bool bin_unary; bool bin_name_nts; + bool bin_uniq; + bool bin_topo; template <class Opts> // template to support both printable_opts and boost nonprintable void AddOptions(Opts *opts) { opts->add_options() @@ -25,6 +27,8 @@ struct CFGBinarize { ("cfg_binarize_unary", defaulted_value(&bin_unary),"if true, a rule-completing production A->BC may be binarized as A->U U->BC if U->BC would be used at least cfg_binarize_at times.") ("cfg_binarize_l2r", defaulted_value(&bin_l2r),"force left to right (a (b (c d))) binarization (ignore _at threshold)") ("cfg_binarize_name_nts", defaulted_value(&bin_name_nts),"create named virtual NT tokens e.g. 'A12+the' when binarizing 'B->[A12] the cat'") + ("cfg_binarize_uniq", defaulted_value(&bin_uniq),"in case of duplicate rules, keep only the one with highest prob") + ("cfg_binarize_topo", defaulted_value(&bin_topo),"reorder nonterminals after binarization to maintain definition before use (topological order). otherwise the virtual NTs will all appear after the regular NTs") ; } void Validate() { @@ -40,6 +44,8 @@ struct CFGBinarize { return bin_l2r || bin_at>0; } void set_defaults() { + bin_topo=false; + bin_uniq=true; bin_at=0; bin_unary=false; bin_name_nts=true; -- cgit v1.2.3