diff options
author | graehl@gmail.com <graehl@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-08-15 07:39:01 +0000 |
---|---|---|
committer | graehl@gmail.com <graehl@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-08-15 07:39:01 +0000 |
commit | 6d3cf2f3aeaa5d008f5031f70da8d728181486bc (patch) | |
tree | 69b0d6e35b65075ddfeb97a7fbf85f87ec513dfe /decoder/cfg.h | |
parent | c142f3bde0fa673ddb3f6fc7ed3d08e71f8ff8eb (diff) |
really fixed binarization. test
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@555 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'decoder/cfg.h')
-rwxr-xr-x | decoder/cfg.h | 26 |
1 files changed, 19 insertions, 7 deletions
diff --git a/decoder/cfg.h b/decoder/cfg.h index b6dd6d99..c07a6901 100755 --- a/decoder/cfg.h +++ b/decoder/cfg.h @@ -5,12 +5,22 @@ #ifndef CFG_DEBUG # define CFG_DEBUG 0 #endif +#ifndef CFG_KEEP_TRULE +# define CFG_KEEP_TRULE 0 +#endif + #if CFG_DEBUG -# define IF_CFG_DEBUG(x) x +# define IF_CFG_DEBUG(x) x; #else # define IF_CFG_DEBUG(x) #endif +#if CFG_KEEP_TRULE +# define IF_CFG_TRULE(x) x; +#else +# define IF_CFG_TRULE(x) +#endif + /* for target FSA intersection, we want to produce a simple (feature weighted) CFG using the target projection of a hg. this is essentially isomorphic to the hypergraph, and we're copying part of the rule info (we'll maintain a pointer to the original hg edge for posterity/debugging; and perhaps avoid making a copy of the feature vector). but we may also want to support CFG read from text files (w/ features), without needing to have a backing hypergraph. so hg pointer may be null? multiple types of CFG? always copy the feature vector? especially if we choose to binarize, we won't want to rely on 1:1 alignment w/ hg question: how much does making a copy (essentially) of hg simplify things? is the space used worth it? is the node in/out edges index really that much of a waste? is the use of indices that annoying? @@ -76,7 +86,7 @@ struct CFG { // for binarizing - no costs/probs Rule() : lhs(-1) { } bool is_null() const { return lhs<0; } - void set_null() { lhs=-1; rhs.clear();f.clear(); IF_CFG_DEBUG(rule.reset();) } + void set_null() { lhs=-1; rhs.clear();f.clear(); IF_CFG_TRULE(rule.reset();) } Rule(int lhs,BinRhs const& binrhs) : lhs(lhs),rhs(2),p(1) { rhs[0]=binrhs.first; @@ -87,14 +97,14 @@ struct CFG { RHS rhs; prob_t p; // h unused for now (there's nothing admissable, and p is already using 1st pass inside as pushed toward top) FeatureVector f; // may be empty, unless copy_features on Init - IF_CFG_DEBUG(TRulePtr rule;) + IF_CFG_TRULE(TRulePtr rule;) void Swap(Rule &o) { using namespace std; swap(lhs,o.lhs); swap(rhs,o.rhs); swap(p,o.p); swap(f,o.f); - IF_CFG_DEBUG(swap(rule,o.rule);) + IF_CFG_TRULE(swap(rule,o.rule);) } template<class V> void visit_rhs_nts(V &v) const { @@ -171,9 +181,11 @@ struct CFG { bool Empty() const { return nts.empty(); } void UnindexRules(); // save some space? void ReindexRules(); // scan over rules and rebuild NT::ruleids (e.g. after using UniqRules) - void UniqRules(NTHandle ni); // keep only the highest prob rule for each rhs and lhs=nt - doesn't remove from Rules; just removes from nts[ni].ruleids. keeps the same order in this sense: for a given signature (rhs), that signature's first representative in the old ruleids will become the new position of the best. as a consequence, if you SortLocalBestFirst() then UniqRules(), the result is still best first. but you may also call this on unsorted ruleids. - inline void UniqRules() { - for (int i=0,e=nts.size();i!=e;++i) UniqRules(i); + int UniqRules(NTHandle ni); // keep only the highest prob rule for each rhs and lhs=nt - doesn't remove from Rules; just removes from nts[ni].ruleids. keeps the same order in this sense: for a given signature (rhs), that signature's first representative in the old ruleids will become the new position of the best. as a consequence, if you SortLocalBestFirst() then UniqRules(), the result is still best first. but you may also call this on unsorted ruleids. returns number of rules kept + inline int UniqRules() { + int nkept=0; + for (int i=0,e=nts.size();i!=e;++i) nkept+=UniqRules(i); + return nkept; } void SortLocalBestFirst(NTHandle ni); // post: nts[ni].ruleids lists rules from highest p to lowest. when doing best-first earley intersection/parsing, you don't want to use the global marginal viterbi; you want to ignore outside in ordering edges for a node, so call this. stable in case of ties |