diff options
Diffstat (limited to 'decoder')
-rwxr-xr-x | decoder/apply_fsa_models.cc | 63 | ||||
-rwxr-xr-x | decoder/cfg.h | 21 | ||||
-rwxr-xr-x | decoder/hg_cfg.h | 11 |
3 files changed, 79 insertions, 16 deletions
diff --git a/decoder/apply_fsa_models.cc b/decoder/apply_fsa_models.cc index 5493d29f..fd3e1289 100755 --- a/decoder/apply_fsa_models.cc +++ b/decoder/apply_fsa_models.cc @@ -24,6 +24,8 @@ #define DPFSA(x) x //prefix trie +#define DBUILDTRIE(x) x + #define PRINT_PREFIX 1 #if PRINT_PREFIX # define IF_PRINT_PREFIX(x) x @@ -146,6 +148,12 @@ struct PrefixTrieNode { return ret; } + unsigned size() const { + unsigned a=adj.size(); + unsigned e=edge_for.size(); + return a>e?a:e; + } + void print_back_str(std::ostream &o) const { BPs back=back_vec(); unsigned i=back.size(); @@ -201,6 +209,7 @@ public: m.clear(); for (int i=0;i<adj.size();++i) { PrefixTrieEdge const& e=adj[i]; + SHOWM2(DPFSA,"index_adj",i,e); m[e.w]=e; } } @@ -211,27 +220,49 @@ public: // assert(e.p.is_1()); // actually, after done_building, e will have telescoped dest->p/p. NTHandle n=e.w; assert(n>=0); + SHOWM3(DPFSA,"index_lhs",i,e,n); v[n]=e.dest; } } + template <class PV> + void done_root(PV &v) { + done_building_r(); +// index_adj(); // we want an index for the root node?. don't think so - index_lhs handles it. + index_lhs(v); + } + // call only once. void done_building_r() { done_building(); for (int i=0;i<adj.size();++i) - adj[i].dest->done_building_r(); + if (adj[i].dest) // skip final edge + adj[i].dest->done_building_r(); } // for done_building; compute incremental (telescoped) edge p - PrefixTrieEdge const& operator()(PrefixTrieEdgeFor::value_type const& pair) const { - PrefixTrieEdge &e=const_cast<PrefixTrieEdge&>(pair.second); + PrefixTrieEdge /*const&*/ operator()(PrefixTrieEdgeFor::value_type & pair) const { + PrefixTrieEdge &e=pair.second;//const_cast<PrefixTrieEdge&>(pair.second); e.p=(e.dest->p)/p; return e; } // call only once. void done_building() { + SHOWM3(DBUILDTRIE,"done_building",edge_for.size(),adj.size(),1); +#if 0 adj.reinit_map(edge_for,*this); +#else + adj.reinit(edge_for.size()); + Adj::iterator o=adj.begin(); + for (PrefixTrieEdgeFor::iterator i=edge_for.begin(),e=edge_for.end();i!=e;++i) { + SHOWM3(DBUILDTRIE,"edge_for",o-adj.begin(),i->first,i->second); + PrefixTrieEdge &edge=i->second; + edge.p=(edge.dest->p)/p; + *o++=edge; +// (*this)(*i); + } +#endif // if (final) p_final/=p; std::sort(adj.begin(),adj.end()); //TODO: store adjacent differences on edges (compared to @@ -294,12 +325,22 @@ public: ~PrefixTrieNode() { destroy_children(); } + void print(std::ostream &o) const { + o << lhs << "->" << p; + o << ',' << size() << ','; + print_back_str(o); + } + PRINT_SELF(PrefixTrieNode) }; //Trie starts with lhs (nonneg index), then continues w/ rhs (mixed >0 word, else NT) // trie ends with final edge, which points to a per-lhs prefix node struct PrefixTrie { + void print(std::ostream &o) const { + o << cfgp << ' ' << root; + } + PRINT_SELF(PrefixTrie); CFG *cfgp; Rules const* rulesp; Rules const& rules() const { return *rulesp; } @@ -312,10 +353,9 @@ struct PrefixTrie { PrefixTrie(CFG &cfg) : cfgp(&cfg),rulesp(&cfg.rules),lhs2(cfg.nts.size(),0),lhs2complete(cfg.nts.size()) { // cfg.SortLocalBestFirst(); // instead we'll sort in done_building_r print_cfg=cfgp; + SHOWM2(DBUILDTRIE,"PrefixTrie()",rulesp->size(),lhs2.size()); cfg.VisitRuleIds(*this); - root.done_building_r(); - root.index_adj(); // maybe the index we use for building trie should be replaced by a much larger/faster table since we look up by lhs many times in parsing? - root.index_lhs(lhs2); + root.done_root(lhs2); } void operator()(int ri) const { @@ -323,12 +363,15 @@ struct PrefixTrie { NTHandle lhs=r.lhs; best_t p=r.p; NodeP n=const_cast<PrefixTrieNode&>(root).build_lhs(lhs,p); + SHOWM3(DBUILDTRIE,"Prefixtrie rule id",ri,root,p); for (RHS::const_iterator i=r.rhs.begin(),e=r.rhs.end();;++i) { + SHOWM2(DBUILDTRIE,"PrefixTrie build or final",i-r.rhs.begin(),*n); if (i==e) { n->set_final(lhs,p); break; } n=n->build(*i,p); + SHOWM2(DBUILDTRIE,"PrefixTrie built",*i,*n); } // root.build(lhs,r.p)->build(r.rhs,r.p); } @@ -390,8 +433,7 @@ struct ItemKey { } } NTHandle lhs() const { return dot->lhs; } - typedef ItemKey self_type; - SELF_TYPE_PRINT + PRINT_SELF(ItemKey) }; inline ItemHash hash_value(ItemKey const& x) { return x.hash(); @@ -424,8 +466,7 @@ struct Item : ItemPrio,ItemKey { o<<" next="<<next; o<< ']'; } - typedef Item self_type; - SELF_TYPE_PRINT + PRINT_SELF(Item) }; struct GetItemKey { @@ -553,6 +594,8 @@ template <class F> void ApplyFsa<F>::ApplyEarley() { hgcfg.GiveCFG(cfg); + print_cfg=&cfg; + print_fsa=&fsa; Chart<F> chart(cfg,smeta,fsa); // don't need to uniq - option to do that already exists in cfg_options //TODO: diff --git a/decoder/cfg.h b/decoder/cfg.h index 79ae6f33..95cb5fd7 100755 --- a/decoder/cfg.h +++ b/decoder/cfg.h @@ -1,6 +1,8 @@ #ifndef CDEC_CFG_H #define CDEC_CFG_H +#define DVISITRULEID(x) + // for now, debug means remembering and printing the TRule behind each CFG rule #ifndef CFG_DEBUG # define CFG_DEBUG 1 @@ -133,6 +135,10 @@ struct CFG { swap(f,o.f); IF_CFG_TRULE(swap(rule,o.rule);) } + friend inline void swap(Rule &a,Rule &b) { + a.Swap(b); + } + template<class V> void visit_rhs_nts(V &v) const { for (RHS::const_iterator i=rhs.begin(),e=rhs.end();i!=e;++i) { @@ -195,6 +201,9 @@ struct CFG { swap(ruleids,o.ruleids); swap(from,o.from); } + friend inline void swap(NT &a,NT &b) { + a.Swap(b); + } }; CFG() : hg_() { uninit=true; } @@ -294,14 +303,19 @@ struct CFG { // call after rules are indexed. template <class V> void VisitRuleIds(V &v) { - for (int i=0,e=nts.size();i<e;++i) - for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.begin();j!=jj;++j) + for (int i=0,e=nts.size();i<e;++i) { + SHOWM(DVISITRULEID,"VisitRuleIds nt",i); + for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.end();j!=jj;++j) { + SHOWM2(DVISITRULEID,"VisitRuleIds",i,*j); v(*j); + } + } + } template <class V> void VisitRuleIds(V const& v) { for (int i=0,e=nts.size();i<e;++i) - for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.begin();j!=jj;++j) + for (Ruleids::const_iterator j=nts[i].ruleids.begin(),jj=nts[i].ruleids.end();j!=jj;++j) v(*j); } @@ -351,7 +365,6 @@ inline std::size_t hash_value(CFG::NT const& r) { return r.hash_impl(); } - inline void swap(CFG &a,CFG &b) { a.Swap(b); } diff --git a/decoder/hg_cfg.h b/decoder/hg_cfg.h index ba936990..b90aca47 100755 --- a/decoder/hg_cfg.h +++ b/decoder/hg_cfg.h @@ -7,8 +7,12 @@ class Hypergraph; // in case you might want the CFG whether or not you apply FSA models: struct HgCFG { - HgCFG(Hypergraph const& ih) : ih(ih) { + void set_defaults() { have_cfg=binarized=have_features=uniqed=false; + want_features=true; + } + HgCFG(Hypergraph const& ih) : ih(ih) { + set_defaults(); } Hypergraph const& ih; CFG cfg; @@ -32,11 +36,14 @@ struct HgCFG { if (!have_cfg) InitCFG(to); else { + cfg.VisitRuleIds(*this); have_cfg=false; to.Clear(); - to.Swap(cfg); + swap(to,cfg); } } + void operator()(int ri) const { + } CFG const& GetCFG() const { assert(have_cfg); return cfg; |